2021-06-18 18:38:38 | INFO | fairseq_cli.train | {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 1, 'log_format': 'simple', 'log_file': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': None}, 'distributed_training': {'_name': None, 'distributed_world_size': 1, 'distributed_num_procs': 1, 'distributed_rank': 0, 'distributed_backend': 'nccl', 'distributed_init_method': None, 'distributed_port': -1, 'device_id': 0, 'distributed_no_spawn': False, 'ddp_backend': 'pytorch_ddp', 'ddp_comm_hook': 'none', 'bucket_cap_mb': 25, 'fix_batches_to_gpus': False, 'find_unused_parameters': False, 'fast_stat_sync': False, 'heartbeat_timeout': -1, 'broadcast_buffers': False, 'slowmo_momentum': None, 'slowmo_algorithm': 'LocalSGD', 'localsgd_frequency': 3, 'nprocs_per_node': 1, 'pipeline_model_parallel': False, 'pipeline_balance': None, 'pipeline_devices': None, 'pipeline_chunks': 0, 'pipeline_encoder_balance': None, 'pipeline_encoder_devices': None, 'pipeline_decoder_balance': None, 'pipeline_decoder_devices': None, 'pipeline_checkpoint': 'never', 'zero_sharding': 'none', 'fp16': True, 'memory_efficient_fp16': False, 'tpu': False, 'no_reshard_after_forward': False, 'fp32_reduce_scatter': False, 'cpu_offload': False, 'use_sharded_state': False}, 'dataset': {'_name': None, 'num_workers': 1, 'skip_invalid_size_inputs_valid_test': False, 'max_tokens': None, 'batch_size': 1, 'required_batch_size_multiple': 8, 'required_seq_len_multiple': 1, 'dataset_impl': None, 'data_buffer_size': 10, 'train_subset': 'train', 'valid_subset': 'valid', 'combine_valid_subsets': None, 'ignore_unused_valid_subsets': False, 'validate_interval': 1, 'validate_interval_updates': 0, 'validate_after_updates': 0, 'fixed_validation_seed': None, 'disable_validation': False, 'max_tokens_valid': None, 'batch_size_valid': 1, 'max_valid_steps': None, 'curriculum': 0, 'gen_subset': 'test', 'num_shards': 1, 'shard_id': 0}, 'optimization': {'_name': None, 'max_epoch': 0, 'max_update': 12500000, 'stop_time_hours': 0.0, 'clip_norm': 0.0, 'sentence_avg': False, 'update_freq': [128], 'lr': [0.0001], 'stop_min_lr': -1.0, 'use_bmuf': False}, 'checkpoint': {'_name': None, 'save_dir': 'checkpoints', 'restore_file': 'roberta.large/model.pt', 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 1, 'save_interval_updates': 0, 'keep_interval_updates': -1, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': -1, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': False, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'loss', 'maximize_best_checkpoint_metric': False, 'patience': -1, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 'load_checkpoint_on_all_dp_ranks': False, 'write_checkpoints_asynchronously': False, 'model_parallel_size': 1}, 'bmuf': {'_name': None, 'block_lr': 1.0, 'block_momentum': 0.875, 'global_sync_iter': 50, 'warmup_iterations': 500, 'use_nbm': False, 'average_sync': False, 'distributed_world_size': 1}, 'generation': {'_name': None, 'beam': 5, 'nbest': 1, 'max_len_a': 0.0, 'max_len_b': 200, 'min_len': 1, 'match_source_len': False, 'unnormalized': False, 'no_early_stop': False, 'no_beamable_mm': False, 'lenpen': 1.0, 'unkpen': 0.0, 'replace_unk': None, 'sacrebleu': False, 'score_reference': False, 'prefix_size': 0, 'no_repeat_ngram_size': 0, 'sampling': False, 'sampling_topk': -1, 'sampling_topp': -1.0, 'constraints': None, 'temperature': 1.0, 'diverse_beam_groups': -1, 'diverse_beam_strength': 0.5, 'diversity_rate': -1.0, 'print_alignment': None, 'print_step': False, 'lm_path': None, 'lm_weight': 0.0, 'iter_decode_eos_penalty': 0.0, 'iter_decode_max_iter': 10, 'iter_decode_force_max_iter': False, 'iter_decode_with_beam': 1, 'iter_decode_with_external_reranker': False, 'retain_iter_history': False, 'retain_dropout': False, 'retain_dropout_modules': None, 'decoding_format': None, 'no_seed_provided': False}, 'eval_lm': {'_name': None, 'output_word_probs': False, 'output_word_stats': False, 'context_window': 0, 'softmax_batch': 9223372036854775807}, 'interactive': {'_name': None, 'buffer_size': 0, 'input': '-'}, 'model': Namespace(no_progress_bar=False, log_interval=1, log_format='simple', log_file=None, tensorboard_logdir=None, wandb_project=None, azureml_logging=False, seed=1, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=True, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, on_cpu_convert_precision=False, min_loss_scale=0.0001, threshold_loss_scale=None, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, quantization_config_path=None, profile=False, reset_logging=False, suppress_crashes=False, use_plasma_view=False, plasma_path='/tmp/plasma', criterion='masked_lm', tokenizer=None, bpe=None, optimizer='adam', lr_scheduler='polynomial_decay', simul_type=None, scoring='bleu', task='masked_lm', num_workers=1, skip_invalid_size_inputs_valid_test=False, max_tokens=None, batch_size=1, required_batch_size_multiple=8, required_seq_len_multiple=1, dataset_impl=None, data_buffer_size=10, train_subset='train', valid_subset='valid', combine_valid_subsets=None, ignore_unused_valid_subsets=False, validate_interval=1, validate_interval_updates=0, validate_after_updates=0, fixed_validation_seed=None, disable_validation=False, max_tokens_valid=None, batch_size_valid=1, max_valid_steps=None, curriculum=0, gen_subset='test', num_shards=1, shard_id=0, distributed_world_size=1, distributed_num_procs=1, distributed_rank=0, distributed_backend='nccl', distributed_init_method=None, distributed_port=-1, device_id=0, distributed_no_spawn=False, ddp_backend='pytorch_ddp', ddp_comm_hook='none', bucket_cap_mb=25, fix_batches_to_gpus=False, find_unused_parameters=False, fast_stat_sync=False, heartbeat_timeout=-1, broadcast_buffers=False, slowmo_momentum=None, slowmo_algorithm='LocalSGD', localsgd_frequency=3, nprocs_per_node=1, pipeline_model_parallel=False, pipeline_balance=None, pipeline_devices=None, pipeline_chunks=0, pipeline_encoder_balance=None, pipeline_encoder_devices=None, pipeline_decoder_balance=None, pipeline_decoder_devices=None, pipeline_checkpoint='never', zero_sharding='none', no_reshard_after_forward=False, fp32_reduce_scatter=False, cpu_offload=False, use_sharded_state=False, arch='roberta_large', max_epoch=0, max_update=12500000, stop_time_hours=0, clip_norm=0.0, sentence_avg=False, update_freq=[128], lr=[0.0001], stop_min_lr=-1.0, use_bmuf=False, save_dir='checkpoints', restore_file='roberta.large/model.pt', finetune_from_model=None, reset_dataloader=False, reset_lr_scheduler=False, reset_meters=False, reset_optimizer=False, optimizer_overrides='{}', save_interval=1, save_interval_updates=0, keep_interval_updates=-1, keep_interval_updates_pattern=-1, keep_last_epochs=-1, keep_best_checkpoints=-1, no_save=False, no_epoch_checkpoints=False, no_last_checkpoints=False, no_save_optimizer_state=False, best_checkpoint_metric='loss', maximize_best_checkpoint_metric=False, patience=-1, checkpoint_suffix='', checkpoint_shard_count=1, load_checkpoint_on_all_dp_ranks=False, write_checkpoints_asynchronously=False, encoder_layerdrop=0, encoder_layers_to_keep=None, quant_noise_pq=0, quant_noise_pq_block_size=8, quant_noise_scalar=0, spectral_norm_classification_head=False, min_params_to_wrap=100000000, data='data-bin/wikitext-103', sample_break_mode='complete', tokens_per_sample=512, mask_prob=0.15, leave_unmasked_prob=0.1, random_token_prob=0.1, freq_weighted_replacement=False, mask_whole_words=False, mask_multiple_length=1, mask_stdev=0.0, shorten_method='none', shorten_data_split_list='', adam_betas='(0.9,0.98)', adam_eps=1e-06, weight_decay=0.01, use_old_adam=False, warmup_updates=1000, force_anneal=None, end_learning_rate=0.0, power=1.0, total_num_update='12500000', pad=1, eos=2, unk=3, dropout=0.1, attention_dropout=0.1, no_seed_provided=False, encoder_layers=24, encoder_embed_dim=1024, encoder_ffn_embed_dim=4096, encoder_attention_heads=16, activation_dropout=0.0, pooler_dropout=0.0, max_source_positions=512, no_token_positional_embeddings=False, encoder_learned_pos=True, layernorm_embedding=True, no_scale_embedding=True, activation_fn='gelu', encoder_normalize_before=False, pooler_activation_fn='tanh', untie_weights_roberta=False, adaptive_input=False, _name='roberta_large'), 'task': Namespace(no_progress_bar=False, log_interval=1, log_format='simple', log_file=None, tensorboard_logdir=None, wandb_project=None, azureml_logging=False, seed=1, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=True, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, on_cpu_convert_precision=False, min_loss_scale=0.0001, threshold_loss_scale=None, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, quantization_config_path=None, profile=False, reset_logging=False, suppress_crashes=False, use_plasma_view=False, plasma_path='/tmp/plasma', criterion='masked_lm', tokenizer=None, bpe=None, optimizer='adam', lr_scheduler='polynomial_decay', simul_type=None, scoring='bleu', task='masked_lm', num_workers=1, skip_invalid_size_inputs_valid_test=False, max_tokens=None, batch_size=1, required_batch_size_multiple=8, required_seq_len_multiple=1, dataset_impl=None, data_buffer_size=10, train_subset='train', valid_subset='valid', combine_valid_subsets=None, ignore_unused_valid_subsets=False, validate_interval=1, validate_interval_updates=0, validate_after_updates=0, fixed_validation_seed=None, disable_validation=False, max_tokens_valid=None, batch_size_valid=1, max_valid_steps=None, curriculum=0, gen_subset='test', num_shards=1, shard_id=0, distributed_world_size=1, distributed_num_procs=1, distributed_rank=0, distributed_backend='nccl', distributed_init_method=None, distributed_port=-1, device_id=0, distributed_no_spawn=False, ddp_backend='pytorch_ddp', ddp_comm_hook='none', bucket_cap_mb=25, fix_batches_to_gpus=False, find_unused_parameters=False, fast_stat_sync=False, heartbeat_timeout=-1, broadcast_buffers=False, slowmo_momentum=None, slowmo_algorithm='LocalSGD', localsgd_frequency=3, nprocs_per_node=1, pipeline_model_parallel=False, pipeline_balance=None, pipeline_devices=None, pipeline_chunks=0, pipeline_encoder_balance=None, pipeline_encoder_devices=None, pipeline_decoder_balance=None, pipeline_decoder_devices=None, pipeline_checkpoint='never', zero_sharding='none', no_reshard_after_forward=False, fp32_reduce_scatter=False, cpu_offload=False, use_sharded_state=False, arch='roberta_large', max_epoch=0, max_update=12500000, stop_time_hours=0, clip_norm=0.0, sentence_avg=False, update_freq=[128], lr=[0.0001], stop_min_lr=-1.0, use_bmuf=False, save_dir='checkpoints', restore_file='roberta.large/model.pt', finetune_from_model=None, reset_dataloader=False, reset_lr_scheduler=False, reset_meters=False, reset_optimizer=False, optimizer_overrides='{}', save_interval=1, save_interval_updates=0, keep_interval_updates=-1, keep_interval_updates_pattern=-1, keep_last_epochs=-1, keep_best_checkpoints=-1, no_save=False, no_epoch_checkpoints=False, no_last_checkpoints=False, no_save_optimizer_state=False, best_checkpoint_metric='loss', maximize_best_checkpoint_metric=False, patience=-1, checkpoint_suffix='', checkpoint_shard_count=1, load_checkpoint_on_all_dp_ranks=False, write_checkpoints_asynchronously=False, encoder_layerdrop=0, encoder_layers_to_keep=None, quant_noise_pq=0, quant_noise_pq_block_size=8, quant_noise_scalar=0, spectral_norm_classification_head=False, min_params_to_wrap=100000000, data='data-bin/wikitext-103', sample_break_mode='complete', tokens_per_sample=512, mask_prob=0.15, leave_unmasked_prob=0.1, random_token_prob=0.1, freq_weighted_replacement=False, mask_whole_words=False, mask_multiple_length=1, mask_stdev=0.0, shorten_method='none', shorten_data_split_list='', adam_betas='(0.9,0.98)', adam_eps=1e-06, weight_decay=0.01, use_old_adam=False, warmup_updates=1000, force_anneal=None, end_learning_rate=0.0, power=1.0, total_num_update='12500000', pad=1, eos=2, unk=3, dropout=0.1, attention_dropout=0.1, no_seed_provided=False, encoder_layers=24, encoder_embed_dim=1024, encoder_ffn_embed_dim=4096, encoder_attention_heads=16, activation_dropout=0.0, pooler_dropout=0.0, max_source_positions=512, no_token_positional_embeddings=False, encoder_learned_pos=True, layernorm_embedding=True, no_scale_embedding=True, activation_fn='gelu', encoder_normalize_before=False, pooler_activation_fn='tanh', untie_weights_roberta=False, adaptive_input=False, _name='masked_lm'), 'criterion': Namespace(no_progress_bar=False, log_interval=1, log_format='simple', log_file=None, tensorboard_logdir=None, wandb_project=None, azureml_logging=False, seed=1, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=True, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, on_cpu_convert_precision=False, min_loss_scale=0.0001, threshold_loss_scale=None, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, quantization_config_path=None, profile=False, reset_logging=False, suppress_crashes=False, use_plasma_view=False, plasma_path='/tmp/plasma', criterion='masked_lm', tokenizer=None, bpe=None, optimizer='adam', lr_scheduler='polynomial_decay', simul_type=None, scoring='bleu', task='masked_lm', num_workers=1, skip_invalid_size_inputs_valid_test=False, max_tokens=None, batch_size=1, required_batch_size_multiple=8, required_seq_len_multiple=1, dataset_impl=None, data_buffer_size=10, train_subset='train', valid_subset='valid', combine_valid_subsets=None, ignore_unused_valid_subsets=False, validate_interval=1, validate_interval_updates=0, validate_after_updates=0, fixed_validation_seed=None, disable_validation=False, max_tokens_valid=None, batch_size_valid=1, max_valid_steps=None, curriculum=0, gen_subset='test', num_shards=1, shard_id=0, distributed_world_size=1, distributed_num_procs=1, distributed_rank=0, distributed_backend='nccl', distributed_init_method=None, distributed_port=-1, device_id=0, distributed_no_spawn=False, ddp_backend='pytorch_ddp', ddp_comm_hook='none', bucket_cap_mb=25, fix_batches_to_gpus=False, find_unused_parameters=False, fast_stat_sync=False, heartbeat_timeout=-1, broadcast_buffers=False, slowmo_momentum=None, slowmo_algorithm='LocalSGD', localsgd_frequency=3, nprocs_per_node=1, pipeline_model_parallel=False, pipeline_balance=None, pipeline_devices=None, pipeline_chunks=0, pipeline_encoder_balance=None, pipeline_encoder_devices=None, pipeline_decoder_balance=None, pipeline_decoder_devices=None, pipeline_checkpoint='never', zero_sharding='none', no_reshard_after_forward=False, fp32_reduce_scatter=False, cpu_offload=False, use_sharded_state=False, arch='roberta_large', max_epoch=0, max_update=12500000, stop_time_hours=0, clip_norm=0.0, sentence_avg=False, update_freq=[128], lr=[0.0001], stop_min_lr=-1.0, use_bmuf=False, save_dir='checkpoints', restore_file='roberta.large/model.pt', finetune_from_model=None, reset_dataloader=False, reset_lr_scheduler=False, reset_meters=False, reset_optimizer=False, optimizer_overrides='{}', save_interval=1, save_interval_updates=0, keep_interval_updates=-1, keep_interval_updates_pattern=-1, keep_last_epochs=-1, keep_best_checkpoints=-1, no_save=False, no_epoch_checkpoints=False, no_last_checkpoints=False, no_save_optimizer_state=False, best_checkpoint_metric='loss', maximize_best_checkpoint_metric=False, patience=-1, checkpoint_suffix='', checkpoint_shard_count=1, load_checkpoint_on_all_dp_ranks=False, write_checkpoints_asynchronously=False, encoder_layerdrop=0, encoder_layers_to_keep=None, quant_noise_pq=0, quant_noise_pq_block_size=8, quant_noise_scalar=0, spectral_norm_classification_head=False, min_params_to_wrap=100000000, data='data-bin/wikitext-103', sample_break_mode='complete', tokens_per_sample=512, mask_prob=0.15, leave_unmasked_prob=0.1, random_token_prob=0.1, freq_weighted_replacement=False, mask_whole_words=False, mask_multiple_length=1, mask_stdev=0.0, shorten_method='none', shorten_data_split_list='', adam_betas='(0.9,0.98)', adam_eps=1e-06, weight_decay=0.01, use_old_adam=False, warmup_updates=1000, force_anneal=None, end_learning_rate=0.0, power=1.0, total_num_update='12500000', pad=1, eos=2, unk=3, dropout=0.1, attention_dropout=0.1, no_seed_provided=False, encoder_layers=24, encoder_embed_dim=1024, encoder_ffn_embed_dim=4096, encoder_attention_heads=16, activation_dropout=0.0, pooler_dropout=0.0, max_source_positions=512, no_token_positional_embeddings=False, encoder_learned_pos=True, layernorm_embedding=True, no_scale_embedding=True, activation_fn='gelu', encoder_normalize_before=False, pooler_activation_fn='tanh', untie_weights_roberta=False, adaptive_input=False, _name='masked_lm'), 'optimizer': {'_name': 'adam', 'adam_betas': '(0.9,0.98)', 'adam_eps': 1e-06, 'weight_decay': 0.01, 'use_old_adam': False, 'tpu': False, 'lr': [0.0001]}, 'lr_scheduler': {'_name': 'polynomial_decay', 'warmup_updates': 1000, 'force_anneal': None, 'end_learning_rate': 0.0, 'power': 1.0, 'total_num_update': 12500000.0, 'lr': [0.0001]}, 'scoring': {'_name': 'bleu', 'pad': 1, 'eos': 2, 'unk': 3}, 'bpe': None, 'tokenizer': None, 'simul_type': None} 2021-06-18 18:38:38 | INFO | fairseq.tasks.masked_lm | dictionary: 50264 types 2021-06-18 18:38:44 | INFO | fairseq_cli.train | RobertaModel( (encoder): RobertaEncoder( (sentence_encoder): TransformerEncoder( (dropout_module): FairseqDropout() (embed_tokens): Embedding(50265, 1024, padding_idx=1) (embed_positions): LearnedPositionalEmbedding(514, 1024, padding_idx=1) (layernorm_embedding): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (layers): ModuleList( (0): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (1): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (2): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (3): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (4): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (5): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (6): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (7): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (8): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (9): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (10): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (11): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (12): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (13): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (14): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (15): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (16): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (17): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (18): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (19): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (20): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (21): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (22): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (23): TransformerEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) ) (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) ) ) (lm_head): RobertaLMHead( (dense): Linear(in_features=1024, out_features=1024, bias=True) (layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) ) (classification_heads): ModuleDict() ) 2021-06-18 18:38:44 | INFO | fairseq_cli.train | task: MaskedLMTask 2021-06-18 18:38:44 | INFO | fairseq_cli.train | model: RobertaModel 2021-06-18 18:38:44 | INFO | fairseq_cli.train | criterion: MaskedLmLoss 2021-06-18 18:38:44 | INFO | fairseq_cli.train | num. shared model params: 355,411,033 (num. trained: 355,411,033) 2021-06-18 18:38:44 | INFO | fairseq_cli.train | num. expert model params: 0 (num. trained: 0) 2021-06-18 18:38:44 | INFO | fairseq.data.data_utils | loaded 1,632,586 examples from: data-bin/wikitext-103/valid 2021-06-18 18:38:44 | INFO | fairseq.tasks.masked_lm | loaded 34843 blocks from: data-bin/wikitext-103/valid 2021-06-18 18:38:46 | INFO | fairseq.trainer | detected shared parameter: encoder.sentence_encoder.embed_tokens.weight <- encoder.lm_head.weight 2021-06-18 18:38:46 | INFO | fairseq.utils | ***********************CUDA enviroments for all 1 workers*********************** 2021-06-18 18:38:46 | INFO | fairseq.utils | rank 0: capabilities = 7.5 ; total memory = 10.761 GB ; name = NVIDIA GeForce RTX 2080 Ti 2021-06-18 18:38:46 | INFO | fairseq.utils | ***********************CUDA enviroments for all 1 workers*********************** 2021-06-18 18:38:46 | INFO | fairseq_cli.train | training on 1 devices (GPUs/TPUs) 2021-06-18 18:38:46 | INFO | fairseq_cli.train | max tokens per device = None and max sentences per device = 1 2021-06-18 18:38:46 | INFO | fairseq.trainer | Preparing to load checkpoint roberta.large/model.pt 2021-06-18 18:38:47 | INFO | fairseq.trainer | Loaded checkpoint roberta.large/model.pt (epoch 1 @ 0 updates) 2021-06-18 18:38:47 | INFO | fairseq.trainer | loading train data for epoch 1 2021-06-18 18:38:47 | INFO | fairseq.data.data_utils | loaded 17,956,888 examples from: data-bin/wikitext-103/train 2021-06-18 18:38:47 | INFO | fairseq.tasks.masked_lm | loaded 384203 blocks from: data-bin/wikitext-103/train 2021-06-18 18:38:47 | WARNING | fairseq.tasks.fairseq_task | 3 samples have invalid sizes and will be skipped, max_positions=512, first few sample ids=[64349, 331776, 59863] 2021-06-18 18:38:48 | INFO | fairseq.trainer | begin training epoch 1 2021-06-18 18:38:48 | INFO | fairseq_cli.train | Start iterating over samples 2021-06-18 18:39:08 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 2021-06-18 18:39:24 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 2021-06-18 18:39:43 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 2021-06-18 18:40:02 | INFO | train_inner | epoch 001: 4 / 3002 loss=3.569, ppl=11.87, wps=0, ups=0, wpb=64860, bsz=128, num_updates=1, lr=1e-07, gnorm=3.964, loss_scale=16, train_wall=72, gb_free=2.8, wall=77 2021-06-18 18:40:13 | INFO | train_inner | epoch 001: 5 / 3002 loss=3.558, ppl=11.77, wps=5882, ups=0.09, wpb=64767, bsz=128, num_updates=2, lr=2e-07, gnorm=3.582, loss_scale=16, train_wall=11, gb_free=2.8, wall=88 2021-06-18 18:40:24 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-18 18:40:35 | INFO | train_inner | epoch 001: 7 / 3002 loss=3.906, ppl=14.99, wps=2932.7, ups=0.05, wpb=64887, bsz=128, num_updates=3, lr=3e-07, gnorm=3.441, loss_scale=8, train_wall=21, gb_free=2.8, wall=110 2021-06-18 18:40:47 | INFO | train_inner | epoch 001: 8 / 3002 loss=3.275, ppl=9.68, wps=5846.8, ups=0.09, wpb=64787, bsz=128, num_updates=4, lr=4e-07, gnorm=3.198, loss_scale=8, train_wall=11, gb_free=2.8, wall=121 2021-06-18 18:40:57 | INFO | train_inner | epoch 001: 9 / 3002 loss=3.637, ppl=12.44, wps=5940.2, ups=0.09, wpb=64809, bsz=128, num_updates=5, lr=5e-07, gnorm=3.445, loss_scale=8, train_wall=10, gb_free=2.8, wall=132 2021-06-18 18:41:08 | INFO | train_inner | epoch 001: 10 / 3002 loss=3.616, ppl=12.26, wps=5926.2, ups=0.09, wpb=64881, bsz=128, num_updates=6, lr=6e-07, gnorm=5.36, loss_scale=8, train_wall=11, gb_free=2.8, wall=143 2021-06-18 18:41:19 | INFO | train_inner | epoch 001: 11 / 3002 loss=3.569, ppl=11.87, wps=5947.9, ups=0.09, wpb=64948, bsz=128, num_updates=7, lr=7e-07, gnorm=3.479, loss_scale=8, train_wall=10, gb_free=2.8, wall=154 2021-06-18 18:41:30 | INFO | train_inner | epoch 001: 12 / 3002 loss=3.726, ppl=13.23, wps=5854.5, ups=0.09, wpb=64832, bsz=128, num_updates=8, lr=8e-07, gnorm=3.433, loss_scale=8, train_wall=11, gb_free=2.8, wall=165 2021-06-18 18:41:41 | INFO | train_inner | epoch 001: 13 / 3002 loss=3.573, ppl=11.9, wps=5861.8, ups=0.09, wpb=64828, bsz=128, num_updates=9, lr=9e-07, gnorm=3.643, loss_scale=8, train_wall=11, gb_free=2.8, wall=176 2021-06-18 18:41:53 | INFO | train_inner | epoch 001: 14 / 3002 loss=3.462, ppl=11.02, wps=5807.1, ups=0.09, wpb=64797, bsz=128, num_updates=10, lr=1e-06, gnorm=3.72, loss_scale=8, train_wall=11, gb_free=2.8, wall=187 2021-06-18 18:42:04 | INFO | train_inner | epoch 001: 15 / 3002 loss=3.599, ppl=12.11, wps=5857.8, ups=0.09, wpb=64869, bsz=128, num_updates=11, lr=1.1e-06, gnorm=3.424, loss_scale=8, train_wall=11, gb_free=2.8, wall=198 2021-06-18 18:42:15 | INFO | train_inner | epoch 001: 16 / 3002 loss=3.492, ppl=11.25, wps=5926.9, ups=0.09, wpb=64829, bsz=128, num_updates=12, lr=1.2e-06, gnorm=3.203, loss_scale=8, train_wall=11, gb_free=2.8, wall=209 2021-06-18 18:42:26 | INFO | train_inner | epoch 001: 17 / 3002 loss=3.548, ppl=11.69, wps=5841.6, ups=0.09, wpb=64835, bsz=128, num_updates=13, lr=1.3e-06, gnorm=3.787, loss_scale=8, train_wall=11, gb_free=2.8, wall=220 2021-06-18 18:42:37 | INFO | train_inner | epoch 001: 18 / 3002 loss=3.546, ppl=11.68, wps=5985.2, ups=0.09, wpb=64822, bsz=128, num_updates=14, lr=1.4e-06, gnorm=3.763, loss_scale=8, train_wall=10, gb_free=2.8, wall=231 2021-06-18 18:42:48 | INFO | train_inner | epoch 001: 19 / 3002 loss=3.859, ppl=14.51, wps=5799.5, ups=0.09, wpb=64786, bsz=128, num_updates=15, lr=1.5e-06, gnorm=3.282, loss_scale=8, train_wall=11, gb_free=2.8, wall=242 2021-06-18 18:42:59 | INFO | train_inner | epoch 001: 20 / 3002 loss=3.651, ppl=12.56, wps=5880.6, ups=0.09, wpb=64800, bsz=128, num_updates=16, lr=1.6e-06, gnorm=3.145, loss_scale=8, train_wall=11, gb_free=2.8, wall=253 2021-06-18 18:43:10 | INFO | train_inner | epoch 001: 21 / 3002 loss=3.564, ppl=11.83, wps=5874.5, ups=0.09, wpb=64823, bsz=128, num_updates=17, lr=1.7e-06, gnorm=3.388, loss_scale=8, train_wall=11, gb_free=2.8, wall=264 2021-06-18 18:43:21 | INFO | train_inner | epoch 001: 22 / 3002 loss=3.633, ppl=12.41, wps=5830.1, ups=0.09, wpb=64878, bsz=128, num_updates=18, lr=1.8e-06, gnorm=3.109, loss_scale=8, train_wall=11, gb_free=2.8, wall=275 2021-06-18 18:43:32 | INFO | train_inner | epoch 001: 23 / 3002 loss=3.678, ppl=12.8, wps=5835, ups=0.09, wpb=64879, bsz=128, num_updates=19, lr=1.9e-06, gnorm=3.333, loss_scale=8, train_wall=11, gb_free=2.8, wall=286 2021-06-18 18:43:43 | INFO | train_inner | epoch 001: 24 / 3002 loss=3.615, ppl=12.25, wps=5806.7, ups=0.09, wpb=64756, bsz=128, num_updates=20, lr=2e-06, gnorm=3.177, loss_scale=8, train_wall=11, gb_free=2.8, wall=297 2021-06-18 18:43:54 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 2021-06-18 18:44:05 | INFO | train_inner | epoch 001: 26 / 3002 loss=3.55, ppl=11.71, wps=2924.9, ups=0.05, wpb=64849, bsz=128, num_updates=21, lr=2.1e-06, gnorm=3.453, loss_scale=4, train_wall=21, gb_free=2.8, wall=320 2021-06-18 18:44:16 | INFO | train_inner | epoch 001: 27 / 3002 loss=3.533, ppl=11.57, wps=5817.8, ups=0.09, wpb=64831, bsz=128, num_updates=22, lr=2.2e-06, gnorm=3.318, loss_scale=4, train_wall=11, gb_free=2.8, wall=331 2021-06-18 18:44:27 | INFO | train_inner | epoch 001: 28 / 3002 loss=3.649, ppl=12.54, wps=5885.1, ups=0.09, wpb=64771, bsz=128, num_updates=23, lr=2.3e-06, gnorm=3.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=342 2021-06-18 18:44:38 | INFO | train_inner | epoch 001: 29 / 3002 loss=3.733, ppl=13.3, wps=5933.2, ups=0.09, wpb=64882, bsz=128, num_updates=24, lr=2.4e-06, gnorm=3.132, loss_scale=4, train_wall=10, gb_free=2.8, wall=353 2021-06-18 18:44:49 | INFO | train_inner | epoch 001: 30 / 3002 loss=3.907, ppl=15, wps=5908.7, ups=0.09, wpb=64748, bsz=128, num_updates=25, lr=2.5e-06, gnorm=3.306, loss_scale=4, train_wall=10, gb_free=2.8, wall=364 2021-06-18 18:45:00 | INFO | train_inner | epoch 001: 31 / 3002 loss=3.697, ppl=12.97, wps=5858.9, ups=0.09, wpb=64800, bsz=128, num_updates=26, lr=2.6e-06, gnorm=3.339, loss_scale=4, train_wall=11, gb_free=2.8, wall=375 2021-06-18 18:45:11 | INFO | train_inner | epoch 001: 32 / 3002 loss=3.447, ppl=10.91, wps=5855.1, ups=0.09, wpb=64823, bsz=128, num_updates=27, lr=2.7e-06, gnorm=3.21, loss_scale=4, train_wall=11, gb_free=2.8, wall=386 2021-06-18 18:45:22 | INFO | train_inner | epoch 001: 33 / 3002 loss=3.61, ppl=12.21, wps=5905.7, ups=0.09, wpb=64738, bsz=128, num_updates=28, lr=2.8e-06, gnorm=3.213, loss_scale=4, train_wall=11, gb_free=2.8, wall=397 2021-06-18 18:45:33 | INFO | train_inner | epoch 001: 34 / 3002 loss=3.881, ppl=14.73, wps=5919.6, ups=0.09, wpb=64822, bsz=128, num_updates=29, lr=2.9e-06, gnorm=3.462, loss_scale=4, train_wall=10, gb_free=2.8, wall=408 2021-06-18 18:45:44 | INFO | train_inner | epoch 001: 35 / 3002 loss=3.607, ppl=12.18, wps=6026.8, ups=0.09, wpb=64879, bsz=128, num_updates=30, lr=3e-06, gnorm=3.014, loss_scale=4, train_wall=10, gb_free=2.8, wall=419 2021-06-18 18:45:55 | INFO | train_inner | epoch 001: 36 / 3002 loss=3.572, ppl=11.89, wps=5864, ups=0.09, wpb=64796, bsz=128, num_updates=31, lr=3.1e-06, gnorm=3.063, loss_scale=4, train_wall=11, gb_free=2.8, wall=430 2021-06-18 18:46:06 | INFO | train_inner | epoch 001: 37 / 3002 loss=3.424, ppl=10.73, wps=5846.3, ups=0.09, wpb=64799, bsz=128, num_updates=32, lr=3.2e-06, gnorm=2.965, loss_scale=4, train_wall=11, gb_free=2.8, wall=441 2021-06-18 18:46:17 | INFO | train_inner | epoch 001: 38 / 3002 loss=3.658, ppl=12.63, wps=5851.2, ups=0.09, wpb=64874, bsz=128, num_updates=33, lr=3.3e-06, gnorm=3.793, loss_scale=4, train_wall=11, gb_free=2.8, wall=452 2021-06-18 18:46:28 | INFO | train_inner | epoch 001: 39 / 3002 loss=3.602, ppl=12.14, wps=5884.3, ups=0.09, wpb=64825, bsz=128, num_updates=34, lr=3.4e-06, gnorm=3.274, loss_scale=4, train_wall=11, gb_free=2.8, wall=463 2021-06-18 18:46:40 | INFO | train_inner | epoch 001: 40 / 3002 loss=3.65, ppl=12.56, wps=5818.2, ups=0.09, wpb=64809, bsz=128, num_updates=35, lr=3.5e-06, gnorm=3.095, loss_scale=4, train_wall=11, gb_free=2.8, wall=474 2021-06-18 18:46:51 | INFO | train_inner | epoch 001: 41 / 3002 loss=3.643, ppl=12.49, wps=5911.2, ups=0.09, wpb=64797, bsz=128, num_updates=36, lr=3.6e-06, gnorm=3.168, loss_scale=4, train_wall=10, gb_free=2.8, wall=485 2021-06-18 18:47:02 | INFO | train_inner | epoch 001: 42 / 3002 loss=3.551, ppl=11.72, wps=5876.4, ups=0.09, wpb=64805, bsz=128, num_updates=37, lr=3.7e-06, gnorm=2.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=496 2021-06-18 18:47:12 | INFO | train_inner | epoch 001: 43 / 3002 loss=3.48, ppl=11.16, wps=5942.3, ups=0.09, wpb=64876, bsz=128, num_updates=38, lr=3.8e-06, gnorm=3.414, loss_scale=4, train_wall=10, gb_free=2.8, wall=507 2021-06-18 18:47:24 | INFO | train_inner | epoch 001: 44 / 3002 loss=3.493, ppl=11.26, wps=5821, ups=0.09, wpb=64663, bsz=128, num_updates=39, lr=3.9e-06, gnorm=3.26, loss_scale=4, train_wall=11, gb_free=2.8, wall=518 2021-06-18 18:47:35 | INFO | train_inner | epoch 001: 45 / 3002 loss=3.484, ppl=11.19, wps=5874.8, ups=0.09, wpb=64839, bsz=128, num_updates=40, lr=4e-06, gnorm=3.277, loss_scale=4, train_wall=11, gb_free=2.8, wall=529 2021-06-18 18:47:46 | INFO | train_inner | epoch 001: 46 / 3002 loss=3.58, ppl=11.96, wps=5845.6, ups=0.09, wpb=64851, bsz=128, num_updates=41, lr=4.1e-06, gnorm=3.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=540 2021-06-18 18:47:57 | INFO | train_inner | epoch 001: 47 / 3002 loss=3.605, ppl=12.17, wps=5803.7, ups=0.09, wpb=64794, bsz=128, num_updates=42, lr=4.2e-06, gnorm=3.148, loss_scale=4, train_wall=11, gb_free=2.8, wall=551 2021-06-18 18:48:08 | INFO | train_inner | epoch 001: 48 / 3002 loss=3.779, ppl=13.72, wps=5853.9, ups=0.09, wpb=64833, bsz=128, num_updates=43, lr=4.3e-06, gnorm=3.017, loss_scale=4, train_wall=11, gb_free=2.8, wall=562 2021-06-18 18:48:19 | INFO | train_inner | epoch 001: 49 / 3002 loss=3.782, ppl=13.76, wps=5831.2, ups=0.09, wpb=64742, bsz=128, num_updates=44, lr=4.4e-06, gnorm=5.321, loss_scale=4, train_wall=11, gb_free=2.8, wall=573 2021-06-18 18:48:30 | INFO | train_inner | epoch 001: 50 / 3002 loss=3.66, ppl=12.64, wps=5727.1, ups=0.09, wpb=64875, bsz=128, num_updates=45, lr=4.5e-06, gnorm=3.136, loss_scale=4, train_wall=11, gb_free=2.8, wall=585 2021-06-18 18:48:42 | INFO | train_inner | epoch 001: 51 / 3002 loss=3.668, ppl=12.71, wps=5742.3, ups=0.09, wpb=64719, bsz=128, num_updates=46, lr=4.6e-06, gnorm=3.246, loss_scale=4, train_wall=11, gb_free=2.8, wall=596 2021-06-18 18:48:53 | INFO | train_inner | epoch 001: 52 / 3002 loss=3.53, ppl=11.55, wps=5880.5, ups=0.09, wpb=64846, bsz=128, num_updates=47, lr=4.7e-06, gnorm=3.064, loss_scale=4, train_wall=11, gb_free=2.8, wall=607 2021-06-18 18:49:04 | INFO | train_inner | epoch 001: 53 / 3002 loss=3.608, ppl=12.19, wps=5858.2, ups=0.09, wpb=64883, bsz=128, num_updates=48, lr=4.8e-06, gnorm=2.834, loss_scale=4, train_wall=11, gb_free=2.8, wall=618 2021-06-18 18:49:15 | INFO | train_inner | epoch 001: 54 / 3002 loss=3.487, ppl=11.22, wps=5852.3, ups=0.09, wpb=64756, bsz=128, num_updates=49, lr=4.9e-06, gnorm=2.899, loss_scale=4, train_wall=11, gb_free=2.8, wall=629 2021-06-18 18:49:26 | INFO | train_inner | epoch 001: 55 / 3002 loss=3.507, ppl=11.37, wps=5812.6, ups=0.09, wpb=64801, bsz=128, num_updates=50, lr=5e-06, gnorm=2.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=640 2021-06-18 18:49:37 | INFO | train_inner | epoch 001: 56 / 3002 loss=3.645, ppl=12.51, wps=5817.4, ups=0.09, wpb=64834, bsz=128, num_updates=51, lr=5.1e-06, gnorm=3.255, loss_scale=4, train_wall=11, gb_free=2.8, wall=651 2021-06-18 18:49:48 | INFO | train_inner | epoch 001: 57 / 3002 loss=3.564, ppl=11.83, wps=5793.8, ups=0.09, wpb=64793, bsz=128, num_updates=52, lr=5.2e-06, gnorm=3.108, loss_scale=4, train_wall=11, gb_free=2.8, wall=663 2021-06-18 18:49:59 | INFO | train_inner | epoch 001: 58 / 3002 loss=3.505, ppl=11.36, wps=5914.7, ups=0.09, wpb=64779, bsz=128, num_updates=53, lr=5.3e-06, gnorm=3.002, loss_scale=4, train_wall=11, gb_free=2.8, wall=674 2021-06-18 18:50:10 | INFO | train_inner | epoch 001: 59 / 3002 loss=3.495, ppl=11.28, wps=5829.9, ups=0.09, wpb=64947, bsz=128, num_updates=54, lr=5.4e-06, gnorm=3.044, loss_scale=4, train_wall=11, gb_free=2.8, wall=685 2021-06-18 18:50:21 | INFO | train_inner | epoch 001: 60 / 3002 loss=3.575, ppl=11.92, wps=5852.8, ups=0.09, wpb=64851, bsz=128, num_updates=55, lr=5.5e-06, gnorm=3.236, loss_scale=4, train_wall=11, gb_free=2.8, wall=696 2021-06-18 18:50:33 | INFO | train_inner | epoch 001: 61 / 3002 loss=3.729, ppl=13.26, wps=5843.5, ups=0.09, wpb=64876, bsz=128, num_updates=56, lr=5.6e-06, gnorm=3.073, loss_scale=4, train_wall=11, gb_free=2.8, wall=707 2021-06-18 18:50:44 | INFO | train_inner | epoch 001: 62 / 3002 loss=3.527, ppl=11.53, wps=5894.7, ups=0.09, wpb=64912, bsz=128, num_updates=57, lr=5.7e-06, gnorm=3.38, loss_scale=4, train_wall=11, gb_free=2.8, wall=718 2021-06-18 18:50:55 | INFO | train_inner | epoch 001: 63 / 3002 loss=3.683, ppl=12.84, wps=5855.9, ups=0.09, wpb=64833, bsz=128, num_updates=58, lr=5.8e-06, gnorm=3.037, loss_scale=4, train_wall=11, gb_free=2.8, wall=729 2021-06-18 18:51:06 | INFO | train_inner | epoch 001: 64 / 3002 loss=3.427, ppl=10.75, wps=5883.1, ups=0.09, wpb=64809, bsz=128, num_updates=59, lr=5.9e-06, gnorm=2.923, loss_scale=4, train_wall=11, gb_free=2.8, wall=740 2021-06-18 18:51:17 | INFO | train_inner | epoch 001: 65 / 3002 loss=3.546, ppl=11.68, wps=5743.5, ups=0.09, wpb=64794, bsz=128, num_updates=60, lr=6e-06, gnorm=3.001, loss_scale=4, train_wall=11, gb_free=2.8, wall=751 2021-06-18 18:51:28 | INFO | train_inner | epoch 001: 66 / 3002 loss=3.402, ppl=10.57, wps=5846.9, ups=0.09, wpb=64814, bsz=128, num_updates=61, lr=6.1e-06, gnorm=3.096, loss_scale=4, train_wall=11, gb_free=2.8, wall=762 2021-06-18 18:51:39 | INFO | train_inner | epoch 001: 67 / 3002 loss=3.629, ppl=12.37, wps=5871.9, ups=0.09, wpb=64886, bsz=128, num_updates=62, lr=6.2e-06, gnorm=3.344, loss_scale=4, train_wall=11, gb_free=2.8, wall=773 2021-06-18 18:51:50 | INFO | train_inner | epoch 001: 68 / 3002 loss=3.508, ppl=11.38, wps=5799.7, ups=0.09, wpb=64813, bsz=128, num_updates=63, lr=6.3e-06, gnorm=3.045, loss_scale=4, train_wall=11, gb_free=2.8, wall=785 2021-06-18 18:52:01 | INFO | train_inner | epoch 001: 69 / 3002 loss=3.765, ppl=13.59, wps=5947.1, ups=0.09, wpb=64780, bsz=128, num_updates=64, lr=6.4e-06, gnorm=3.147, loss_scale=4, train_wall=10, gb_free=2.8, wall=795 2021-06-18 18:52:12 | INFO | train_inner | epoch 001: 70 / 3002 loss=3.443, ppl=10.87, wps=5924.5, ups=0.09, wpb=64779, bsz=128, num_updates=65, lr=6.5e-06, gnorm=7.358, loss_scale=4, train_wall=10, gb_free=2.8, wall=806 2021-06-18 18:52:23 | INFO | train_inner | epoch 001: 71 / 3002 loss=3.686, ppl=12.87, wps=5879.2, ups=0.09, wpb=64866, bsz=128, num_updates=66, lr=6.6e-06, gnorm=2.964, loss_scale=4, train_wall=11, gb_free=2.8, wall=817 2021-06-18 18:52:34 | INFO | train_inner | epoch 001: 72 / 3002 loss=3.743, ppl=13.39, wps=5807.3, ups=0.09, wpb=64782, bsz=128, num_updates=67, lr=6.7e-06, gnorm=4.679, loss_scale=4, train_wall=11, gb_free=2.8, wall=829 2021-06-18 18:52:45 | INFO | train_inner | epoch 001: 73 / 3002 loss=3.491, ppl=11.24, wps=5811.3, ups=0.09, wpb=64812, bsz=128, num_updates=68, lr=6.8e-06, gnorm=2.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=840 2021-06-18 18:52:56 | INFO | train_inner | epoch 001: 74 / 3002 loss=3.331, ppl=10.07, wps=5876.1, ups=0.09, wpb=64865, bsz=128, num_updates=69, lr=6.9e-06, gnorm=3.603, loss_scale=4, train_wall=11, gb_free=2.8, wall=851 2021-06-18 18:53:07 | INFO | train_inner | epoch 001: 75 / 3002 loss=3.667, ppl=12.71, wps=5988.2, ups=0.09, wpb=64837, bsz=128, num_updates=70, lr=7e-06, gnorm=3.21, loss_scale=4, train_wall=10, gb_free=2.8, wall=862 2021-06-18 18:53:18 | INFO | train_inner | epoch 001: 76 / 3002 loss=3.651, ppl=12.56, wps=6019.5, ups=0.09, wpb=64814, bsz=128, num_updates=71, lr=7.1e-06, gnorm=3.076, loss_scale=4, train_wall=10, gb_free=2.8, wall=872 2021-06-18 18:53:29 | INFO | train_inner | epoch 001: 77 / 3002 loss=3.605, ppl=12.17, wps=5801.9, ups=0.09, wpb=64806, bsz=128, num_updates=72, lr=7.2e-06, gnorm=3.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=884 2021-06-18 18:53:40 | INFO | train_inner | epoch 001: 78 / 3002 loss=3.463, ppl=11.02, wps=5917.9, ups=0.09, wpb=64821, bsz=128, num_updates=73, lr=7.3e-06, gnorm=3.143, loss_scale=4, train_wall=10, gb_free=2.8, wall=895 2021-06-18 18:53:51 | INFO | train_inner | epoch 001: 79 / 3002 loss=3.452, ppl=10.94, wps=5983.2, ups=0.09, wpb=64793, bsz=128, num_updates=74, lr=7.4e-06, gnorm=3.109, loss_scale=4, train_wall=10, gb_free=2.8, wall=905 2021-06-18 18:54:02 | INFO | train_inner | epoch 001: 80 / 3002 loss=3.514, ppl=11.43, wps=5844.3, ups=0.09, wpb=64847, bsz=128, num_updates=75, lr=7.5e-06, gnorm=3.164, loss_scale=4, train_wall=11, gb_free=2.8, wall=916 2021-06-18 18:54:13 | INFO | train_inner | epoch 001: 81 / 3002 loss=3.581, ppl=11.97, wps=5896.9, ups=0.09, wpb=64862, bsz=128, num_updates=76, lr=7.6e-06, gnorm=2.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=927 2021-06-18 18:54:24 | INFO | train_inner | epoch 001: 82 / 3002 loss=3.385, ppl=10.44, wps=5886, ups=0.09, wpb=64814, bsz=128, num_updates=77, lr=7.7e-06, gnorm=2.943, loss_scale=4, train_wall=11, gb_free=2.8, wall=938 2021-06-18 18:54:35 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-18 18:54:46 | INFO | train_inner | epoch 001: 84 / 3002 loss=3.588, ppl=12.02, wps=2943.6, ups=0.05, wpb=64827, bsz=128, num_updates=78, lr=7.8e-06, gnorm=3.169, loss_scale=2, train_wall=21, gb_free=2.8, wall=960 2021-06-18 18:54:57 | INFO | train_inner | epoch 001: 85 / 3002 loss=3.544, ppl=11.66, wps=5882, ups=0.09, wpb=64908, bsz=128, num_updates=79, lr=7.9e-06, gnorm=3.094, loss_scale=2, train_wall=11, gb_free=2.8, wall=972 2021-06-18 18:55:08 | INFO | train_inner | epoch 001: 86 / 3002 loss=3.604, ppl=12.16, wps=5851.6, ups=0.09, wpb=64866, bsz=128, num_updates=80, lr=8e-06, gnorm=3.011, loss_scale=2, train_wall=11, gb_free=2.8, wall=983 2021-06-18 18:55:19 | INFO | train_inner | epoch 001: 87 / 3002 loss=3.467, ppl=11.06, wps=5878.5, ups=0.09, wpb=64863, bsz=128, num_updates=81, lr=8.1e-06, gnorm=16.576, loss_scale=2, train_wall=11, gb_free=2.8, wall=994 2021-06-18 18:55:31 | INFO | train_inner | epoch 001: 88 / 3002 loss=3.567, ppl=11.85, wps=5734.4, ups=0.09, wpb=64866, bsz=128, num_updates=82, lr=8.2e-06, gnorm=2.973, loss_scale=2, train_wall=11, gb_free=2.8, wall=1005 2021-06-18 18:55:42 | INFO | train_inner | epoch 001: 89 / 3002 loss=3.566, ppl=11.84, wps=5799.7, ups=0.09, wpb=64781, bsz=128, num_updates=83, lr=8.3e-06, gnorm=3.187, loss_scale=2, train_wall=11, gb_free=2.8, wall=1016 2021-06-18 18:55:53 | INFO | train_inner | epoch 001: 90 / 3002 loss=3.55, ppl=11.71, wps=5931.1, ups=0.09, wpb=64837, bsz=128, num_updates=84, lr=8.4e-06, gnorm=2.954, loss_scale=2, train_wall=10, gb_free=2.8, wall=1027 2021-06-18 18:56:04 | INFO | train_inner | epoch 001: 91 / 3002 loss=3.445, ppl=10.89, wps=5843, ups=0.09, wpb=64800, bsz=128, num_updates=85, lr=8.5e-06, gnorm=3.216, loss_scale=2, train_wall=11, gb_free=2.8, wall=1038 2021-06-18 18:56:15 | INFO | train_inner | epoch 001: 92 / 3002 loss=3.574, ppl=11.91, wps=5823.7, ups=0.09, wpb=64844, bsz=128, num_updates=86, lr=8.6e-06, gnorm=6.47, loss_scale=2, train_wall=11, gb_free=2.8, wall=1049 2021-06-18 18:56:26 | INFO | train_inner | epoch 001: 93 / 3002 loss=3.539, ppl=11.62, wps=5845.5, ups=0.09, wpb=64863, bsz=128, num_updates=87, lr=8.7e-06, gnorm=7.98, loss_scale=2, train_wall=11, gb_free=2.8, wall=1060 2021-06-18 18:56:37 | INFO | train_inner | epoch 001: 94 / 3002 loss=3.604, ppl=12.16, wps=5870.7, ups=0.09, wpb=64844, bsz=128, num_updates=88, lr=8.8e-06, gnorm=3.057, loss_scale=2, train_wall=11, gb_free=2.8, wall=1071 2021-06-18 18:56:48 | INFO | train_inner | epoch 001: 95 / 3002 loss=3.525, ppl=11.51, wps=5776, ups=0.09, wpb=64846, bsz=128, num_updates=89, lr=8.9e-06, gnorm=2.861, loss_scale=2, train_wall=11, gb_free=2.8, wall=1083 2021-06-18 18:56:59 | INFO | train_inner | epoch 001: 96 / 3002 loss=3.582, ppl=11.98, wps=5887.9, ups=0.09, wpb=64835, bsz=128, num_updates=90, lr=9e-06, gnorm=3.116, loss_scale=2, train_wall=11, gb_free=2.8, wall=1094 2021-06-18 18:57:10 | INFO | train_inner | epoch 001: 97 / 3002 loss=3.531, ppl=11.56, wps=5943.5, ups=0.09, wpb=64842, bsz=128, num_updates=91, lr=9.1e-06, gnorm=3.046, loss_scale=2, train_wall=10, gb_free=2.8, wall=1105 2021-06-18 18:57:22 | INFO | train_inner | epoch 001: 98 / 3002 loss=3.597, ppl=12.1, wps=5745.5, ups=0.09, wpb=64815, bsz=128, num_updates=92, lr=9.2e-06, gnorm=3.365, loss_scale=2, train_wall=11, gb_free=2.8, wall=1116 2021-06-18 18:57:33 | INFO | train_inner | epoch 001: 99 / 3002 loss=3.484, ppl=11.19, wps=5871, ups=0.09, wpb=64838, bsz=128, num_updates=93, lr=9.3e-06, gnorm=3.044, loss_scale=2, train_wall=11, gb_free=2.8, wall=1127 2021-06-18 18:57:44 | INFO | train_inner | epoch 001: 100 / 3002 loss=3.581, ppl=11.96, wps=5910.5, ups=0.09, wpb=64829, bsz=128, num_updates=94, lr=9.4e-06, gnorm=23.145, loss_scale=2, train_wall=11, gb_free=2.8, wall=1138 2021-06-18 18:57:54 | INFO | train_inner | epoch 001: 101 / 3002 loss=3.488, ppl=11.22, wps=5916.8, ups=0.09, wpb=64848, bsz=128, num_updates=95, lr=9.5e-06, gnorm=2.964, loss_scale=2, train_wall=11, gb_free=2.8, wall=1149 2021-06-18 18:58:06 | INFO | train_inner | epoch 001: 102 / 3002 loss=3.486, ppl=11.21, wps=5767.9, ups=0.09, wpb=64885, bsz=128, num_updates=96, lr=9.6e-06, gnorm=2.967, loss_scale=2, train_wall=11, gb_free=2.8, wall=1160 2021-06-18 18:58:17 | INFO | train_inner | epoch 001: 103 / 3002 loss=3.417, ppl=10.68, wps=5783.4, ups=0.09, wpb=64753, bsz=128, num_updates=97, lr=9.7e-06, gnorm=3.029, loss_scale=2, train_wall=11, gb_free=2.8, wall=1171 2021-06-18 18:58:28 | INFO | train_inner | epoch 001: 104 / 3002 loss=3.475, ppl=11.12, wps=5785.9, ups=0.09, wpb=64789, bsz=128, num_updates=98, lr=9.8e-06, gnorm=3.417, loss_scale=2, train_wall=11, gb_free=2.8, wall=1182 2021-06-18 18:58:39 | INFO | train_inner | epoch 001: 105 / 3002 loss=3.638, ppl=12.45, wps=5872.3, ups=0.09, wpb=64763, bsz=128, num_updates=99, lr=9.9e-06, gnorm=3.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=1193 2021-06-18 18:58:50 | INFO | train_inner | epoch 001: 106 / 3002 loss=3.477, ppl=11.13, wps=5922.1, ups=0.09, wpb=64871, bsz=128, num_updates=100, lr=1e-05, gnorm=2.973, loss_scale=2, train_wall=11, gb_free=2.8, wall=1204 2021-06-18 18:59:01 | INFO | train_inner | epoch 001: 107 / 3002 loss=3.347, ppl=10.18, wps=5863.7, ups=0.09, wpb=64814, bsz=128, num_updates=101, lr=1.01e-05, gnorm=3.047, loss_scale=2, train_wall=11, gb_free=2.8, wall=1215 2021-06-18 18:59:12 | INFO | train_inner | epoch 001: 108 / 3002 loss=3.382, ppl=10.42, wps=5945.6, ups=0.09, wpb=64883, bsz=128, num_updates=102, lr=1.02e-05, gnorm=3.033, loss_scale=2, train_wall=10, gb_free=2.8, wall=1226 2021-06-18 18:59:23 | INFO | train_inner | epoch 001: 109 / 3002 loss=3.581, ppl=11.96, wps=5840.4, ups=0.09, wpb=64847, bsz=128, num_updates=103, lr=1.03e-05, gnorm=2.915, loss_scale=2, train_wall=11, gb_free=2.8, wall=1238 2021-06-18 18:59:34 | INFO | train_inner | epoch 001: 110 / 3002 loss=3.603, ppl=12.15, wps=5843.3, ups=0.09, wpb=64799, bsz=128, num_updates=104, lr=1.04e-05, gnorm=3.019, loss_scale=2, train_wall=11, gb_free=2.8, wall=1249 2021-06-18 18:59:45 | INFO | train_inner | epoch 001: 111 / 3002 loss=3.383, ppl=10.43, wps=5840.8, ups=0.09, wpb=64792, bsz=128, num_updates=105, lr=1.05e-05, gnorm=3.087, loss_scale=2, train_wall=11, gb_free=2.8, wall=1260 2021-06-18 18:59:56 | INFO | train_inner | epoch 001: 112 / 3002 loss=3.447, ppl=10.9, wps=5847.8, ups=0.09, wpb=64856, bsz=128, num_updates=106, lr=1.06e-05, gnorm=2.958, loss_scale=2, train_wall=11, gb_free=2.8, wall=1271 2021-06-18 19:00:08 | INFO | train_inner | epoch 001: 113 / 3002 loss=3.45, ppl=10.93, wps=5762.9, ups=0.09, wpb=64824, bsz=128, num_updates=107, lr=1.07e-05, gnorm=3.007, loss_scale=2, train_wall=11, gb_free=2.8, wall=1282 2021-06-18 19:00:19 | INFO | train_inner | epoch 001: 114 / 3002 loss=3.716, ppl=13.14, wps=5829.3, ups=0.09, wpb=64877, bsz=128, num_updates=108, lr=1.08e-05, gnorm=2.867, loss_scale=2, train_wall=11, gb_free=2.8, wall=1293 2021-06-18 19:00:30 | INFO | train_inner | epoch 001: 115 / 3002 loss=3.588, ppl=12.02, wps=5979.2, ups=0.09, wpb=64730, bsz=128, num_updates=109, lr=1.09e-05, gnorm=3.095, loss_scale=2, train_wall=10, gb_free=2.8, wall=1304 2021-06-18 19:00:41 | INFO | train_inner | epoch 001: 116 / 3002 loss=3.468, ppl=11.07, wps=5803.5, ups=0.09, wpb=64820, bsz=128, num_updates=110, lr=1.1e-05, gnorm=3.117, loss_scale=2, train_wall=11, gb_free=2.8, wall=1315 2021-06-18 19:00:52 | INFO | train_inner | epoch 001: 117 / 3002 loss=3.584, ppl=11.99, wps=5779.5, ups=0.09, wpb=64792, bsz=128, num_updates=111, lr=1.11e-05, gnorm=3.027, loss_scale=2, train_wall=11, gb_free=2.8, wall=1326 2021-06-18 19:01:03 | INFO | train_inner | epoch 001: 118 / 3002 loss=3.58, ppl=11.96, wps=5717.5, ups=0.09, wpb=64824, bsz=128, num_updates=112, lr=1.12e-05, gnorm=3.164, loss_scale=2, train_wall=11, gb_free=2.8, wall=1338 2021-06-18 19:01:14 | INFO | train_inner | epoch 001: 119 / 3002 loss=3.382, ppl=10.42, wps=5844.6, ups=0.09, wpb=64822, bsz=128, num_updates=113, lr=1.13e-05, gnorm=3.161, loss_scale=2, train_wall=11, gb_free=2.8, wall=1349 2021-06-18 19:01:25 | INFO | train_inner | epoch 001: 120 / 3002 loss=3.632, ppl=12.4, wps=5913.8, ups=0.09, wpb=64801, bsz=128, num_updates=114, lr=1.14e-05, gnorm=3.396, loss_scale=2, train_wall=11, gb_free=2.8, wall=1360 2021-06-18 19:01:37 | INFO | train_inner | epoch 001: 121 / 3002 loss=3.39, ppl=10.49, wps=5838.9, ups=0.09, wpb=64857, bsz=128, num_updates=115, lr=1.15e-05, gnorm=3.112, loss_scale=2, train_wall=11, gb_free=2.8, wall=1371 2021-06-18 19:01:48 | INFO | train_inner | epoch 001: 122 / 3002 loss=3.321, ppl=9.99, wps=5886.8, ups=0.09, wpb=64862, bsz=128, num_updates=116, lr=1.16e-05, gnorm=3.513, loss_scale=2, train_wall=11, gb_free=2.8, wall=1382 2021-06-18 19:01:59 | INFO | train_inner | epoch 001: 123 / 3002 loss=3.45, ppl=10.93, wps=5881.6, ups=0.09, wpb=64896, bsz=128, num_updates=117, lr=1.17e-05, gnorm=2.984, loss_scale=2, train_wall=11, gb_free=2.8, wall=1393 2021-06-18 19:02:10 | INFO | train_inner | epoch 001: 124 / 3002 loss=3.449, ppl=10.92, wps=5923.3, ups=0.09, wpb=64871, bsz=128, num_updates=118, lr=1.18e-05, gnorm=2.902, loss_scale=2, train_wall=10, gb_free=2.8, wall=1404 2021-06-18 19:02:21 | INFO | train_inner | epoch 001: 125 / 3002 loss=3.623, ppl=12.32, wps=5829, ups=0.09, wpb=64841, bsz=128, num_updates=119, lr=1.19e-05, gnorm=3.024, loss_scale=2, train_wall=11, gb_free=2.8, wall=1415 2021-06-18 19:02:32 | INFO | train_inner | epoch 001: 126 / 3002 loss=3.378, ppl=10.4, wps=5930.4, ups=0.09, wpb=64879, bsz=128, num_updates=120, lr=1.2e-05, gnorm=7.351, loss_scale=2, train_wall=10, gb_free=2.8, wall=1426 2021-06-18 19:02:43 | INFO | train_inner | epoch 001: 127 / 3002 loss=3.348, ppl=10.18, wps=5849.2, ups=0.09, wpb=64822, bsz=128, num_updates=121, lr=1.21e-05, gnorm=2.942, loss_scale=2, train_wall=11, gb_free=2.8, wall=1437 2021-06-18 19:02:54 | INFO | train_inner | epoch 001: 128 / 3002 loss=3.554, ppl=11.75, wps=5826, ups=0.09, wpb=64753, bsz=128, num_updates=122, lr=1.22e-05, gnorm=17.434, loss_scale=2, train_wall=11, gb_free=2.8, wall=1448 2021-06-18 19:03:05 | INFO | train_inner | epoch 001: 129 / 3002 loss=3.537, ppl=11.61, wps=5921.9, ups=0.09, wpb=64842, bsz=128, num_updates=123, lr=1.23e-05, gnorm=2.883, loss_scale=2, train_wall=10, gb_free=2.8, wall=1459 2021-06-18 19:03:16 | INFO | train_inner | epoch 001: 130 / 3002 loss=3.508, ppl=11.38, wps=5876.6, ups=0.09, wpb=64847, bsz=128, num_updates=124, lr=1.24e-05, gnorm=3.024, loss_scale=2, train_wall=11, gb_free=2.8, wall=1470 2021-06-18 19:03:27 | INFO | train_inner | epoch 001: 131 / 3002 loss=3.607, ppl=12.19, wps=5944.3, ups=0.09, wpb=64818, bsz=128, num_updates=125, lr=1.25e-05, gnorm=3.138, loss_scale=2, train_wall=10, gb_free=2.8, wall=1481 2021-06-18 19:03:38 | INFO | train_inner | epoch 001: 132 / 3002 loss=3.462, ppl=11.02, wps=5803.8, ups=0.09, wpb=64804, bsz=128, num_updates=126, lr=1.26e-05, gnorm=3.033, loss_scale=2, train_wall=11, gb_free=2.8, wall=1492 2021-06-18 19:03:49 | INFO | train_inner | epoch 001: 133 / 3002 loss=3.424, ppl=10.73, wps=5864.9, ups=0.09, wpb=64800, bsz=128, num_updates=127, lr=1.27e-05, gnorm=3.08, loss_scale=2, train_wall=11, gb_free=2.8, wall=1503 2021-06-18 19:04:00 | INFO | train_inner | epoch 001: 134 / 3002 loss=3.443, ppl=10.88, wps=5811.6, ups=0.09, wpb=64827, bsz=128, num_updates=128, lr=1.28e-05, gnorm=2.986, loss_scale=2, train_wall=11, gb_free=2.8, wall=1514 2021-06-18 19:04:11 | INFO | train_inner | epoch 001: 135 / 3002 loss=3.407, ppl=10.61, wps=5810.3, ups=0.09, wpb=64878, bsz=128, num_updates=129, lr=1.29e-05, gnorm=2.95, loss_scale=2, train_wall=11, gb_free=2.8, wall=1526 2021-06-18 19:04:22 | INFO | train_inner | epoch 001: 136 / 3002 loss=3.467, ppl=11.06, wps=5862.2, ups=0.09, wpb=64742, bsz=128, num_updates=130, lr=1.3e-05, gnorm=4.61, loss_scale=2, train_wall=11, gb_free=2.8, wall=1537 2021-06-18 19:04:33 | INFO | train_inner | epoch 001: 137 / 3002 loss=3.528, ppl=11.53, wps=5932.7, ups=0.09, wpb=64823, bsz=128, num_updates=131, lr=1.31e-05, gnorm=3.098, loss_scale=2, train_wall=10, gb_free=2.8, wall=1548 2021-06-18 19:04:44 | INFO | train_inner | epoch 001: 138 / 3002 loss=3.404, ppl=10.59, wps=5904.9, ups=0.09, wpb=64865, bsz=128, num_updates=132, lr=1.32e-05, gnorm=2.954, loss_scale=2, train_wall=11, gb_free=2.8, wall=1559 2021-06-18 19:04:55 | INFO | train_inner | epoch 001: 139 / 3002 loss=3.435, ppl=10.82, wps=5783.1, ups=0.09, wpb=64787, bsz=128, num_updates=133, lr=1.33e-05, gnorm=3.084, loss_scale=2, train_wall=11, gb_free=2.8, wall=1570 2021-06-18 19:05:07 | INFO | train_inner | epoch 001: 140 / 3002 loss=3.447, ppl=10.91, wps=5799.7, ups=0.09, wpb=64787, bsz=128, num_updates=134, lr=1.34e-05, gnorm=3.005, loss_scale=2, train_wall=11, gb_free=2.8, wall=1581 2021-06-18 19:05:18 | INFO | train_inner | epoch 001: 141 / 3002 loss=3.367, ppl=10.32, wps=5858.6, ups=0.09, wpb=64773, bsz=128, num_updates=135, lr=1.35e-05, gnorm=4.146, loss_scale=2, train_wall=11, gb_free=2.8, wall=1592 2021-06-18 19:05:29 | INFO | train_inner | epoch 001: 142 / 3002 loss=3.391, ppl=10.49, wps=5766.8, ups=0.09, wpb=64821, bsz=128, num_updates=136, lr=1.36e-05, gnorm=2.888, loss_scale=2, train_wall=11, gb_free=2.8, wall=1603 2021-06-18 19:05:40 | INFO | train_inner | epoch 001: 143 / 3002 loss=3.413, ppl=10.65, wps=5856.9, ups=0.09, wpb=64828, bsz=128, num_updates=137, lr=1.37e-05, gnorm=3.111, loss_scale=2, train_wall=11, gb_free=2.8, wall=1614 2021-06-18 19:05:51 | INFO | train_inner | epoch 001: 144 / 3002 loss=3.643, ppl=12.5, wps=5819.1, ups=0.09, wpb=64898, bsz=128, num_updates=138, lr=1.38e-05, gnorm=3.09, loss_scale=2, train_wall=11, gb_free=2.8, wall=1625 2021-06-18 19:06:02 | INFO | train_inner | epoch 001: 145 / 3002 loss=3.348, ppl=10.18, wps=5823.7, ups=0.09, wpb=64824, bsz=128, num_updates=139, lr=1.39e-05, gnorm=12.752, loss_scale=2, train_wall=11, gb_free=2.8, wall=1637 2021-06-18 19:06:13 | INFO | train_inner | epoch 001: 146 / 3002 loss=3.541, ppl=11.64, wps=5879.5, ups=0.09, wpb=64803, bsz=128, num_updates=140, lr=1.4e-05, gnorm=2.98, loss_scale=2, train_wall=11, gb_free=2.8, wall=1648 2021-06-18 19:06:24 | INFO | train_inner | epoch 001: 147 / 3002 loss=3.5, ppl=11.32, wps=5820.1, ups=0.09, wpb=64846, bsz=128, num_updates=141, lr=1.41e-05, gnorm=3.14, loss_scale=2, train_wall=11, gb_free=2.8, wall=1659 2021-06-18 19:06:35 | INFO | train_inner | epoch 001: 148 / 3002 loss=3.533, ppl=11.58, wps=5839.8, ups=0.09, wpb=64727, bsz=128, num_updates=142, lr=1.42e-05, gnorm=3.135, loss_scale=2, train_wall=11, gb_free=2.8, wall=1670 2021-06-18 19:06:47 | INFO | train_inner | epoch 001: 149 / 3002 loss=3.292, ppl=9.8, wps=5843, ups=0.09, wpb=64761, bsz=128, num_updates=143, lr=1.43e-05, gnorm=3.472, loss_scale=2, train_wall=11, gb_free=2.8, wall=1681 2021-06-18 19:06:57 | INFO | train_inner | epoch 001: 150 / 3002 loss=3.415, ppl=10.67, wps=6021.2, ups=0.09, wpb=64921, bsz=128, num_updates=144, lr=1.44e-05, gnorm=2.809, loss_scale=2, train_wall=10, gb_free=2.8, wall=1692 2021-06-18 19:07:08 | INFO | train_inner | epoch 001: 151 / 3002 loss=3.769, ppl=13.63, wps=5812.2, ups=0.09, wpb=64866, bsz=128, num_updates=145, lr=1.45e-05, gnorm=3.187, loss_scale=2, train_wall=11, gb_free=2.8, wall=1703 2021-06-18 19:07:20 | INFO | train_inner | epoch 001: 152 / 3002 loss=3.233, ppl=9.4, wps=5868.6, ups=0.09, wpb=64826, bsz=128, num_updates=146, lr=1.46e-05, gnorm=6.437, loss_scale=2, train_wall=11, gb_free=2.8, wall=1714 2021-06-18 19:07:31 | INFO | train_inner | epoch 001: 153 / 3002 loss=3.38, ppl=10.41, wps=5822.9, ups=0.09, wpb=64818, bsz=128, num_updates=147, lr=1.47e-05, gnorm=3.188, loss_scale=2, train_wall=11, gb_free=2.8, wall=1725 2021-06-18 19:07:42 | INFO | train_inner | epoch 001: 154 / 3002 loss=3.429, ppl=10.77, wps=5942.2, ups=0.09, wpb=64760, bsz=128, num_updates=148, lr=1.48e-05, gnorm=3.248, loss_scale=2, train_wall=10, gb_free=2.8, wall=1736 2021-06-18 19:07:52 | INFO | train_inner | epoch 001: 155 / 3002 loss=3.559, ppl=11.79, wps=6051.6, ups=0.09, wpb=64943, bsz=128, num_updates=149, lr=1.49e-05, gnorm=3.4, loss_scale=2, train_wall=10, gb_free=2.8, wall=1747 2021-06-18 19:08:03 | INFO | train_inner | epoch 001: 156 / 3002 loss=3.588, ppl=12.03, wps=5874.2, ups=0.09, wpb=64801, bsz=128, num_updates=150, lr=1.5e-05, gnorm=11.327, loss_scale=2, train_wall=11, gb_free=2.8, wall=1758 2021-06-18 19:08:14 | INFO | train_inner | epoch 001: 157 / 3002 loss=3.446, ppl=10.9, wps=5932.6, ups=0.09, wpb=64811, bsz=128, num_updates=151, lr=1.51e-05, gnorm=2.893, loss_scale=2, train_wall=10, gb_free=2.8, wall=1769 2021-06-18 19:08:25 | INFO | train_inner | epoch 001: 158 / 3002 loss=3.567, ppl=11.85, wps=5804.1, ups=0.09, wpb=64907, bsz=128, num_updates=152, lr=1.52e-05, gnorm=3.141, loss_scale=2, train_wall=11, gb_free=2.8, wall=1780 2021-06-18 19:08:36 | INFO | train_inner | epoch 001: 159 / 3002 loss=3.389, ppl=10.47, wps=5901.7, ups=0.09, wpb=64862, bsz=128, num_updates=153, lr=1.53e-05, gnorm=3.414, loss_scale=2, train_wall=11, gb_free=2.8, wall=1791 2021-06-18 19:08:47 | INFO | train_inner | epoch 001: 160 / 3002 loss=3.366, ppl=10.31, wps=5898.9, ups=0.09, wpb=64898, bsz=128, num_updates=154, lr=1.54e-05, gnorm=5.62, loss_scale=2, train_wall=11, gb_free=2.8, wall=1802 2021-06-18 19:08:58 | INFO | train_inner | epoch 001: 161 / 3002 loss=3.262, ppl=9.59, wps=5873, ups=0.09, wpb=64909, bsz=128, num_updates=155, lr=1.55e-05, gnorm=3.032, loss_scale=2, train_wall=11, gb_free=2.8, wall=1813 2021-06-18 19:09:10 | INFO | train_inner | epoch 001: 162 / 3002 loss=3.356, ppl=10.24, wps=5772.7, ups=0.09, wpb=64855, bsz=128, num_updates=156, lr=1.56e-05, gnorm=3.294, loss_scale=2, train_wall=11, gb_free=2.8, wall=1824 2021-06-18 19:09:21 | INFO | train_inner | epoch 001: 163 / 3002 loss=3.42, ppl=10.71, wps=5863.4, ups=0.09, wpb=64873, bsz=128, num_updates=157, lr=1.57e-05, gnorm=3.039, loss_scale=2, train_wall=11, gb_free=2.8, wall=1835 2021-06-18 19:09:32 | INFO | train_inner | epoch 001: 164 / 3002 loss=3.529, ppl=11.54, wps=5804.4, ups=0.09, wpb=64882, bsz=128, num_updates=158, lr=1.58e-05, gnorm=2.969, loss_scale=2, train_wall=11, gb_free=2.8, wall=1846 2021-06-18 19:09:43 | INFO | train_inner | epoch 001: 165 / 3002 loss=3.487, ppl=11.21, wps=5897.5, ups=0.09, wpb=64824, bsz=128, num_updates=159, lr=1.59e-05, gnorm=3.069, loss_scale=2, train_wall=11, gb_free=2.8, wall=1857 2021-06-18 19:09:54 | INFO | train_inner | epoch 001: 166 / 3002 loss=3.576, ppl=11.92, wps=5844.6, ups=0.09, wpb=64881, bsz=128, num_updates=160, lr=1.6e-05, gnorm=3.118, loss_scale=2, train_wall=11, gb_free=2.8, wall=1868 2021-06-18 19:10:05 | INFO | train_inner | epoch 001: 167 / 3002 loss=3.5, ppl=11.32, wps=5842.5, ups=0.09, wpb=64863, bsz=128, num_updates=161, lr=1.61e-05, gnorm=3.346, loss_scale=2, train_wall=11, gb_free=2.8, wall=1879 2021-06-18 19:10:16 | INFO | train_inner | epoch 001: 168 / 3002 loss=3.649, ppl=12.54, wps=5869, ups=0.09, wpb=64868, bsz=128, num_updates=162, lr=1.62e-05, gnorm=3.137, loss_scale=2, train_wall=11, gb_free=2.8, wall=1891 2021-06-18 19:10:27 | INFO | train_inner | epoch 001: 169 / 3002 loss=3.38, ppl=10.41, wps=5922, ups=0.09, wpb=64767, bsz=128, num_updates=163, lr=1.63e-05, gnorm=3.086, loss_scale=2, train_wall=10, gb_free=2.8, wall=1901 2021-06-18 19:10:38 | INFO | train_inner | epoch 001: 170 / 3002 loss=3.452, ppl=10.94, wps=6013.5, ups=0.09, wpb=64911, bsz=128, num_updates=164, lr=1.64e-05, gnorm=3.112, loss_scale=2, train_wall=10, gb_free=2.8, wall=1912 2021-06-18 19:10:49 | INFO | train_inner | epoch 001: 171 / 3002 loss=3.33, ppl=10.06, wps=5962.9, ups=0.09, wpb=64879, bsz=128, num_updates=165, lr=1.65e-05, gnorm=3.192, loss_scale=2, train_wall=10, gb_free=2.8, wall=1923 2021-06-18 19:11:00 | INFO | train_inner | epoch 001: 172 / 3002 loss=3.442, ppl=10.87, wps=5955.2, ups=0.09, wpb=64804, bsz=128, num_updates=166, lr=1.66e-05, gnorm=3.35, loss_scale=2, train_wall=10, gb_free=2.8, wall=1934 2021-06-18 19:11:11 | INFO | train_inner | epoch 001: 173 / 3002 loss=3.369, ppl=10.33, wps=5796.5, ups=0.09, wpb=64871, bsz=128, num_updates=167, lr=1.67e-05, gnorm=2.965, loss_scale=2, train_wall=11, gb_free=2.8, wall=1945 2021-06-18 19:11:22 | INFO | train_inner | epoch 001: 174 / 3002 loss=3.391, ppl=10.49, wps=5966.1, ups=0.09, wpb=64751, bsz=128, num_updates=168, lr=1.68e-05, gnorm=3.05, loss_scale=2, train_wall=10, gb_free=2.8, wall=1956 2021-06-18 19:11:33 | INFO | train_inner | epoch 001: 175 / 3002 loss=3.51, ppl=11.39, wps=5939.7, ups=0.09, wpb=64854, bsz=128, num_updates=169, lr=1.69e-05, gnorm=2.916, loss_scale=2, train_wall=10, gb_free=2.8, wall=1967 2021-06-18 19:11:44 | INFO | train_inner | epoch 001: 176 / 3002 loss=3.422, ppl=10.72, wps=5854.5, ups=0.09, wpb=64754, bsz=128, num_updates=170, lr=1.7e-05, gnorm=3.116, loss_scale=2, train_wall=11, gb_free=2.8, wall=1978 2021-06-18 19:11:55 | INFO | train_inner | epoch 001: 177 / 3002 loss=3.559, ppl=11.78, wps=5864.7, ups=0.09, wpb=64868, bsz=128, num_updates=171, lr=1.71e-05, gnorm=2.976, loss_scale=2, train_wall=11, gb_free=2.8, wall=1989 2021-06-18 19:12:06 | INFO | train_inner | epoch 001: 178 / 3002 loss=3.482, ppl=11.17, wps=5986.7, ups=0.09, wpb=64891, bsz=128, num_updates=172, lr=1.72e-05, gnorm=3.149, loss_scale=2, train_wall=10, gb_free=2.8, wall=2000 2021-06-18 19:12:17 | INFO | train_inner | epoch 001: 179 / 3002 loss=3.358, ppl=10.26, wps=5867.3, ups=0.09, wpb=64836, bsz=128, num_updates=173, lr=1.73e-05, gnorm=3.106, loss_scale=2, train_wall=11, gb_free=2.8, wall=2011 2021-06-18 19:12:28 | INFO | train_inner | epoch 001: 180 / 3002 loss=3.508, ppl=11.38, wps=5832.6, ups=0.09, wpb=64859, bsz=128, num_updates=174, lr=1.74e-05, gnorm=3.24, loss_scale=2, train_wall=11, gb_free=2.8, wall=2022 2021-06-18 19:12:39 | INFO | train_inner | epoch 001: 181 / 3002 loss=3.473, ppl=11.1, wps=5878.3, ups=0.09, wpb=64898, bsz=128, num_updates=175, lr=1.75e-05, gnorm=3.311, loss_scale=2, train_wall=11, gb_free=2.8, wall=2033 2021-06-18 19:12:50 | INFO | train_inner | epoch 001: 182 / 3002 loss=3.449, ppl=10.92, wps=5900.9, ups=0.09, wpb=64903, bsz=128, num_updates=176, lr=1.76e-05, gnorm=2.915, loss_scale=2, train_wall=11, gb_free=2.8, wall=2044 2021-06-18 19:13:01 | INFO | train_inner | epoch 001: 183 / 3002 loss=3.744, ppl=13.4, wps=5814.4, ups=0.09, wpb=64757, bsz=128, num_updates=177, lr=1.77e-05, gnorm=3.065, loss_scale=2, train_wall=11, gb_free=2.8, wall=2055 2021-06-18 19:13:12 | INFO | train_inner | epoch 001: 184 / 3002 loss=3.471, ppl=11.09, wps=5797.8, ups=0.09, wpb=64834, bsz=128, num_updates=178, lr=1.78e-05, gnorm=3.413, loss_scale=2, train_wall=11, gb_free=2.8, wall=2066 2021-06-18 19:13:23 | INFO | train_inner | epoch 001: 185 / 3002 loss=3.397, ppl=10.53, wps=5901.7, ups=0.09, wpb=64842, bsz=128, num_updates=179, lr=1.79e-05, gnorm=3.273, loss_scale=2, train_wall=11, gb_free=2.8, wall=2077 2021-06-18 19:13:34 | INFO | train_inner | epoch 001: 186 / 3002 loss=3.404, ppl=10.58, wps=5860.5, ups=0.09, wpb=64856, bsz=128, num_updates=180, lr=1.8e-05, gnorm=3.012, loss_scale=2, train_wall=11, gb_free=2.8, wall=2089 2021-06-18 19:13:45 | INFO | train_inner | epoch 001: 187 / 3002 loss=3.425, ppl=10.74, wps=5875.1, ups=0.09, wpb=64851, bsz=128, num_updates=181, lr=1.81e-05, gnorm=2.936, loss_scale=2, train_wall=11, gb_free=2.8, wall=2100 2021-06-18 19:13:56 | INFO | train_inner | epoch 001: 188 / 3002 loss=3.193, ppl=9.14, wps=5898.3, ups=0.09, wpb=64812, bsz=128, num_updates=182, lr=1.82e-05, gnorm=2.886, loss_scale=2, train_wall=11, gb_free=2.8, wall=2111 2021-06-18 19:14:07 | INFO | train_inner | epoch 001: 189 / 3002 loss=3.446, ppl=10.9, wps=5896.1, ups=0.09, wpb=64795, bsz=128, num_updates=183, lr=1.83e-05, gnorm=14.423, loss_scale=2, train_wall=11, gb_free=2.8, wall=2122 2021-06-18 19:14:18 | INFO | train_inner | epoch 001: 190 / 3002 loss=3.494, ppl=11.26, wps=5846.3, ups=0.09, wpb=64770, bsz=128, num_updates=184, lr=1.84e-05, gnorm=9.766, loss_scale=2, train_wall=11, gb_free=2.8, wall=2133 2021-06-18 19:14:30 | INFO | train_inner | epoch 001: 191 / 3002 loss=3.347, ppl=10.17, wps=5743.1, ups=0.09, wpb=64918, bsz=128, num_updates=185, lr=1.85e-05, gnorm=2.956, loss_scale=2, train_wall=11, gb_free=2.8, wall=2144 2021-06-18 19:14:41 | INFO | train_inner | epoch 001: 192 / 3002 loss=3.419, ppl=10.7, wps=5870.6, ups=0.09, wpb=64742, bsz=128, num_updates=186, lr=1.86e-05, gnorm=2.935, loss_scale=2, train_wall=11, gb_free=2.8, wall=2155 2021-06-18 19:14:52 | INFO | train_inner | epoch 001: 193 / 3002 loss=3.36, ppl=10.27, wps=5802.6, ups=0.09, wpb=64865, bsz=128, num_updates=187, lr=1.87e-05, gnorm=2.964, loss_scale=2, train_wall=11, gb_free=2.8, wall=2166 2021-06-18 19:15:03 | INFO | train_inner | epoch 001: 194 / 3002 loss=3.439, ppl=10.84, wps=5970.1, ups=0.09, wpb=64815, bsz=128, num_updates=188, lr=1.88e-05, gnorm=2.915, loss_scale=2, train_wall=10, gb_free=2.8, wall=2177 2021-06-18 19:15:14 | INFO | train_inner | epoch 001: 195 / 3002 loss=3.327, ppl=10.03, wps=5790.1, ups=0.09, wpb=64769, bsz=128, num_updates=189, lr=1.89e-05, gnorm=7.126, loss_scale=2, train_wall=11, gb_free=2.8, wall=2188 2021-06-18 19:15:25 | INFO | train_inner | epoch 001: 196 / 3002 loss=3.35, ppl=10.19, wps=5843.9, ups=0.09, wpb=64816, bsz=128, num_updates=190, lr=1.9e-05, gnorm=3.441, loss_scale=2, train_wall=11, gb_free=2.8, wall=2199 2021-06-18 19:15:36 | INFO | train_inner | epoch 001: 197 / 3002 loss=3.313, ppl=9.94, wps=5877.9, ups=0.09, wpb=64801, bsz=128, num_updates=191, lr=1.91e-05, gnorm=2.91, loss_scale=2, train_wall=11, gb_free=2.8, wall=2210 2021-06-18 19:15:47 | INFO | train_inner | epoch 001: 198 / 3002 loss=3.42, ppl=10.71, wps=5806.5, ups=0.09, wpb=64898, bsz=128, num_updates=192, lr=1.92e-05, gnorm=2.945, loss_scale=2, train_wall=11, gb_free=2.8, wall=2221 2021-06-18 19:15:58 | INFO | train_inner | epoch 001: 199 / 3002 loss=3.533, ppl=11.58, wps=5790.3, ups=0.09, wpb=64868, bsz=128, num_updates=193, lr=1.93e-05, gnorm=3.248, loss_scale=2, train_wall=11, gb_free=2.8, wall=2233 2021-06-18 19:16:10 | INFO | train_inner | epoch 001: 200 / 3002 loss=3.446, ppl=10.9, wps=5795, ups=0.09, wpb=64915, bsz=128, num_updates=194, lr=1.94e-05, gnorm=4.842, loss_scale=2, train_wall=11, gb_free=2.8, wall=2244 2021-06-18 19:16:21 | INFO | train_inner | epoch 001: 201 / 3002 loss=3.439, ppl=10.85, wps=5874.6, ups=0.09, wpb=64750, bsz=128, num_updates=195, lr=1.95e-05, gnorm=3.008, loss_scale=2, train_wall=11, gb_free=2.8, wall=2255 2021-06-18 19:16:32 | INFO | train_inner | epoch 001: 202 / 3002 loss=3.51, ppl=11.39, wps=5892.5, ups=0.09, wpb=64779, bsz=128, num_updates=196, lr=1.96e-05, gnorm=3.035, loss_scale=2, train_wall=11, gb_free=2.8, wall=2266 2021-06-18 19:16:43 | INFO | train_inner | epoch 001: 203 / 3002 loss=3.338, ppl=10.11, wps=5849.1, ups=0.09, wpb=64843, bsz=128, num_updates=197, lr=1.97e-05, gnorm=22.318, loss_scale=2, train_wall=11, gb_free=2.8, wall=2277 2021-06-18 19:16:54 | INFO | train_inner | epoch 001: 204 / 3002 loss=3.314, ppl=9.95, wps=5780.5, ups=0.09, wpb=64841, bsz=128, num_updates=198, lr=1.98e-05, gnorm=2.946, loss_scale=2, train_wall=11, gb_free=2.8, wall=2288 2021-06-18 19:17:05 | INFO | train_inner | epoch 001: 205 / 3002 loss=3.386, ppl=10.45, wps=5898.2, ups=0.09, wpb=64798, bsz=128, num_updates=199, lr=1.99e-05, gnorm=3.113, loss_scale=2, train_wall=11, gb_free=2.8, wall=2299 2021-06-18 19:17:16 | INFO | train_inner | epoch 001: 206 / 3002 loss=3.462, ppl=11.02, wps=5921.7, ups=0.09, wpb=64887, bsz=128, num_updates=200, lr=2e-05, gnorm=2.987, loss_scale=2, train_wall=11, gb_free=2.8, wall=2310 2021-06-18 19:17:27 | INFO | train_inner | epoch 001: 207 / 3002 loss=3.57, ppl=11.88, wps=5985.1, ups=0.09, wpb=64901, bsz=128, num_updates=201, lr=2.01e-05, gnorm=3.097, loss_scale=2, train_wall=10, gb_free=2.8, wall=2321 2021-06-18 19:17:38 | INFO | train_inner | epoch 001: 208 / 3002 loss=3.482, ppl=11.18, wps=5895.1, ups=0.09, wpb=64826, bsz=128, num_updates=202, lr=2.02e-05, gnorm=3.143, loss_scale=2, train_wall=11, gb_free=2.8, wall=2332 2021-06-18 19:17:49 | INFO | train_inner | epoch 001: 209 / 3002 loss=3.588, ppl=12.03, wps=5776, ups=0.09, wpb=64763, bsz=128, num_updates=203, lr=2.03e-05, gnorm=3.086, loss_scale=2, train_wall=11, gb_free=2.8, wall=2343 2021-06-18 19:18:00 | INFO | train_inner | epoch 001: 210 / 3002 loss=3.174, ppl=9.02, wps=5890.1, ups=0.09, wpb=64907, bsz=128, num_updates=204, lr=2.04e-05, gnorm=3.306, loss_scale=2, train_wall=11, gb_free=2.8, wall=2354 2021-06-18 19:18:11 | INFO | train_inner | epoch 001: 211 / 3002 loss=3.432, ppl=10.79, wps=5893.5, ups=0.09, wpb=64823, bsz=128, num_updates=205, lr=2.05e-05, gnorm=3.057, loss_scale=4, train_wall=11, gb_free=2.8, wall=2365 2021-06-18 19:18:22 | INFO | train_inner | epoch 001: 212 / 3002 loss=3.2, ppl=9.19, wps=5888.4, ups=0.09, wpb=64864, bsz=128, num_updates=206, lr=2.06e-05, gnorm=3.072, loss_scale=4, train_wall=11, gb_free=2.8, wall=2376 2021-06-18 19:18:33 | INFO | train_inner | epoch 001: 213 / 3002 loss=3.423, ppl=10.73, wps=5870.3, ups=0.09, wpb=64827, bsz=128, num_updates=207, lr=2.07e-05, gnorm=2.898, loss_scale=4, train_wall=11, gb_free=2.8, wall=2387 2021-06-18 19:18:44 | INFO | train_inner | epoch 001: 214 / 3002 loss=3.449, ppl=10.92, wps=5776.5, ups=0.09, wpb=64749, bsz=128, num_updates=208, lr=2.08e-05, gnorm=2.983, loss_scale=4, train_wall=11, gb_free=2.8, wall=2398 2021-06-18 19:18:55 | INFO | train_inner | epoch 001: 215 / 3002 loss=3.231, ppl=9.39, wps=5916.7, ups=0.09, wpb=64911, bsz=128, num_updates=209, lr=2.09e-05, gnorm=2.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=2409 2021-06-18 19:19:06 | INFO | train_inner | epoch 001: 216 / 3002 loss=3.397, ppl=10.54, wps=5851.3, ups=0.09, wpb=64852, bsz=128, num_updates=210, lr=2.1e-05, gnorm=3.301, loss_scale=4, train_wall=11, gb_free=2.8, wall=2421 2021-06-18 19:19:17 | INFO | train_inner | epoch 001: 217 / 3002 loss=3.417, ppl=10.68, wps=5876.1, ups=0.09, wpb=64867, bsz=128, num_updates=211, lr=2.11e-05, gnorm=2.915, loss_scale=4, train_wall=11, gb_free=2.8, wall=2432 2021-06-18 19:19:28 | INFO | train_inner | epoch 001: 218 / 3002 loss=3.413, ppl=10.65, wps=5805.6, ups=0.09, wpb=64866, bsz=128, num_updates=212, lr=2.12e-05, gnorm=3.103, loss_scale=4, train_wall=11, gb_free=2.8, wall=2443 2021-06-18 19:19:40 | INFO | train_inner | epoch 001: 219 / 3002 loss=3.542, ppl=11.65, wps=5847.3, ups=0.09, wpb=64784, bsz=128, num_updates=213, lr=2.13e-05, gnorm=2.955, loss_scale=4, train_wall=11, gb_free=2.8, wall=2454 2021-06-18 19:19:50 | INFO | train_inner | epoch 001: 220 / 3002 loss=3.353, ppl=10.22, wps=5953.6, ups=0.09, wpb=64809, bsz=128, num_updates=214, lr=2.14e-05, gnorm=3.491, loss_scale=4, train_wall=10, gb_free=2.8, wall=2465 2021-06-18 19:20:01 | INFO | train_inner | epoch 001: 221 / 3002 loss=3.545, ppl=11.67, wps=5865.9, ups=0.09, wpb=64836, bsz=128, num_updates=215, lr=2.15e-05, gnorm=3.048, loss_scale=4, train_wall=11, gb_free=2.8, wall=2476 2021-06-18 19:20:12 | INFO | train_inner | epoch 001: 222 / 3002 loss=3.55, ppl=11.72, wps=5867.6, ups=0.09, wpb=64693, bsz=128, num_updates=216, lr=2.16e-05, gnorm=2.93, loss_scale=4, train_wall=11, gb_free=2.8, wall=2487 2021-06-18 19:20:23 | INFO | train_inner | epoch 001: 223 / 3002 loss=3.473, ppl=11.1, wps=5886.7, ups=0.09, wpb=64800, bsz=128, num_updates=217, lr=2.17e-05, gnorm=3.004, loss_scale=4, train_wall=11, gb_free=2.8, wall=2498 2021-06-18 19:20:34 | INFO | train_inner | epoch 001: 224 / 3002 loss=3.355, ppl=10.23, wps=5899.4, ups=0.09, wpb=64875, bsz=128, num_updates=218, lr=2.18e-05, gnorm=3.297, loss_scale=4, train_wall=11, gb_free=2.8, wall=2509 2021-06-18 19:20:46 | INFO | train_inner | epoch 001: 225 / 3002 loss=3.395, ppl=10.52, wps=5806.1, ups=0.09, wpb=64789, bsz=128, num_updates=219, lr=2.19e-05, gnorm=2.891, loss_scale=4, train_wall=11, gb_free=2.8, wall=2520 2021-06-18 19:20:57 | INFO | train_inner | epoch 001: 226 / 3002 loss=3.369, ppl=10.33, wps=5841.8, ups=0.09, wpb=64895, bsz=128, num_updates=220, lr=2.2e-05, gnorm=3.323, loss_scale=4, train_wall=11, gb_free=2.8, wall=2531 2021-06-18 19:21:08 | INFO | train_inner | epoch 001: 227 / 3002 loss=3.363, ppl=10.29, wps=5915.1, ups=0.09, wpb=64853, bsz=128, num_updates=221, lr=2.21e-05, gnorm=2.839, loss_scale=4, train_wall=11, gb_free=2.8, wall=2542 2021-06-18 19:21:19 | INFO | train_inner | epoch 001: 228 / 3002 loss=3.469, ppl=11.08, wps=5880.1, ups=0.09, wpb=64877, bsz=128, num_updates=222, lr=2.22e-05, gnorm=2.988, loss_scale=4, train_wall=11, gb_free=2.8, wall=2553 2021-06-18 19:21:30 | INFO | train_inner | epoch 001: 229 / 3002 loss=3.359, ppl=10.26, wps=5990.1, ups=0.09, wpb=64849, bsz=128, num_updates=223, lr=2.23e-05, gnorm=2.982, loss_scale=4, train_wall=10, gb_free=2.8, wall=2564 2021-06-18 19:21:41 | INFO | train_inner | epoch 001: 230 / 3002 loss=3.429, ppl=10.77, wps=5715.6, ups=0.09, wpb=64727, bsz=128, num_updates=224, lr=2.24e-05, gnorm=2.947, loss_scale=4, train_wall=11, gb_free=2.8, wall=2575 2021-06-18 19:21:52 | INFO | train_inner | epoch 001: 231 / 3002 loss=3.252, ppl=9.53, wps=5897.5, ups=0.09, wpb=64851, bsz=128, num_updates=225, lr=2.25e-05, gnorm=2.81, loss_scale=4, train_wall=11, gb_free=2.8, wall=2586 2021-06-18 19:22:03 | INFO | train_inner | epoch 001: 232 / 3002 loss=3.692, ppl=12.92, wps=5750.8, ups=0.09, wpb=64680, bsz=128, num_updates=226, lr=2.26e-05, gnorm=2.957, loss_scale=4, train_wall=11, gb_free=2.8, wall=2597 2021-06-18 19:22:14 | INFO | train_inner | epoch 001: 233 / 3002 loss=3.341, ppl=10.13, wps=5903.4, ups=0.09, wpb=64815, bsz=128, num_updates=227, lr=2.27e-05, gnorm=2.896, loss_scale=4, train_wall=11, gb_free=2.8, wall=2608 2021-06-18 19:22:25 | INFO | train_inner | epoch 001: 234 / 3002 loss=3.277, ppl=9.69, wps=5826.4, ups=0.09, wpb=64887, bsz=128, num_updates=228, lr=2.28e-05, gnorm=3.032, loss_scale=4, train_wall=11, gb_free=2.8, wall=2620 2021-06-18 19:22:36 | INFO | train_inner | epoch 001: 235 / 3002 loss=3.479, ppl=11.15, wps=5845.9, ups=0.09, wpb=64817, bsz=128, num_updates=229, lr=2.29e-05, gnorm=3.12, loss_scale=4, train_wall=11, gb_free=2.8, wall=2631 2021-06-18 19:22:48 | INFO | train_inner | epoch 001: 236 / 3002 loss=3.306, ppl=9.89, wps=5801.8, ups=0.09, wpb=64827, bsz=128, num_updates=230, lr=2.3e-05, gnorm=2.929, loss_scale=4, train_wall=11, gb_free=2.8, wall=2642 2021-06-18 19:22:59 | INFO | train_inner | epoch 001: 237 / 3002 loss=3.467, ppl=11.06, wps=5798.7, ups=0.09, wpb=64740, bsz=128, num_updates=231, lr=2.31e-05, gnorm=3.178, loss_scale=4, train_wall=11, gb_free=2.8, wall=2653 2021-06-18 19:23:10 | INFO | train_inner | epoch 001: 238 / 3002 loss=3.416, ppl=10.67, wps=5843.2, ups=0.09, wpb=64822, bsz=128, num_updates=232, lr=2.32e-05, gnorm=2.996, loss_scale=4, train_wall=11, gb_free=2.8, wall=2664 2021-06-18 19:23:21 | INFO | train_inner | epoch 001: 239 / 3002 loss=3.419, ppl=10.7, wps=5810.9, ups=0.09, wpb=64838, bsz=128, num_updates=233, lr=2.33e-05, gnorm=3, loss_scale=4, train_wall=11, gb_free=2.8, wall=2675 2021-06-18 19:23:32 | INFO | train_inner | epoch 001: 240 / 3002 loss=3.375, ppl=10.37, wps=5817.8, ups=0.09, wpb=64845, bsz=128, num_updates=234, lr=2.34e-05, gnorm=3.042, loss_scale=4, train_wall=11, gb_free=2.8, wall=2686 2021-06-18 19:23:43 | INFO | train_inner | epoch 001: 241 / 3002 loss=3.419, ppl=10.7, wps=5815.9, ups=0.09, wpb=64891, bsz=128, num_updates=235, lr=2.35e-05, gnorm=3.407, loss_scale=4, train_wall=11, gb_free=2.8, wall=2698 2021-06-18 19:23:54 | INFO | train_inner | epoch 001: 242 / 3002 loss=3.355, ppl=10.23, wps=5766, ups=0.09, wpb=64780, bsz=128, num_updates=236, lr=2.36e-05, gnorm=4.515, loss_scale=4, train_wall=11, gb_free=2.8, wall=2709 2021-06-18 19:24:06 | INFO | train_inner | epoch 001: 243 / 3002 loss=3.469, ppl=11.07, wps=5783.3, ups=0.09, wpb=64775, bsz=128, num_updates=237, lr=2.37e-05, gnorm=3.037, loss_scale=4, train_wall=11, gb_free=2.8, wall=2720 2021-06-18 19:24:17 | INFO | train_inner | epoch 001: 244 / 3002 loss=3.474, ppl=11.11, wps=5938.3, ups=0.09, wpb=64818, bsz=128, num_updates=238, lr=2.38e-05, gnorm=3.276, loss_scale=4, train_wall=10, gb_free=2.8, wall=2731 2021-06-18 19:24:27 | INFO | train_inner | epoch 001: 245 / 3002 loss=3.454, ppl=10.96, wps=5969.9, ups=0.09, wpb=64865, bsz=128, num_updates=239, lr=2.39e-05, gnorm=2.836, loss_scale=4, train_wall=10, gb_free=2.8, wall=2742 2021-06-18 19:24:39 | INFO | train_inner | epoch 001: 246 / 3002 loss=3.061, ppl=8.35, wps=5758.1, ups=0.09, wpb=64823, bsz=128, num_updates=240, lr=2.4e-05, gnorm=2.94, loss_scale=4, train_wall=11, gb_free=2.8, wall=2753 2021-06-18 19:24:50 | INFO | train_inner | epoch 001: 247 / 3002 loss=3.247, ppl=9.49, wps=5908, ups=0.09, wpb=64891, bsz=128, num_updates=241, lr=2.41e-05, gnorm=2.958, loss_scale=4, train_wall=11, gb_free=2.8, wall=2764 2021-06-18 19:25:01 | INFO | train_inner | epoch 001: 248 / 3002 loss=3.31, ppl=9.92, wps=5855.8, ups=0.09, wpb=64877, bsz=128, num_updates=242, lr=2.42e-05, gnorm=3.054, loss_scale=4, train_wall=11, gb_free=2.8, wall=2775 2021-06-18 19:25:12 | INFO | train_inner | epoch 001: 249 / 3002 loss=3.446, ppl=10.9, wps=5941.7, ups=0.09, wpb=64922, bsz=128, num_updates=243, lr=2.43e-05, gnorm=2.865, loss_scale=4, train_wall=10, gb_free=2.8, wall=2786 2021-06-18 19:25:23 | INFO | train_inner | epoch 001: 250 / 3002 loss=3.396, ppl=10.52, wps=5726.5, ups=0.09, wpb=64907, bsz=128, num_updates=244, lr=2.44e-05, gnorm=2.944, loss_scale=4, train_wall=11, gb_free=2.8, wall=2797 2021-06-18 19:25:34 | INFO | train_inner | epoch 001: 251 / 3002 loss=3.429, ppl=10.77, wps=5827.2, ups=0.09, wpb=64752, bsz=128, num_updates=245, lr=2.45e-05, gnorm=2.898, loss_scale=4, train_wall=11, gb_free=2.8, wall=2808 2021-06-18 19:25:45 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-18 19:25:56 | INFO | train_inner | epoch 001: 253 / 3002 loss=3.437, ppl=10.83, wps=2953, ups=0.05, wpb=64838, bsz=128, num_updates=246, lr=2.46e-05, gnorm=5.197, loss_scale=2, train_wall=21, gb_free=2.8, wall=2830 2021-06-18 19:26:07 | INFO | train_inner | epoch 001: 254 / 3002 loss=3.352, ppl=10.21, wps=5851.3, ups=0.09, wpb=64807, bsz=128, num_updates=247, lr=2.47e-05, gnorm=2.896, loss_scale=2, train_wall=11, gb_free=2.8, wall=2842 2021-06-18 19:26:18 | INFO | train_inner | epoch 001: 255 / 3002 loss=3.537, ppl=11.61, wps=5865.6, ups=0.09, wpb=64804, bsz=128, num_updates=248, lr=2.48e-05, gnorm=2.942, loss_scale=2, train_wall=11, gb_free=2.8, wall=2853 2021-06-18 19:26:29 | INFO | train_inner | epoch 001: 256 / 3002 loss=3.209, ppl=9.25, wps=5852.5, ups=0.09, wpb=64761, bsz=128, num_updates=249, lr=2.49e-05, gnorm=3.827, loss_scale=2, train_wall=11, gb_free=2.8, wall=2864 2021-06-18 19:26:40 | INFO | train_inner | epoch 001: 257 / 3002 loss=3.41, ppl=10.63, wps=5974.3, ups=0.09, wpb=64776, bsz=128, num_updates=250, lr=2.5e-05, gnorm=15.947, loss_scale=2, train_wall=10, gb_free=2.8, wall=2874 2021-06-18 19:26:51 | INFO | train_inner | epoch 001: 258 / 3002 loss=3.335, ppl=10.09, wps=5879.5, ups=0.09, wpb=64796, bsz=128, num_updates=251, lr=2.51e-05, gnorm=2.979, loss_scale=2, train_wall=11, gb_free=2.8, wall=2885 2021-06-18 19:27:02 | INFO | train_inner | epoch 001: 259 / 3002 loss=3.333, ppl=10.08, wps=5803.6, ups=0.09, wpb=64868, bsz=128, num_updates=252, lr=2.52e-05, gnorm=3.008, loss_scale=2, train_wall=11, gb_free=2.8, wall=2897 2021-06-18 19:27:13 | INFO | train_inner | epoch 001: 260 / 3002 loss=3.361, ppl=10.27, wps=5805.8, ups=0.09, wpb=64754, bsz=128, num_updates=253, lr=2.53e-05, gnorm=3.661, loss_scale=2, train_wall=11, gb_free=2.8, wall=2908 2021-06-18 19:27:25 | INFO | train_inner | epoch 001: 261 / 3002 loss=3.37, ppl=10.34, wps=5766.4, ups=0.09, wpb=64807, bsz=128, num_updates=254, lr=2.54e-05, gnorm=3.504, loss_scale=2, train_wall=11, gb_free=2.8, wall=2919 2021-06-18 19:27:36 | INFO | train_inner | epoch 001: 262 / 3002 loss=3.255, ppl=9.55, wps=5848.2, ups=0.09, wpb=64788, bsz=128, num_updates=255, lr=2.55e-05, gnorm=3.002, loss_scale=2, train_wall=11, gb_free=2.8, wall=2930 2021-06-18 19:27:47 | INFO | train_inner | epoch 001: 263 / 3002 loss=3.538, ppl=11.61, wps=5846.5, ups=0.09, wpb=64892, bsz=128, num_updates=256, lr=2.56e-05, gnorm=3.047, loss_scale=2, train_wall=11, gb_free=2.8, wall=2941 2021-06-18 19:27:58 | INFO | train_inner | epoch 001: 264 / 3002 loss=3.329, ppl=10.05, wps=5858.8, ups=0.09, wpb=64745, bsz=128, num_updates=257, lr=2.57e-05, gnorm=3.02, loss_scale=2, train_wall=11, gb_free=2.8, wall=2952 2021-06-18 19:28:09 | INFO | train_inner | epoch 001: 265 / 3002 loss=3.27, ppl=9.65, wps=5811.4, ups=0.09, wpb=64794, bsz=128, num_updates=258, lr=2.58e-05, gnorm=2.907, loss_scale=2, train_wall=11, gb_free=2.8, wall=2963 2021-06-18 19:28:20 | INFO | train_inner | epoch 001: 266 / 3002 loss=3.25, ppl=9.51, wps=5862.7, ups=0.09, wpb=64925, bsz=128, num_updates=259, lr=2.59e-05, gnorm=2.978, loss_scale=2, train_wall=11, gb_free=2.8, wall=2975 2021-06-18 19:28:31 | INFO | train_inner | epoch 001: 267 / 3002 loss=3.367, ppl=10.32, wps=5917, ups=0.09, wpb=64824, bsz=128, num_updates=260, lr=2.6e-05, gnorm=3.002, loss_scale=2, train_wall=11, gb_free=2.8, wall=2985 2021-06-18 19:28:42 | INFO | train_inner | epoch 001: 268 / 3002 loss=3.292, ppl=9.79, wps=5910.3, ups=0.09, wpb=64846, bsz=128, num_updates=261, lr=2.61e-05, gnorm=2.996, loss_scale=2, train_wall=11, gb_free=2.8, wall=2996 2021-06-18 19:28:53 | INFO | train_inner | epoch 001: 269 / 3002 loss=3.464, ppl=11.04, wps=5927.7, ups=0.09, wpb=64909, bsz=128, num_updates=262, lr=2.62e-05, gnorm=2.969, loss_scale=2, train_wall=11, gb_free=2.8, wall=3007 2021-06-18 19:29:04 | INFO | train_inner | epoch 001: 270 / 3002 loss=3.356, ppl=10.24, wps=5896.6, ups=0.09, wpb=64912, bsz=128, num_updates=263, lr=2.63e-05, gnorm=2.866, loss_scale=2, train_wall=11, gb_free=2.8, wall=3018 2021-06-18 19:29:15 | INFO | train_inner | epoch 001: 271 / 3002 loss=3.327, ppl=10.04, wps=5851.4, ups=0.09, wpb=64872, bsz=128, num_updates=264, lr=2.64e-05, gnorm=5.543, loss_scale=2, train_wall=11, gb_free=2.8, wall=3029 2021-06-18 19:29:26 | INFO | train_inner | epoch 001: 272 / 3002 loss=3.398, ppl=10.54, wps=5801.2, ups=0.09, wpb=64812, bsz=128, num_updates=265, lr=2.65e-05, gnorm=3.957, loss_scale=2, train_wall=11, gb_free=2.8, wall=3041 2021-06-18 19:29:37 | INFO | train_inner | epoch 001: 273 / 3002 loss=3.32, ppl=9.98, wps=5881.6, ups=0.09, wpb=64887, bsz=128, num_updates=266, lr=2.66e-05, gnorm=2.991, loss_scale=2, train_wall=11, gb_free=2.8, wall=3052 2021-06-18 19:29:48 | INFO | train_inner | epoch 001: 274 / 3002 loss=3.464, ppl=11.03, wps=5912.5, ups=0.09, wpb=64879, bsz=128, num_updates=267, lr=2.67e-05, gnorm=3.103, loss_scale=2, train_wall=11, gb_free=2.8, wall=3063 2021-06-18 19:29:59 | INFO | train_inner | epoch 001: 275 / 3002 loss=3.4, ppl=10.56, wps=6027.6, ups=0.09, wpb=64732, bsz=128, num_updates=268, lr=2.68e-05, gnorm=3.232, loss_scale=2, train_wall=10, gb_free=2.8, wall=3073 2021-06-18 19:30:10 | INFO | train_inner | epoch 001: 276 / 3002 loss=3.439, ppl=10.84, wps=5942.4, ups=0.09, wpb=64759, bsz=128, num_updates=269, lr=2.69e-05, gnorm=2.999, loss_scale=2, train_wall=10, gb_free=2.8, wall=3084 2021-06-18 19:30:21 | INFO | train_inner | epoch 001: 277 / 3002 loss=3.262, ppl=9.59, wps=5881.4, ups=0.09, wpb=64848, bsz=128, num_updates=270, lr=2.7e-05, gnorm=2.991, loss_scale=2, train_wall=11, gb_free=2.8, wall=3095 2021-06-18 19:30:32 | INFO | train_inner | epoch 001: 278 / 3002 loss=3.384, ppl=10.44, wps=5936.9, ups=0.09, wpb=64782, bsz=128, num_updates=271, lr=2.71e-05, gnorm=3.044, loss_scale=2, train_wall=10, gb_free=2.8, wall=3106 2021-06-18 19:30:43 | INFO | train_inner | epoch 001: 279 / 3002 loss=3.248, ppl=9.5, wps=5757.5, ups=0.09, wpb=64832, bsz=128, num_updates=272, lr=2.72e-05, gnorm=3.903, loss_scale=2, train_wall=11, gb_free=2.8, wall=3118 2021-06-18 19:30:54 | INFO | train_inner | epoch 001: 280 / 3002 loss=3.354, ppl=10.23, wps=5834.6, ups=0.09, wpb=64859, bsz=128, num_updates=273, lr=2.73e-05, gnorm=3.846, loss_scale=2, train_wall=11, gb_free=2.8, wall=3129 2021-06-18 19:31:06 | INFO | train_inner | epoch 001: 281 / 3002 loss=3.387, ppl=10.46, wps=5767.9, ups=0.09, wpb=64776, bsz=128, num_updates=274, lr=2.74e-05, gnorm=2.903, loss_scale=2, train_wall=11, gb_free=2.8, wall=3140 2021-06-18 19:31:17 | INFO | train_inner | epoch 001: 282 / 3002 loss=3.306, ppl=9.89, wps=5857.9, ups=0.09, wpb=64825, bsz=128, num_updates=275, lr=2.75e-05, gnorm=3.031, loss_scale=2, train_wall=11, gb_free=2.8, wall=3151 2021-06-18 19:31:28 | INFO | train_inner | epoch 001: 283 / 3002 loss=3.253, ppl=9.54, wps=5906.8, ups=0.09, wpb=64792, bsz=128, num_updates=276, lr=2.76e-05, gnorm=3.055, loss_scale=2, train_wall=11, gb_free=2.8, wall=3162 2021-06-18 19:31:39 | INFO | train_inner | epoch 001: 284 / 3002 loss=3.318, ppl=9.98, wps=5862.2, ups=0.09, wpb=64795, bsz=128, num_updates=277, lr=2.77e-05, gnorm=2.993, loss_scale=2, train_wall=11, gb_free=2.8, wall=3173 2021-06-18 19:31:50 | INFO | train_inner | epoch 001: 285 / 3002 loss=3.28, ppl=9.72, wps=5871.7, ups=0.09, wpb=64857, bsz=128, num_updates=278, lr=2.78e-05, gnorm=2.874, loss_scale=2, train_wall=11, gb_free=2.8, wall=3184 2021-06-18 19:32:01 | INFO | train_inner | epoch 001: 286 / 3002 loss=3.541, ppl=11.64, wps=5821.3, ups=0.09, wpb=64766, bsz=128, num_updates=279, lr=2.79e-05, gnorm=3.112, loss_scale=2, train_wall=11, gb_free=2.8, wall=3195 2021-06-18 19:32:12 | INFO | train_inner | epoch 001: 287 / 3002 loss=3.362, ppl=10.28, wps=5859.4, ups=0.09, wpb=64819, bsz=128, num_updates=280, lr=2.8e-05, gnorm=3.014, loss_scale=2, train_wall=11, gb_free=2.8, wall=3206 2021-06-18 19:32:23 | INFO | train_inner | epoch 001: 288 / 3002 loss=3.312, ppl=9.93, wps=5896.4, ups=0.09, wpb=64843, bsz=128, num_updates=281, lr=2.81e-05, gnorm=3.121, loss_scale=2, train_wall=11, gb_free=2.8, wall=3217 2021-06-18 19:32:34 | INFO | train_inner | epoch 001: 289 / 3002 loss=3.489, ppl=11.23, wps=5877.5, ups=0.09, wpb=64860, bsz=128, num_updates=282, lr=2.82e-05, gnorm=3.063, loss_scale=2, train_wall=11, gb_free=2.8, wall=3228 2021-06-18 19:32:45 | INFO | train_inner | epoch 001: 290 / 3002 loss=3.255, ppl=9.55, wps=5897, ups=0.09, wpb=64841, bsz=128, num_updates=283, lr=2.83e-05, gnorm=3.062, loss_scale=2, train_wall=11, gb_free=2.8, wall=3239 2021-06-18 19:32:56 | INFO | train_inner | epoch 001: 291 / 3002 loss=3.447, ppl=10.91, wps=5778.8, ups=0.09, wpb=64849, bsz=128, num_updates=284, lr=2.84e-05, gnorm=2.938, loss_scale=2, train_wall=11, gb_free=2.8, wall=3250 2021-06-18 19:33:07 | INFO | train_inner | epoch 001: 292 / 3002 loss=3.11, ppl=8.63, wps=5786, ups=0.09, wpb=64834, bsz=128, num_updates=285, lr=2.85e-05, gnorm=2.874, loss_scale=2, train_wall=11, gb_free=2.8, wall=3262 2021-06-18 19:33:18 | INFO | train_inner | epoch 001: 293 / 3002 loss=3.21, ppl=9.25, wps=5899.3, ups=0.09, wpb=64857, bsz=128, num_updates=286, lr=2.86e-05, gnorm=2.886, loss_scale=2, train_wall=11, gb_free=2.8, wall=3273 2021-06-18 19:33:29 | INFO | train_inner | epoch 001: 294 / 3002 loss=3.421, ppl=10.71, wps=5843.8, ups=0.09, wpb=64835, bsz=128, num_updates=287, lr=2.87e-05, gnorm=3.044, loss_scale=2, train_wall=11, gb_free=2.8, wall=3284 2021-06-18 19:33:41 | INFO | train_inner | epoch 001: 295 / 3002 loss=3.382, ppl=10.43, wps=5761.9, ups=0.09, wpb=64796, bsz=128, num_updates=288, lr=2.88e-05, gnorm=3.081, loss_scale=2, train_wall=11, gb_free=2.8, wall=3295 2021-06-18 19:33:52 | INFO | train_inner | epoch 001: 296 / 3002 loss=3.344, ppl=10.15, wps=5824.4, ups=0.09, wpb=64775, bsz=128, num_updates=289, lr=2.89e-05, gnorm=2.976, loss_scale=2, train_wall=11, gb_free=2.8, wall=3306 2021-06-18 19:34:03 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 2021-06-18 19:34:14 | INFO | train_inner | epoch 001: 298 / 3002 loss=3.373, ppl=10.36, wps=2914.8, ups=0.04, wpb=64780, bsz=128, num_updates=290, lr=2.9e-05, gnorm=4.879, loss_scale=1, train_wall=21, gb_free=2.8, wall=3328 2021-06-18 19:34:25 | INFO | train_inner | epoch 001: 299 / 3002 loss=3.356, ppl=10.24, wps=5848.8, ups=0.09, wpb=64794, bsz=128, num_updates=291, lr=2.91e-05, gnorm=10.523, loss_scale=1, train_wall=11, gb_free=2.8, wall=3339 2021-06-18 19:34:36 | INFO | train_inner | epoch 001: 300 / 3002 loss=3.312, ppl=9.93, wps=5759.6, ups=0.09, wpb=64845, bsz=128, num_updates=292, lr=2.92e-05, gnorm=12.127, loss_scale=1, train_wall=11, gb_free=2.8, wall=3351 2021-06-18 19:34:47 | INFO | train_inner | epoch 001: 301 / 3002 loss=3.187, ppl=9.11, wps=5842.6, ups=0.09, wpb=64828, bsz=128, num_updates=293, lr=2.93e-05, gnorm=3.189, loss_scale=1, train_wall=11, gb_free=2.8, wall=3362 2021-06-18 19:34:58 | INFO | train_inner | epoch 001: 302 / 3002 loss=3.282, ppl=9.73, wps=5902.1, ups=0.09, wpb=64773, bsz=128, num_updates=294, lr=2.94e-05, gnorm=3.438, loss_scale=1, train_wall=11, gb_free=2.8, wall=3373 2021-06-18 19:35:09 | INFO | train_inner | epoch 001: 303 / 3002 loss=3.269, ppl=9.64, wps=5926, ups=0.09, wpb=64862, bsz=128, num_updates=295, lr=2.95e-05, gnorm=2.786, loss_scale=1, train_wall=10, gb_free=2.8, wall=3384 2021-06-18 19:35:20 | INFO | train_inner | epoch 001: 304 / 3002 loss=3.387, ppl=10.46, wps=5878.1, ups=0.09, wpb=64848, bsz=128, num_updates=296, lr=2.96e-05, gnorm=2.936, loss_scale=1, train_wall=11, gb_free=2.8, wall=3395 2021-06-18 19:35:31 | INFO | train_inner | epoch 001: 305 / 3002 loss=3.602, ppl=12.15, wps=5876.2, ups=0.09, wpb=64846, bsz=128, num_updates=297, lr=2.97e-05, gnorm=3.042, loss_scale=1, train_wall=11, gb_free=2.8, wall=3406 2021-06-18 19:35:43 | INFO | train_inner | epoch 001: 306 / 3002 loss=3.295, ppl=9.81, wps=5799.1, ups=0.09, wpb=64834, bsz=128, num_updates=298, lr=2.98e-05, gnorm=3.04, loss_scale=1, train_wall=11, gb_free=2.8, wall=3417 2021-06-18 19:35:54 | INFO | train_inner | epoch 001: 307 / 3002 loss=3.356, ppl=10.24, wps=5813.5, ups=0.09, wpb=64874, bsz=128, num_updates=299, lr=2.99e-05, gnorm=2.991, loss_scale=1, train_wall=11, gb_free=2.8, wall=3428 2021-06-18 19:36:05 | INFO | train_inner | epoch 001: 308 / 3002 loss=3.378, ppl=10.4, wps=5848.9, ups=0.09, wpb=64836, bsz=128, num_updates=300, lr=3e-05, gnorm=2.82, loss_scale=1, train_wall=11, gb_free=2.8, wall=3439 2021-06-18 19:36:16 | INFO | train_inner | epoch 001: 309 / 3002 loss=3.34, ppl=10.12, wps=5932.5, ups=0.09, wpb=64893, bsz=128, num_updates=301, lr=3.01e-05, gnorm=3.001, loss_scale=1, train_wall=10, gb_free=2.8, wall=3450 2021-06-18 19:36:27 | INFO | train_inner | epoch 001: 310 / 3002 loss=3.227, ppl=9.37, wps=5827.5, ups=0.09, wpb=64853, bsz=128, num_updates=302, lr=3.02e-05, gnorm=2.987, loss_scale=1, train_wall=11, gb_free=2.8, wall=3461 2021-06-18 19:36:38 | INFO | train_inner | epoch 001: 311 / 3002 loss=3.311, ppl=9.93, wps=5902.6, ups=0.09, wpb=64786, bsz=128, num_updates=303, lr=3.03e-05, gnorm=3.005, loss_scale=1, train_wall=11, gb_free=2.8, wall=3472 2021-06-18 19:36:49 | INFO | train_inner | epoch 001: 312 / 3002 loss=3.245, ppl=9.48, wps=5913.2, ups=0.09, wpb=64880, bsz=128, num_updates=304, lr=3.04e-05, gnorm=2.95, loss_scale=1, train_wall=10, gb_free=2.8, wall=3483 2021-06-18 19:37:00 | INFO | train_inner | epoch 001: 313 / 3002 loss=3.301, ppl=9.86, wps=5816, ups=0.09, wpb=64773, bsz=128, num_updates=305, lr=3.05e-05, gnorm=2.867, loss_scale=1, train_wall=11, gb_free=2.8, wall=3494 2021-06-18 19:37:11 | INFO | train_inner | epoch 001: 314 / 3002 loss=3.345, ppl=10.16, wps=5922.4, ups=0.09, wpb=64874, bsz=128, num_updates=306, lr=3.06e-05, gnorm=2.909, loss_scale=1, train_wall=11, gb_free=2.8, wall=3505 2021-06-18 19:37:22 | INFO | train_inner | epoch 001: 315 / 3002 loss=3.219, ppl=9.31, wps=5905.9, ups=0.09, wpb=64838, bsz=128, num_updates=307, lr=3.07e-05, gnorm=2.935, loss_scale=1, train_wall=11, gb_free=2.8, wall=3516 2021-06-18 19:37:33 | INFO | train_inner | epoch 001: 316 / 3002 loss=3.443, ppl=10.87, wps=5838.9, ups=0.09, wpb=64803, bsz=128, num_updates=308, lr=3.08e-05, gnorm=2.843, loss_scale=1, train_wall=11, gb_free=2.8, wall=3527 2021-06-18 19:37:44 | INFO | train_inner | epoch 001: 317 / 3002 loss=3.431, ppl=10.79, wps=6022.8, ups=0.09, wpb=64864, bsz=128, num_updates=309, lr=3.09e-05, gnorm=2.947, loss_scale=1, train_wall=10, gb_free=2.8, wall=3538 2021-06-18 19:37:55 | INFO | train_inner | epoch 001: 318 / 3002 loss=3.317, ppl=9.96, wps=5765.3, ups=0.09, wpb=64851, bsz=128, num_updates=310, lr=3.1e-05, gnorm=3.02, loss_scale=1, train_wall=11, gb_free=2.8, wall=3549 2021-06-18 19:38:06 | INFO | train_inner | epoch 001: 319 / 3002 loss=3.268, ppl=9.63, wps=5852.4, ups=0.09, wpb=64830, bsz=128, num_updates=311, lr=3.11e-05, gnorm=2.969, loss_scale=1, train_wall=11, gb_free=2.8, wall=3560 2021-06-18 19:38:17 | INFO | train_inner | epoch 001: 320 / 3002 loss=3.16, ppl=8.94, wps=5849, ups=0.09, wpb=64848, bsz=128, num_updates=312, lr=3.12e-05, gnorm=2.788, loss_scale=1, train_wall=11, gb_free=2.8, wall=3572 2021-06-18 19:38:28 | INFO | train_inner | epoch 001: 321 / 3002 loss=3.216, ppl=9.29, wps=5840.5, ups=0.09, wpb=64795, bsz=128, num_updates=313, lr=3.13e-05, gnorm=2.889, loss_scale=1, train_wall=11, gb_free=2.8, wall=3583 2021-06-18 19:38:39 | INFO | train_inner | epoch 001: 322 / 3002 loss=3.474, ppl=11.11, wps=6006.8, ups=0.09, wpb=64869, bsz=128, num_updates=314, lr=3.14e-05, gnorm=2.923, loss_scale=1, train_wall=10, gb_free=2.8, wall=3593 2021-06-18 19:38:50 | INFO | train_inner | epoch 001: 323 / 3002 loss=3.361, ppl=10.27, wps=5874.5, ups=0.09, wpb=64844, bsz=128, num_updates=315, lr=3.15e-05, gnorm=3.07, loss_scale=1, train_wall=11, gb_free=2.8, wall=3604 2021-06-18 19:39:01 | INFO | train_inner | epoch 001: 324 / 3002 loss=3.439, ppl=10.85, wps=5914.1, ups=0.09, wpb=64837, bsz=128, num_updates=316, lr=3.16e-05, gnorm=3.163, loss_scale=1, train_wall=11, gb_free=2.8, wall=3615 2021-06-18 19:39:12 | INFO | train_inner | epoch 001: 325 / 3002 loss=3.335, ppl=10.09, wps=5808.9, ups=0.09, wpb=64896, bsz=128, num_updates=317, lr=3.17e-05, gnorm=3.182, loss_scale=1, train_wall=11, gb_free=2.8, wall=3627 2021-06-18 19:39:23 | INFO | train_inner | epoch 001: 326 / 3002 loss=3.479, ppl=11.15, wps=5817, ups=0.09, wpb=64851, bsz=128, num_updates=318, lr=3.18e-05, gnorm=2.893, loss_scale=1, train_wall=11, gb_free=2.8, wall=3638 2021-06-18 19:39:34 | INFO | train_inner | epoch 001: 327 / 3002 loss=3.264, ppl=9.61, wps=5872.4, ups=0.09, wpb=64871, bsz=128, num_updates=319, lr=3.19e-05, gnorm=3.16, loss_scale=1, train_wall=11, gb_free=2.8, wall=3649 2021-06-18 19:39:45 | INFO | train_inner | epoch 001: 328 / 3002 loss=3.298, ppl=9.83, wps=5923.4, ups=0.09, wpb=64899, bsz=128, num_updates=320, lr=3.2e-05, gnorm=3.117, loss_scale=1, train_wall=10, gb_free=2.8, wall=3660 2021-06-18 19:39:56 | INFO | train_inner | epoch 001: 329 / 3002 loss=3.441, ppl=10.86, wps=5864, ups=0.09, wpb=64889, bsz=128, num_updates=321, lr=3.21e-05, gnorm=4.425, loss_scale=1, train_wall=11, gb_free=2.8, wall=3671 2021-06-18 19:40:07 | INFO | train_inner | epoch 001: 330 / 3002 loss=3.36, ppl=10.27, wps=5890.7, ups=0.09, wpb=64872, bsz=128, num_updates=322, lr=3.22e-05, gnorm=2.909, loss_scale=1, train_wall=11, gb_free=2.8, wall=3682 2021-06-18 19:40:19 | INFO | train_inner | epoch 001: 331 / 3002 loss=3.346, ppl=10.17, wps=5828.2, ups=0.09, wpb=64843, bsz=128, num_updates=323, lr=3.23e-05, gnorm=3.209, loss_scale=1, train_wall=11, gb_free=2.8, wall=3693 2021-06-18 19:40:30 | INFO | train_inner | epoch 001: 332 / 3002 loss=3.253, ppl=9.54, wps=5829.7, ups=0.09, wpb=64846, bsz=128, num_updates=324, lr=3.24e-05, gnorm=3.185, loss_scale=1, train_wall=11, gb_free=2.8, wall=3704 2021-06-18 19:40:41 | INFO | train_inner | epoch 001: 333 / 3002 loss=3.275, ppl=9.68, wps=5772, ups=0.09, wpb=64832, bsz=128, num_updates=325, lr=3.25e-05, gnorm=2.937, loss_scale=1, train_wall=11, gb_free=2.8, wall=3715 2021-06-18 19:40:52 | INFO | train_inner | epoch 001: 334 / 3002 loss=3.25, ppl=9.51, wps=5780.3, ups=0.09, wpb=64833, bsz=128, num_updates=326, lr=3.26e-05, gnorm=3.496, loss_scale=1, train_wall=11, gb_free=2.8, wall=3727 2021-06-18 19:41:03 | INFO | train_inner | epoch 001: 335 / 3002 loss=3.37, ppl=10.34, wps=5856.9, ups=0.09, wpb=64846, bsz=128, num_updates=327, lr=3.27e-05, gnorm=3.112, loss_scale=1, train_wall=11, gb_free=2.8, wall=3738 2021-06-18 19:41:14 | INFO | train_inner | epoch 001: 336 / 3002 loss=3.32, ppl=9.98, wps=5793.4, ups=0.09, wpb=64751, bsz=128, num_updates=328, lr=3.28e-05, gnorm=3, loss_scale=1, train_wall=11, gb_free=2.8, wall=3749 2021-06-18 19:41:26 | INFO | train_inner | epoch 001: 337 / 3002 loss=3.157, ppl=8.92, wps=5787.6, ups=0.09, wpb=64828, bsz=128, num_updates=329, lr=3.29e-05, gnorm=3.084, loss_scale=1, train_wall=11, gb_free=2.8, wall=3760 2021-06-18 19:41:37 | INFO | train_inner | epoch 001: 338 / 3002 loss=3.336, ppl=10.1, wps=5847.6, ups=0.09, wpb=64828, bsz=128, num_updates=330, lr=3.3e-05, gnorm=4.822, loss_scale=1, train_wall=11, gb_free=2.8, wall=3771 2021-06-18 19:41:48 | INFO | train_inner | epoch 001: 339 / 3002 loss=3.246, ppl=9.49, wps=5753.9, ups=0.09, wpb=64826, bsz=128, num_updates=331, lr=3.31e-05, gnorm=3.174, loss_scale=1, train_wall=11, gb_free=2.8, wall=3782 2021-06-18 19:41:59 | INFO | train_inner | epoch 001: 340 / 3002 loss=3.175, ppl=9.03, wps=5922.8, ups=0.09, wpb=64825, bsz=128, num_updates=332, lr=3.32e-05, gnorm=2.868, loss_scale=1, train_wall=10, gb_free=2.8, wall=3793 2021-06-18 19:42:10 | INFO | train_inner | epoch 001: 341 / 3002 loss=3.175, ppl=9.03, wps=5921.1, ups=0.09, wpb=64857, bsz=128, num_updates=333, lr=3.33e-05, gnorm=3.288, loss_scale=1, train_wall=11, gb_free=2.8, wall=3804 2021-06-18 19:42:21 | INFO | train_inner | epoch 001: 342 / 3002 loss=3.196, ppl=9.17, wps=5941.9, ups=0.09, wpb=64829, bsz=128, num_updates=334, lr=3.34e-05, gnorm=3.071, loss_scale=1, train_wall=10, gb_free=2.8, wall=3815 2021-06-18 19:42:32 | INFO | train_inner | epoch 001: 343 / 3002 loss=3.307, ppl=9.9, wps=5775.3, ups=0.09, wpb=64839, bsz=128, num_updates=335, lr=3.35e-05, gnorm=11.528, loss_scale=1, train_wall=11, gb_free=2.8, wall=3826 2021-06-18 19:42:43 | INFO | train_inner | epoch 001: 344 / 3002 loss=3.433, ppl=10.8, wps=5986.9, ups=0.09, wpb=64873, bsz=128, num_updates=336, lr=3.36e-05, gnorm=2.967, loss_scale=1, train_wall=10, gb_free=2.8, wall=3837 2021-06-18 19:42:54 | INFO | train_inner | epoch 001: 345 / 3002 loss=3.352, ppl=10.21, wps=5768.5, ups=0.09, wpb=64864, bsz=128, num_updates=337, lr=3.37e-05, gnorm=3.198, loss_scale=1, train_wall=11, gb_free=2.8, wall=3848 2021-06-18 19:43:05 | INFO | train_inner | epoch 001: 346 / 3002 loss=3.354, ppl=10.23, wps=5823, ups=0.09, wpb=64785, bsz=128, num_updates=338, lr=3.38e-05, gnorm=3.173, loss_scale=1, train_wall=11, gb_free=2.8, wall=3860 2021-06-18 19:43:17 | INFO | train_inner | epoch 001: 347 / 3002 loss=3.23, ppl=9.38, wps=5735.2, ups=0.09, wpb=64880, bsz=128, num_updates=339, lr=3.39e-05, gnorm=2.94, loss_scale=1, train_wall=11, gb_free=2.8, wall=3871 2021-06-18 19:43:28 | INFO | train_inner | epoch 001: 348 / 3002 loss=3.242, ppl=9.46, wps=5855.6, ups=0.09, wpb=64834, bsz=128, num_updates=340, lr=3.4e-05, gnorm=3.228, loss_scale=1, train_wall=11, gb_free=2.8, wall=3882 2021-06-18 19:43:39 | INFO | train_inner | epoch 001: 349 / 3002 loss=3.331, ppl=10.06, wps=5803.7, ups=0.09, wpb=64787, bsz=128, num_updates=341, lr=3.41e-05, gnorm=2.794, loss_scale=1, train_wall=11, gb_free=2.8, wall=3893 2021-06-18 19:43:50 | INFO | train_inner | epoch 001: 350 / 3002 loss=3.371, ppl=10.35, wps=5907.9, ups=0.09, wpb=64786, bsz=128, num_updates=342, lr=3.42e-05, gnorm=3.007, loss_scale=1, train_wall=11, gb_free=2.8, wall=3904 2021-06-18 19:44:01 | INFO | train_inner | epoch 001: 351 / 3002 loss=3.155, ppl=8.91, wps=5788.2, ups=0.09, wpb=64811, bsz=128, num_updates=343, lr=3.43e-05, gnorm=2.887, loss_scale=1, train_wall=11, gb_free=2.8, wall=3915 2021-06-18 19:44:12 | INFO | train_inner | epoch 001: 352 / 3002 loss=3.103, ppl=8.59, wps=6073.2, ups=0.09, wpb=64887, bsz=128, num_updates=344, lr=3.44e-05, gnorm=2.917, loss_scale=1, train_wall=10, gb_free=2.8, wall=3926 2021-06-18 19:44:23 | INFO | train_inner | epoch 001: 353 / 3002 loss=3.388, ppl=10.47, wps=5927, ups=0.09, wpb=64877, bsz=128, num_updates=345, lr=3.45e-05, gnorm=3.169, loss_scale=1, train_wall=10, gb_free=2.8, wall=3937 2021-06-18 19:44:34 | INFO | train_inner | epoch 001: 354 / 3002 loss=3.254, ppl=9.54, wps=5891.9, ups=0.09, wpb=64843, bsz=128, num_updates=346, lr=3.46e-05, gnorm=3.303, loss_scale=1, train_wall=11, gb_free=2.8, wall=3948 2021-06-18 19:44:45 | INFO | train_inner | epoch 001: 355 / 3002 loss=3.348, ppl=10.18, wps=5917.7, ups=0.09, wpb=64838, bsz=128, num_updates=347, lr=3.47e-05, gnorm=3.182, loss_scale=1, train_wall=11, gb_free=2.8, wall=3959 2021-06-18 19:44:56 | INFO | train_inner | epoch 001: 356 / 3002 loss=3.32, ppl=9.99, wps=5762.9, ups=0.09, wpb=64773, bsz=128, num_updates=348, lr=3.48e-05, gnorm=2.985, loss_scale=1, train_wall=11, gb_free=2.8, wall=3970 2021-06-18 19:45:07 | INFO | train_inner | epoch 001: 357 / 3002 loss=3.191, ppl=9.13, wps=5773.7, ups=0.09, wpb=64866, bsz=128, num_updates=349, lr=3.49e-05, gnorm=2.971, loss_scale=1, train_wall=11, gb_free=2.8, wall=3981 2021-06-18 19:45:18 | INFO | train_inner | epoch 001: 358 / 3002 loss=3.165, ppl=8.97, wps=5768.1, ups=0.09, wpb=64843, bsz=128, num_updates=350, lr=3.5e-05, gnorm=2.83, loss_scale=1, train_wall=11, gb_free=2.8, wall=3993 2021-06-18 19:45:29 | INFO | train_inner | epoch 001: 359 / 3002 loss=3.231, ppl=9.39, wps=5788, ups=0.09, wpb=64782, bsz=128, num_updates=351, lr=3.51e-05, gnorm=3.038, loss_scale=1, train_wall=11, gb_free=2.8, wall=4004 2021-06-18 19:45:41 | INFO | train_inner | epoch 001: 360 / 3002 loss=3.077, ppl=8.44, wps=5819, ups=0.09, wpb=64846, bsz=128, num_updates=352, lr=3.52e-05, gnorm=2.913, loss_scale=1, train_wall=11, gb_free=2.8, wall=4015 2021-06-18 19:45:52 | INFO | train_inner | epoch 001: 361 / 3002 loss=3.392, ppl=10.5, wps=5806.4, ups=0.09, wpb=64824, bsz=128, num_updates=353, lr=3.53e-05, gnorm=3.132, loss_scale=1, train_wall=11, gb_free=2.8, wall=4026 2021-06-18 19:46:03 | INFO | train_inner | epoch 001: 362 / 3002 loss=3.575, ppl=11.91, wps=5768.4, ups=0.09, wpb=64721, bsz=128, num_updates=354, lr=3.54e-05, gnorm=2.946, loss_scale=1, train_wall=11, gb_free=2.8, wall=4037 2021-06-18 19:46:14 | INFO | train_inner | epoch 001: 363 / 3002 loss=3.4, ppl=10.55, wps=5855.7, ups=0.09, wpb=64849, bsz=128, num_updates=355, lr=3.55e-05, gnorm=3.204, loss_scale=1, train_wall=11, gb_free=2.8, wall=4048 2021-06-18 19:46:25 | INFO | train_inner | epoch 001: 364 / 3002 loss=3.254, ppl=9.54, wps=5905, ups=0.09, wpb=64834, bsz=128, num_updates=356, lr=3.56e-05, gnorm=3.187, loss_scale=1, train_wall=11, gb_free=2.8, wall=4059 2021-06-18 19:46:36 | INFO | train_inner | epoch 001: 365 / 3002 loss=3.356, ppl=10.24, wps=5826.5, ups=0.09, wpb=64845, bsz=128, num_updates=357, lr=3.57e-05, gnorm=3.292, loss_scale=1, train_wall=11, gb_free=2.8, wall=4071 2021-06-18 19:46:47 | INFO | train_inner | epoch 001: 366 / 3002 loss=3.261, ppl=9.58, wps=5924.6, ups=0.09, wpb=64788, bsz=128, num_updates=358, lr=3.58e-05, gnorm=2.876, loss_scale=1, train_wall=10, gb_free=2.8, wall=4081 2021-06-18 19:46:58 | INFO | train_inner | epoch 001: 367 / 3002 loss=3.158, ppl=8.93, wps=5892.5, ups=0.09, wpb=64837, bsz=128, num_updates=359, lr=3.59e-05, gnorm=3.195, loss_scale=1, train_wall=11, gb_free=2.8, wall=4092 2021-06-18 19:47:09 | INFO | train_inner | epoch 001: 368 / 3002 loss=3.359, ppl=10.26, wps=5921.1, ups=0.09, wpb=64822, bsz=128, num_updates=360, lr=3.6e-05, gnorm=3.045, loss_scale=1, train_wall=10, gb_free=2.8, wall=4103 2021-06-18 19:47:20 | INFO | train_inner | epoch 001: 369 / 3002 loss=3.354, ppl=10.22, wps=5844.6, ups=0.09, wpb=64867, bsz=128, num_updates=361, lr=3.61e-05, gnorm=3.082, loss_scale=1, train_wall=11, gb_free=2.8, wall=4114 2021-06-18 19:47:31 | INFO | train_inner | epoch 001: 370 / 3002 loss=3.391, ppl=10.49, wps=5921.5, ups=0.09, wpb=64884, bsz=128, num_updates=362, lr=3.62e-05, gnorm=2.954, loss_scale=1, train_wall=10, gb_free=2.8, wall=4125 2021-06-18 19:47:42 | INFO | train_inner | epoch 001: 371 / 3002 loss=3.385, ppl=10.44, wps=5743.6, ups=0.09, wpb=64859, bsz=128, num_updates=363, lr=3.63e-05, gnorm=2.94, loss_scale=1, train_wall=11, gb_free=2.8, wall=4137 2021-06-18 19:47:54 | INFO | train_inner | epoch 001: 372 / 3002 loss=3.258, ppl=9.57, wps=5763.3, ups=0.09, wpb=64828, bsz=128, num_updates=364, lr=3.64e-05, gnorm=2.765, loss_scale=1, train_wall=11, gb_free=2.8, wall=4148 2021-06-18 19:48:05 | INFO | train_inner | epoch 001: 373 / 3002 loss=3.284, ppl=9.74, wps=5764.4, ups=0.09, wpb=64718, bsz=128, num_updates=365, lr=3.65e-05, gnorm=2.945, loss_scale=1, train_wall=11, gb_free=2.8, wall=4159 2021-06-18 19:48:16 | INFO | train_inner | epoch 001: 374 / 3002 loss=3.268, ppl=9.63, wps=5778.3, ups=0.09, wpb=64867, bsz=128, num_updates=366, lr=3.66e-05, gnorm=3.403, loss_scale=1, train_wall=11, gb_free=2.8, wall=4170 2021-06-18 19:48:27 | INFO | train_inner | epoch 001: 375 / 3002 loss=3.248, ppl=9.5, wps=5974.1, ups=0.09, wpb=64765, bsz=128, num_updates=367, lr=3.67e-05, gnorm=2.942, loss_scale=1, train_wall=10, gb_free=2.8, wall=4181 2021-06-18 19:48:38 | INFO | train_inner | epoch 001: 376 / 3002 loss=3.012, ppl=8.07, wps=5887.7, ups=0.09, wpb=64873, bsz=128, num_updates=368, lr=3.68e-05, gnorm=2.971, loss_scale=1, train_wall=11, gb_free=2.8, wall=4192 2021-06-18 19:48:49 | INFO | train_inner | epoch 001: 377 / 3002 loss=3.247, ppl=9.5, wps=5806.3, ups=0.09, wpb=64841, bsz=128, num_updates=369, lr=3.69e-05, gnorm=2.824, loss_scale=1, train_wall=11, gb_free=2.8, wall=4203 2021-06-18 19:49:00 | INFO | train_inner | epoch 001: 378 / 3002 loss=3.242, ppl=9.46, wps=5839.4, ups=0.09, wpb=64828, bsz=128, num_updates=370, lr=3.7e-05, gnorm=3.083, loss_scale=1, train_wall=11, gb_free=2.8, wall=4215 2021-06-18 19:49:11 | INFO | train_inner | epoch 001: 379 / 3002 loss=3.162, ppl=8.95, wps=5829.1, ups=0.09, wpb=64904, bsz=128, num_updates=371, lr=3.71e-05, gnorm=3.21, loss_scale=1, train_wall=11, gb_free=2.8, wall=4226 2021-06-18 19:49:23 | INFO | train_inner | epoch 001: 380 / 3002 loss=3.164, ppl=8.96, wps=5792.4, ups=0.09, wpb=64914, bsz=128, num_updates=372, lr=3.72e-05, gnorm=3.167, loss_scale=1, train_wall=11, gb_free=2.8, wall=4237 2021-06-18 19:49:34 | INFO | train_inner | epoch 001: 381 / 3002 loss=3.29, ppl=9.78, wps=5794.4, ups=0.09, wpb=64854, bsz=128, num_updates=373, lr=3.73e-05, gnorm=3.208, loss_scale=1, train_wall=11, gb_free=2.8, wall=4248 2021-06-18 19:49:45 | INFO | train_inner | epoch 001: 382 / 3002 loss=3.308, ppl=9.91, wps=5905.4, ups=0.09, wpb=64805, bsz=128, num_updates=374, lr=3.74e-05, gnorm=2.908, loss_scale=1, train_wall=11, gb_free=2.8, wall=4259 2021-06-18 19:49:56 | INFO | train_inner | epoch 001: 383 / 3002 loss=3.119, ppl=8.69, wps=5832.7, ups=0.09, wpb=64894, bsz=128, num_updates=375, lr=3.75e-05, gnorm=3.05, loss_scale=1, train_wall=11, gb_free=2.8, wall=4270 2021-06-18 19:50:07 | INFO | train_inner | epoch 001: 384 / 3002 loss=3.362, ppl=10.28, wps=5891.4, ups=0.09, wpb=64800, bsz=128, num_updates=376, lr=3.76e-05, gnorm=2.807, loss_scale=1, train_wall=11, gb_free=2.8, wall=4281 2021-06-18 19:50:18 | INFO | train_inner | epoch 001: 385 / 3002 loss=3.302, ppl=9.86, wps=5746.7, ups=0.09, wpb=64794, bsz=128, num_updates=377, lr=3.77e-05, gnorm=3.191, loss_scale=1, train_wall=11, gb_free=2.8, wall=4292 2021-06-18 19:50:29 | INFO | train_inner | epoch 001: 386 / 3002 loss=3.358, ppl=10.26, wps=5876.4, ups=0.09, wpb=64809, bsz=128, num_updates=378, lr=3.78e-05, gnorm=2.964, loss_scale=1, train_wall=11, gb_free=2.8, wall=4304 2021-06-18 19:50:40 | INFO | train_inner | epoch 001: 387 / 3002 loss=3.355, ppl=10.23, wps=5812.1, ups=0.09, wpb=64827, bsz=128, num_updates=379, lr=3.79e-05, gnorm=2.952, loss_scale=1, train_wall=11, gb_free=2.8, wall=4315 2021-06-18 19:50:51 | INFO | train_inner | epoch 001: 388 / 3002 loss=3.325, ppl=10.02, wps=5968.7, ups=0.09, wpb=64897, bsz=128, num_updates=380, lr=3.8e-05, gnorm=3.124, loss_scale=1, train_wall=10, gb_free=2.8, wall=4326 2021-06-18 19:51:02 | INFO | train_inner | epoch 001: 389 / 3002 loss=3.366, ppl=10.31, wps=5820.3, ups=0.09, wpb=64887, bsz=128, num_updates=381, lr=3.81e-05, gnorm=2.979, loss_scale=1, train_wall=11, gb_free=2.8, wall=4337 2021-06-18 19:51:14 | INFO | train_inner | epoch 001: 390 / 3002 loss=3.244, ppl=9.48, wps=5752.4, ups=0.09, wpb=64746, bsz=128, num_updates=382, lr=3.82e-05, gnorm=3.031, loss_scale=1, train_wall=11, gb_free=2.8, wall=4348 2021-06-18 19:51:25 | INFO | train_inner | epoch 001: 391 / 3002 loss=3.346, ppl=10.17, wps=5872.8, ups=0.09, wpb=64805, bsz=128, num_updates=383, lr=3.83e-05, gnorm=2.985, loss_scale=1, train_wall=11, gb_free=2.8, wall=4359 2021-06-18 19:51:36 | INFO | train_inner | epoch 001: 392 / 3002 loss=3.11, ppl=8.63, wps=5897.1, ups=0.09, wpb=64792, bsz=128, num_updates=384, lr=3.84e-05, gnorm=2.911, loss_scale=1, train_wall=11, gb_free=2.8, wall=4370 2021-06-18 19:51:47 | INFO | train_inner | epoch 001: 393 / 3002 loss=3.24, ppl=9.45, wps=5857.4, ups=0.09, wpb=64803, bsz=128, num_updates=385, lr=3.85e-05, gnorm=2.884, loss_scale=1, train_wall=11, gb_free=2.8, wall=4381 2021-06-18 19:51:58 | INFO | train_inner | epoch 001: 394 / 3002 loss=3.137, ppl=8.8, wps=5880.6, ups=0.09, wpb=64844, bsz=128, num_updates=386, lr=3.86e-05, gnorm=3.076, loss_scale=1, train_wall=11, gb_free=2.8, wall=4392 2021-06-18 19:52:09 | INFO | train_inner | epoch 001: 395 / 3002 loss=3.325, ppl=10.02, wps=5783.8, ups=0.09, wpb=64874, bsz=128, num_updates=387, lr=3.87e-05, gnorm=3.036, loss_scale=1, train_wall=11, gb_free=2.8, wall=4403 2021-06-18 19:52:20 | INFO | train_inner | epoch 001: 396 / 3002 loss=3.226, ppl=9.36, wps=5801.8, ups=0.09, wpb=64901, bsz=128, num_updates=388, lr=3.88e-05, gnorm=2.966, loss_scale=1, train_wall=11, gb_free=2.8, wall=4414 2021-06-18 19:52:31 | INFO | train_inner | epoch 001: 397 / 3002 loss=3.101, ppl=8.58, wps=5872.5, ups=0.09, wpb=64802, bsz=128, num_updates=389, lr=3.89e-05, gnorm=8.115, loss_scale=1, train_wall=11, gb_free=2.8, wall=4426 2021-06-18 19:52:42 | INFO | train_inner | epoch 001: 398 / 3002 loss=3.258, ppl=9.57, wps=5845.6, ups=0.09, wpb=64852, bsz=128, num_updates=390, lr=3.9e-05, gnorm=3.1, loss_scale=1, train_wall=11, gb_free=2.8, wall=4437 2021-06-18 19:52:54 | INFO | train_inner | epoch 001: 399 / 3002 loss=3.39, ppl=10.49, wps=5730, ups=0.09, wpb=64826, bsz=128, num_updates=391, lr=3.91e-05, gnorm=2.923, loss_scale=1, train_wall=11, gb_free=2.8, wall=4448 2021-06-18 19:53:05 | INFO | train_inner | epoch 001: 400 / 3002 loss=3.446, ppl=10.9, wps=5881.8, ups=0.09, wpb=64791, bsz=128, num_updates=392, lr=3.92e-05, gnorm=70.911, loss_scale=1, train_wall=11, gb_free=2.8, wall=4459 2021-06-18 19:53:16 | INFO | train_inner | epoch 001: 401 / 3002 loss=3.349, ppl=10.19, wps=5841.4, ups=0.09, wpb=64834, bsz=128, num_updates=393, lr=3.93e-05, gnorm=8.969, loss_scale=1, train_wall=11, gb_free=2.8, wall=4470 2021-06-18 19:53:27 | INFO | train_inner | epoch 001: 402 / 3002 loss=3.389, ppl=10.47, wps=5822.1, ups=0.09, wpb=64828, bsz=128, num_updates=394, lr=3.94e-05, gnorm=2.976, loss_scale=1, train_wall=11, gb_free=2.8, wall=4481 2021-06-18 19:53:38 | INFO | train_inner | epoch 001: 403 / 3002 loss=3.196, ppl=9.16, wps=5883.9, ups=0.09, wpb=64800, bsz=128, num_updates=395, lr=3.95e-05, gnorm=3.105, loss_scale=1, train_wall=11, gb_free=2.8, wall=4492 2021-06-18 19:53:49 | INFO | train_inner | epoch 001: 404 / 3002 loss=3.081, ppl=8.46, wps=5794.8, ups=0.09, wpb=64790, bsz=128, num_updates=396, lr=3.96e-05, gnorm=2.827, loss_scale=1, train_wall=11, gb_free=2.8, wall=4503 2021-06-18 19:54:00 | INFO | train_inner | epoch 001: 405 / 3002 loss=3.315, ppl=9.95, wps=5856.6, ups=0.09, wpb=64767, bsz=128, num_updates=397, lr=3.97e-05, gnorm=3.083, loss_scale=1, train_wall=11, gb_free=2.8, wall=4514 2021-06-18 19:54:11 | INFO | train_inner | epoch 001: 406 / 3002 loss=3.066, ppl=8.37, wps=5975.4, ups=0.09, wpb=64812, bsz=128, num_updates=398, lr=3.98e-05, gnorm=2.952, loss_scale=1, train_wall=10, gb_free=2.8, wall=4525 2021-06-18 19:54:22 | INFO | train_inner | epoch 001: 407 / 3002 loss=3.221, ppl=9.32, wps=5790.7, ups=0.09, wpb=64896, bsz=128, num_updates=399, lr=3.99e-05, gnorm=2.841, loss_scale=1, train_wall=11, gb_free=2.8, wall=4536 2021-06-18 19:54:33 | INFO | train_inner | epoch 001: 408 / 3002 loss=3.26, ppl=9.58, wps=5781.7, ups=0.09, wpb=64833, bsz=128, num_updates=400, lr=4e-05, gnorm=3.227, loss_scale=1, train_wall=11, gb_free=2.8, wall=4548 2021-06-18 19:54:44 | INFO | train_inner | epoch 001: 409 / 3002 loss=3.303, ppl=9.87, wps=5857.3, ups=0.09, wpb=64777, bsz=128, num_updates=401, lr=4.01e-05, gnorm=3.148, loss_scale=1, train_wall=11, gb_free=2.8, wall=4559 2021-06-18 19:54:56 | INFO | train_inner | epoch 001: 410 / 3002 loss=3.26, ppl=9.58, wps=5801.1, ups=0.09, wpb=64842, bsz=128, num_updates=402, lr=4.02e-05, gnorm=3.087, loss_scale=1, train_wall=11, gb_free=2.8, wall=4570 2021-06-18 19:55:07 | INFO | train_inner | epoch 001: 411 / 3002 loss=3.311, ppl=9.93, wps=5842, ups=0.09, wpb=64827, bsz=128, num_updates=403, lr=4.03e-05, gnorm=2.876, loss_scale=1, train_wall=11, gb_free=2.8, wall=4581 2021-06-18 19:55:18 | INFO | train_inner | epoch 001: 412 / 3002 loss=3.45, ppl=10.93, wps=5855.5, ups=0.09, wpb=64790, bsz=128, num_updates=404, lr=4.04e-05, gnorm=3.124, loss_scale=1, train_wall=11, gb_free=2.8, wall=4592 2021-06-18 19:55:29 | INFO | train_inner | epoch 001: 413 / 3002 loss=3.285, ppl=9.75, wps=5858.8, ups=0.09, wpb=64777, bsz=128, num_updates=405, lr=4.05e-05, gnorm=2.97, loss_scale=1, train_wall=11, gb_free=2.8, wall=4603 2021-06-18 19:55:40 | INFO | train_inner | epoch 001: 414 / 3002 loss=3.433, ppl=10.8, wps=5859, ups=0.09, wpb=64784, bsz=128, num_updates=406, lr=4.06e-05, gnorm=2.965, loss_scale=1, train_wall=11, gb_free=2.8, wall=4614 2021-06-18 19:55:51 | INFO | train_inner | epoch 001: 415 / 3002 loss=3.402, ppl=10.57, wps=5868.8, ups=0.09, wpb=64848, bsz=128, num_updates=407, lr=4.07e-05, gnorm=2.842, loss_scale=1, train_wall=11, gb_free=2.8, wall=4625 2021-06-18 19:56:02 | INFO | train_inner | epoch 001: 416 / 3002 loss=3.373, ppl=10.36, wps=5904.8, ups=0.09, wpb=64825, bsz=128, num_updates=408, lr=4.08e-05, gnorm=3.507, loss_scale=1, train_wall=11, gb_free=2.8, wall=4636 2021-06-18 19:56:13 | INFO | train_inner | epoch 001: 417 / 3002 loss=3.372, ppl=10.35, wps=5901.9, ups=0.09, wpb=64814, bsz=128, num_updates=409, lr=4.09e-05, gnorm=3.123, loss_scale=1, train_wall=10, gb_free=2.8, wall=4647 2021-06-18 19:56:24 | INFO | train_inner | epoch 001: 418 / 3002 loss=3.243, ppl=9.47, wps=5899.4, ups=0.09, wpb=64884, bsz=128, num_updates=410, lr=4.1e-05, gnorm=2.842, loss_scale=1, train_wall=11, gb_free=2.8, wall=4658 2021-06-18 19:56:35 | INFO | train_inner | epoch 001: 419 / 3002 loss=3.198, ppl=9.18, wps=5798.8, ups=0.09, wpb=64896, bsz=128, num_updates=411, lr=4.11e-05, gnorm=4.738, loss_scale=1, train_wall=11, gb_free=2.8, wall=4669 2021-06-18 19:56:46 | INFO | train_inner | epoch 001: 420 / 3002 loss=3.275, ppl=9.68, wps=5861, ups=0.09, wpb=64824, bsz=128, num_updates=412, lr=4.12e-05, gnorm=2.926, loss_scale=1, train_wall=11, gb_free=2.8, wall=4680 2021-06-18 19:56:57 | INFO | train_inner | epoch 001: 421 / 3002 loss=3.339, ppl=10.12, wps=5830.7, ups=0.09, wpb=64813, bsz=128, num_updates=413, lr=4.13e-05, gnorm=2.911, loss_scale=1, train_wall=11, gb_free=2.8, wall=4692 2021-06-18 19:57:08 | INFO | train_inner | epoch 001: 422 / 3002 loss=3.135, ppl=8.78, wps=5880.7, ups=0.09, wpb=64825, bsz=128, num_updates=414, lr=4.14e-05, gnorm=2.772, loss_scale=1, train_wall=11, gb_free=2.8, wall=4703 2021-06-18 19:57:19 | INFO | train_inner | epoch 001: 423 / 3002 loss=3.16, ppl=8.94, wps=5808.3, ups=0.09, wpb=64786, bsz=128, num_updates=415, lr=4.15e-05, gnorm=2.849, loss_scale=1, train_wall=11, gb_free=2.8, wall=4714 2021-06-18 19:57:30 | INFO | train_inner | epoch 001: 424 / 3002 loss=3.286, ppl=9.75, wps=5945, ups=0.09, wpb=64858, bsz=128, num_updates=416, lr=4.16e-05, gnorm=2.863, loss_scale=1, train_wall=10, gb_free=2.8, wall=4725 2021-06-18 19:57:41 | INFO | train_inner | epoch 001: 425 / 3002 loss=3.119, ppl=8.69, wps=5818.7, ups=0.09, wpb=64849, bsz=128, num_updates=417, lr=4.17e-05, gnorm=2.989, loss_scale=2, train_wall=11, gb_free=2.8, wall=4736 2021-06-18 19:57:53 | INFO | train_inner | epoch 001: 426 / 3002 loss=3.232, ppl=9.39, wps=5796.5, ups=0.09, wpb=64816, bsz=128, num_updates=418, lr=4.18e-05, gnorm=2.961, loss_scale=2, train_wall=11, gb_free=2.8, wall=4747 2021-06-18 19:58:04 | INFO | train_inner | epoch 001: 427 / 3002 loss=3.292, ppl=9.79, wps=5787.3, ups=0.09, wpb=64877, bsz=128, num_updates=419, lr=4.19e-05, gnorm=2.862, loss_scale=2, train_wall=11, gb_free=2.8, wall=4758 2021-06-18 19:58:15 | INFO | train_inner | epoch 001: 428 / 3002 loss=3.114, ppl=8.66, wps=5906.9, ups=0.09, wpb=64842, bsz=128, num_updates=420, lr=4.2e-05, gnorm=2.849, loss_scale=2, train_wall=11, gb_free=2.8, wall=4769 2021-06-18 19:58:26 | INFO | train_inner | epoch 001: 429 / 3002 loss=3.393, ppl=10.5, wps=5944.2, ups=0.09, wpb=64837, bsz=128, num_updates=421, lr=4.21e-05, gnorm=3.653, loss_scale=2, train_wall=10, gb_free=2.8, wall=4780 2021-06-18 19:58:37 | INFO | train_inner | epoch 001: 430 / 3002 loss=3.287, ppl=9.76, wps=5844.2, ups=0.09, wpb=64768, bsz=128, num_updates=422, lr=4.22e-05, gnorm=2.778, loss_scale=2, train_wall=11, gb_free=2.8, wall=4791 2021-06-18 19:58:48 | INFO | train_inner | epoch 001: 431 / 3002 loss=3.449, ppl=10.92, wps=5855.5, ups=0.09, wpb=64813, bsz=128, num_updates=423, lr=4.23e-05, gnorm=3.101, loss_scale=2, train_wall=11, gb_free=2.8, wall=4802 2021-06-18 19:58:59 | INFO | train_inner | epoch 001: 432 / 3002 loss=3.156, ppl=8.92, wps=5837.3, ups=0.09, wpb=64842, bsz=128, num_updates=424, lr=4.24e-05, gnorm=2.829, loss_scale=2, train_wall=11, gb_free=2.8, wall=4813 2021-06-18 19:59:10 | INFO | train_inner | epoch 001: 433 / 3002 loss=3.29, ppl=9.78, wps=5878.5, ups=0.09, wpb=64890, bsz=128, num_updates=425, lr=4.25e-05, gnorm=2.951, loss_scale=2, train_wall=11, gb_free=2.8, wall=4824 2021-06-18 19:59:21 | INFO | train_inner | epoch 001: 434 / 3002 loss=3.162, ppl=8.95, wps=5750, ups=0.09, wpb=64800, bsz=128, num_updates=426, lr=4.26e-05, gnorm=3.227, loss_scale=2, train_wall=11, gb_free=2.8, wall=4836 2021-06-18 19:59:32 | INFO | train_inner | epoch 001: 435 / 3002 loss=3.299, ppl=9.84, wps=5855.8, ups=0.09, wpb=64867, bsz=128, num_updates=427, lr=4.27e-05, gnorm=2.967, loss_scale=2, train_wall=11, gb_free=2.8, wall=4847 2021-06-18 19:59:43 | INFO | train_inner | epoch 001: 436 / 3002 loss=3.07, ppl=8.4, wps=5918.6, ups=0.09, wpb=64927, bsz=128, num_updates=428, lr=4.28e-05, gnorm=3.179, loss_scale=2, train_wall=11, gb_free=2.8, wall=4858 2021-06-18 19:59:54 | INFO | train_inner | epoch 001: 437 / 3002 loss=3.216, ppl=9.3, wps=5878, ups=0.09, wpb=64843, bsz=128, num_updates=429, lr=4.29e-05, gnorm=2.851, loss_scale=2, train_wall=11, gb_free=2.8, wall=4869 2021-06-18 20:00:05 | INFO | train_inner | epoch 001: 438 / 3002 loss=3.289, ppl=9.78, wps=5889.7, ups=0.09, wpb=64773, bsz=128, num_updates=430, lr=4.3e-05, gnorm=2.989, loss_scale=2, train_wall=11, gb_free=2.8, wall=4880 2021-06-18 20:00:16 | INFO | train_inner | epoch 001: 439 / 3002 loss=3.272, ppl=9.66, wps=5869.4, ups=0.09, wpb=64970, bsz=128, num_updates=431, lr=4.31e-05, gnorm=7.882, loss_scale=2, train_wall=11, gb_free=2.8, wall=4891 2021-06-18 20:00:28 | INFO | train_inner | epoch 001: 440 / 3002 loss=3.114, ppl=8.66, wps=5811.7, ups=0.09, wpb=64781, bsz=128, num_updates=432, lr=4.32e-05, gnorm=2.8, loss_scale=2, train_wall=11, gb_free=2.8, wall=4902 2021-06-18 20:00:39 | INFO | train_inner | epoch 001: 441 / 3002 loss=3.1, ppl=8.58, wps=5845.4, ups=0.09, wpb=64887, bsz=128, num_updates=433, lr=4.33e-05, gnorm=2.842, loss_scale=2, train_wall=11, gb_free=2.8, wall=4913 2021-06-18 20:00:50 | INFO | train_inner | epoch 001: 442 / 3002 loss=3.276, ppl=9.69, wps=5872.2, ups=0.09, wpb=64827, bsz=128, num_updates=434, lr=4.34e-05, gnorm=2.927, loss_scale=2, train_wall=11, gb_free=2.8, wall=4924 2021-06-18 20:01:01 | INFO | train_inner | epoch 001: 443 / 3002 loss=3.25, ppl=9.51, wps=5860, ups=0.09, wpb=64863, bsz=128, num_updates=435, lr=4.35e-05, gnorm=2.924, loss_scale=2, train_wall=11, gb_free=2.8, wall=4935 2021-06-18 20:01:12 | INFO | train_inner | epoch 001: 444 / 3002 loss=3.31, ppl=9.92, wps=5906.8, ups=0.09, wpb=64859, bsz=128, num_updates=436, lr=4.36e-05, gnorm=2.711, loss_scale=2, train_wall=11, gb_free=2.8, wall=4946 2021-06-18 20:01:23 | INFO | train_inner | epoch 001: 445 / 3002 loss=3.192, ppl=9.14, wps=5815.3, ups=0.09, wpb=64839, bsz=128, num_updates=437, lr=4.37e-05, gnorm=3, loss_scale=2, train_wall=11, gb_free=2.8, wall=4957 2021-06-18 20:01:34 | INFO | train_inner | epoch 001: 446 / 3002 loss=3.417, ppl=10.68, wps=5803.2, ups=0.09, wpb=64807, bsz=128, num_updates=438, lr=4.38e-05, gnorm=2.816, loss_scale=2, train_wall=11, gb_free=2.8, wall=4968 2021-06-18 20:01:45 | INFO | train_inner | epoch 001: 447 / 3002 loss=3.457, ppl=10.98, wps=5792.7, ups=0.09, wpb=64849, bsz=128, num_updates=439, lr=4.39e-05, gnorm=2.925, loss_scale=2, train_wall=11, gb_free=2.8, wall=4980 2021-06-18 20:01:56 | INFO | train_inner | epoch 001: 448 / 3002 loss=3.404, ppl=10.59, wps=5816, ups=0.09, wpb=64821, bsz=128, num_updates=440, lr=4.4e-05, gnorm=2.938, loss_scale=2, train_wall=11, gb_free=2.8, wall=4991 2021-06-18 20:02:08 | INFO | train_inner | epoch 001: 449 / 3002 loss=3.366, ppl=10.31, wps=5834.8, ups=0.09, wpb=64825, bsz=128, num_updates=441, lr=4.41e-05, gnorm=2.988, loss_scale=2, train_wall=11, gb_free=2.8, wall=5002 2021-06-18 20:02:19 | INFO | train_inner | epoch 001: 450 / 3002 loss=3.302, ppl=9.86, wps=5830.5, ups=0.09, wpb=64764, bsz=128, num_updates=442, lr=4.42e-05, gnorm=2.965, loss_scale=2, train_wall=11, gb_free=2.8, wall=5013 2021-06-18 20:02:30 | INFO | train_inner | epoch 001: 451 / 3002 loss=3.275, ppl=9.68, wps=5814.3, ups=0.09, wpb=64803, bsz=128, num_updates=443, lr=4.43e-05, gnorm=2.877, loss_scale=2, train_wall=11, gb_free=2.8, wall=5024 2021-06-18 20:02:41 | INFO | train_inner | epoch 001: 452 / 3002 loss=3.199, ppl=9.18, wps=5854.4, ups=0.09, wpb=64795, bsz=128, num_updates=444, lr=4.44e-05, gnorm=2.821, loss_scale=2, train_wall=11, gb_free=2.8, wall=5035 2021-06-18 20:02:52 | INFO | train_inner | epoch 001: 453 / 3002 loss=3.395, ppl=10.52, wps=5784.1, ups=0.09, wpb=64840, bsz=128, num_updates=445, lr=4.45e-05, gnorm=3.115, loss_scale=2, train_wall=11, gb_free=2.8, wall=5046 2021-06-18 20:03:03 | INFO | train_inner | epoch 001: 454 / 3002 loss=3.329, ppl=10.05, wps=5825.5, ups=0.09, wpb=64839, bsz=128, num_updates=446, lr=4.46e-05, gnorm=2.955, loss_scale=2, train_wall=11, gb_free=2.8, wall=5058 2021-06-18 20:03:14 | INFO | train_inner | epoch 001: 455 / 3002 loss=3.284, ppl=9.74, wps=5778.8, ups=0.09, wpb=64774, bsz=128, num_updates=447, lr=4.47e-05, gnorm=2.938, loss_scale=2, train_wall=11, gb_free=2.8, wall=5069 2021-06-18 20:03:26 | INFO | train_inner | epoch 001: 456 / 3002 loss=3.444, ppl=10.88, wps=5787, ups=0.09, wpb=64799, bsz=128, num_updates=448, lr=4.48e-05, gnorm=3.072, loss_scale=2, train_wall=11, gb_free=2.8, wall=5080 2021-06-18 20:03:37 | INFO | train_inner | epoch 001: 457 / 3002 loss=3.307, ppl=9.9, wps=5811.9, ups=0.09, wpb=64787, bsz=128, num_updates=449, lr=4.49e-05, gnorm=3.12, loss_scale=2, train_wall=11, gb_free=2.8, wall=5091 2021-06-18 20:03:48 | INFO | train_inner | epoch 001: 458 / 3002 loss=3.266, ppl=9.62, wps=5949.4, ups=0.09, wpb=64879, bsz=128, num_updates=450, lr=4.5e-05, gnorm=3.118, loss_scale=2, train_wall=10, gb_free=2.8, wall=5102 2021-06-18 20:03:59 | INFO | train_inner | epoch 001: 459 / 3002 loss=3.315, ppl=9.95, wps=5925.4, ups=0.09, wpb=64932, bsz=128, num_updates=451, lr=4.51e-05, gnorm=3.052, loss_scale=2, train_wall=11, gb_free=2.8, wall=5113 2021-06-18 20:04:10 | INFO | train_inner | epoch 001: 460 / 3002 loss=3.308, ppl=9.91, wps=5820.5, ups=0.09, wpb=64851, bsz=128, num_updates=452, lr=4.52e-05, gnorm=3.004, loss_scale=2, train_wall=11, gb_free=2.8, wall=5124 2021-06-18 20:04:21 | INFO | train_inner | epoch 001: 461 / 3002 loss=3.32, ppl=9.99, wps=5825.5, ups=0.09, wpb=64845, bsz=128, num_updates=453, lr=4.53e-05, gnorm=2.929, loss_scale=2, train_wall=11, gb_free=2.8, wall=5135 2021-06-18 20:04:32 | INFO | train_inner | epoch 001: 462 / 3002 loss=3.192, ppl=9.14, wps=5825, ups=0.09, wpb=64895, bsz=128, num_updates=454, lr=4.54e-05, gnorm=3.154, loss_scale=2, train_wall=11, gb_free=2.8, wall=5146 2021-06-18 20:04:43 | INFO | train_inner | epoch 001: 463 / 3002 loss=3.208, ppl=9.24, wps=5711.9, ups=0.09, wpb=64802, bsz=128, num_updates=455, lr=4.55e-05, gnorm=3.383, loss_scale=2, train_wall=11, gb_free=2.8, wall=5158 2021-06-18 20:04:54 | INFO | train_inner | epoch 001: 464 / 3002 loss=3.132, ppl=8.77, wps=5886.8, ups=0.09, wpb=64881, bsz=128, num_updates=456, lr=4.56e-05, gnorm=2.898, loss_scale=2, train_wall=11, gb_free=2.8, wall=5169 2021-06-18 20:05:05 | INFO | train_inner | epoch 001: 465 / 3002 loss=3.319, ppl=9.98, wps=5908, ups=0.09, wpb=64811, bsz=128, num_updates=457, lr=4.57e-05, gnorm=3.152, loss_scale=2, train_wall=11, gb_free=2.8, wall=5180 2021-06-18 20:05:17 | INFO | train_inner | epoch 001: 466 / 3002 loss=3.156, ppl=8.91, wps=5786.1, ups=0.09, wpb=64767, bsz=128, num_updates=458, lr=4.58e-05, gnorm=3.194, loss_scale=2, train_wall=11, gb_free=2.8, wall=5191 2021-06-18 20:05:28 | INFO | train_inner | epoch 001: 467 / 3002 loss=3.236, ppl=9.42, wps=5903.1, ups=0.09, wpb=64872, bsz=128, num_updates=459, lr=4.59e-05, gnorm=3.446, loss_scale=2, train_wall=11, gb_free=2.8, wall=5202 2021-06-18 20:05:39 | INFO | train_inner | epoch 001: 468 / 3002 loss=3.192, ppl=9.14, wps=5923.3, ups=0.09, wpb=64797, bsz=128, num_updates=460, lr=4.6e-05, gnorm=2.894, loss_scale=2, train_wall=10, gb_free=2.8, wall=5213 2021-06-18 20:05:50 | INFO | train_inner | epoch 001: 469 / 3002 loss=3.296, ppl=9.82, wps=5812.4, ups=0.09, wpb=64786, bsz=128, num_updates=461, lr=4.61e-05, gnorm=3.036, loss_scale=2, train_wall=11, gb_free=2.8, wall=5224 2021-06-18 20:06:01 | INFO | train_inner | epoch 001: 470 / 3002 loss=3.201, ppl=9.19, wps=5851.4, ups=0.09, wpb=64832, bsz=128, num_updates=462, lr=4.62e-05, gnorm=2.849, loss_scale=2, train_wall=11, gb_free=2.8, wall=5235 2021-06-18 20:06:12 | INFO | train_inner | epoch 001: 471 / 3002 loss=3.309, ppl=9.91, wps=5865.4, ups=0.09, wpb=64786, bsz=128, num_updates=463, lr=4.63e-05, gnorm=3.089, loss_scale=2, train_wall=11, gb_free=2.8, wall=5246 2021-06-18 20:06:23 | INFO | train_inner | epoch 001: 472 / 3002 loss=3.166, ppl=8.97, wps=5888, ups=0.09, wpb=64825, bsz=128, num_updates=464, lr=4.64e-05, gnorm=2.907, loss_scale=2, train_wall=11, gb_free=2.8, wall=5257 2021-06-18 20:06:34 | INFO | train_inner | epoch 001: 473 / 3002 loss=3.186, ppl=9.1, wps=5821.9, ups=0.09, wpb=64772, bsz=128, num_updates=465, lr=4.65e-05, gnorm=2.766, loss_scale=2, train_wall=11, gb_free=2.8, wall=5268 2021-06-18 20:06:45 | INFO | train_inner | epoch 001: 474 / 3002 loss=3.178, ppl=9.05, wps=5932.3, ups=0.09, wpb=64808, bsz=128, num_updates=466, lr=4.66e-05, gnorm=2.954, loss_scale=2, train_wall=10, gb_free=2.8, wall=5279 2021-06-18 20:06:56 | INFO | train_inner | epoch 001: 475 / 3002 loss=3.226, ppl=9.35, wps=5855.5, ups=0.09, wpb=64842, bsz=128, num_updates=467, lr=4.67e-05, gnorm=3.074, loss_scale=2, train_wall=11, gb_free=2.8, wall=5290 2021-06-18 20:07:07 | INFO | train_inner | epoch 001: 476 / 3002 loss=3.29, ppl=9.78, wps=5853.5, ups=0.09, wpb=64876, bsz=128, num_updates=468, lr=4.68e-05, gnorm=2.888, loss_scale=2, train_wall=11, gb_free=2.8, wall=5301 2021-06-18 20:07:18 | INFO | train_inner | epoch 001: 477 / 3002 loss=3.212, ppl=9.27, wps=5824.2, ups=0.09, wpb=64887, bsz=128, num_updates=469, lr=4.69e-05, gnorm=3.471, loss_scale=2, train_wall=11, gb_free=2.8, wall=5312 2021-06-18 20:07:29 | INFO | train_inner | epoch 001: 478 / 3002 loss=3.24, ppl=9.45, wps=5853.6, ups=0.09, wpb=64806, bsz=128, num_updates=470, lr=4.7e-05, gnorm=2.742, loss_scale=2, train_wall=11, gb_free=2.8, wall=5324 2021-06-18 20:07:40 | INFO | train_inner | epoch 001: 479 / 3002 loss=3.097, ppl=8.55, wps=5840.7, ups=0.09, wpb=64836, bsz=128, num_updates=471, lr=4.71e-05, gnorm=2.789, loss_scale=2, train_wall=11, gb_free=2.8, wall=5335 2021-06-18 20:07:52 | INFO | train_inner | epoch 001: 480 / 3002 loss=3.233, ppl=9.4, wps=5755, ups=0.09, wpb=64786, bsz=128, num_updates=472, lr=4.72e-05, gnorm=2.778, loss_scale=2, train_wall=11, gb_free=2.8, wall=5346 2021-06-18 20:08:03 | INFO | train_inner | epoch 001: 481 / 3002 loss=3.165, ppl=8.97, wps=5926.1, ups=0.09, wpb=64801, bsz=128, num_updates=473, lr=4.73e-05, gnorm=3.068, loss_scale=2, train_wall=10, gb_free=2.8, wall=5357 2021-06-18 20:08:13 | INFO | train_inner | epoch 001: 482 / 3002 loss=3.242, ppl=9.46, wps=5922.9, ups=0.09, wpb=64901, bsz=128, num_updates=474, lr=4.74e-05, gnorm=3.214, loss_scale=2, train_wall=11, gb_free=2.8, wall=5368 2021-06-18 20:08:25 | INFO | train_inner | epoch 001: 483 / 3002 loss=3.005, ppl=8.03, wps=5816.4, ups=0.09, wpb=64867, bsz=128, num_updates=475, lr=4.75e-05, gnorm=3.709, loss_scale=2, train_wall=11, gb_free=2.8, wall=5379 2021-06-18 20:08:36 | INFO | train_inner | epoch 001: 484 / 3002 loss=3.082, ppl=8.47, wps=5724.3, ups=0.09, wpb=64864, bsz=128, num_updates=476, lr=4.76e-05, gnorm=3.071, loss_scale=2, train_wall=11, gb_free=2.8, wall=5390 2021-06-18 20:08:47 | INFO | train_inner | epoch 001: 485 / 3002 loss=3.1, ppl=8.57, wps=5877.2, ups=0.09, wpb=64819, bsz=128, num_updates=477, lr=4.77e-05, gnorm=2.825, loss_scale=2, train_wall=11, gb_free=2.8, wall=5401 2021-06-18 20:08:58 | INFO | train_inner | epoch 001: 486 / 3002 loss=3.064, ppl=8.36, wps=5769.2, ups=0.09, wpb=64898, bsz=128, num_updates=478, lr=4.78e-05, gnorm=2.797, loss_scale=2, train_wall=11, gb_free=2.8, wall=5413 2021-06-18 20:09:09 | INFO | train_inner | epoch 001: 487 / 3002 loss=3.192, ppl=9.14, wps=5894.4, ups=0.09, wpb=64779, bsz=128, num_updates=479, lr=4.79e-05, gnorm=14.104, loss_scale=2, train_wall=11, gb_free=2.8, wall=5424 2021-06-18 20:09:20 | INFO | train_inner | epoch 001: 488 / 3002 loss=3.161, ppl=8.94, wps=5881.6, ups=0.09, wpb=64822, bsz=128, num_updates=480, lr=4.8e-05, gnorm=2.883, loss_scale=2, train_wall=11, gb_free=2.8, wall=5435 2021-06-18 20:09:31 | INFO | train_inner | epoch 001: 489 / 3002 loss=3.041, ppl=8.23, wps=5859.4, ups=0.09, wpb=64873, bsz=128, num_updates=481, lr=4.81e-05, gnorm=2.759, loss_scale=2, train_wall=11, gb_free=2.8, wall=5446 2021-06-18 20:09:42 | INFO | train_inner | epoch 001: 490 / 3002 loss=3.448, ppl=10.91, wps=5897.8, ups=0.09, wpb=64814, bsz=128, num_updates=482, lr=4.82e-05, gnorm=3.167, loss_scale=2, train_wall=11, gb_free=2.8, wall=5457 2021-06-18 20:09:53 | INFO | train_inner | epoch 001: 491 / 3002 loss=3.139, ppl=8.81, wps=5911.7, ups=0.09, wpb=64878, bsz=128, num_updates=483, lr=4.83e-05, gnorm=3.61, loss_scale=2, train_wall=11, gb_free=2.8, wall=5468 2021-06-18 20:10:04 | INFO | train_inner | epoch 001: 492 / 3002 loss=3.334, ppl=10.08, wps=5887.3, ups=0.09, wpb=64771, bsz=128, num_updates=484, lr=4.84e-05, gnorm=2.881, loss_scale=2, train_wall=11, gb_free=2.8, wall=5479 2021-06-18 20:10:15 | INFO | train_inner | epoch 001: 493 / 3002 loss=3.175, ppl=9.03, wps=5849, ups=0.09, wpb=64829, bsz=128, num_updates=485, lr=4.85e-05, gnorm=2.883, loss_scale=2, train_wall=11, gb_free=2.8, wall=5490 2021-06-18 20:10:27 | INFO | train_inner | epoch 001: 494 / 3002 loss=3.232, ppl=9.4, wps=5706.9, ups=0.09, wpb=64922, bsz=128, num_updates=486, lr=4.86e-05, gnorm=2.913, loss_scale=2, train_wall=11, gb_free=2.8, wall=5501 2021-06-18 20:10:38 | INFO | train_inner | epoch 001: 495 / 3002 loss=3.267, ppl=9.63, wps=5801, ups=0.09, wpb=64844, bsz=128, num_updates=487, lr=4.87e-05, gnorm=8.15, loss_scale=2, train_wall=11, gb_free=2.8, wall=5512 2021-06-18 20:10:49 | INFO | train_inner | epoch 001: 496 / 3002 loss=3.249, ppl=9.51, wps=5896.8, ups=0.09, wpb=64769, bsz=128, num_updates=488, lr=4.88e-05, gnorm=2.819, loss_scale=2, train_wall=11, gb_free=2.8, wall=5523 2021-06-18 20:11:00 | INFO | train_inner | epoch 001: 497 / 3002 loss=3.182, ppl=9.08, wps=5969.9, ups=0.09, wpb=64927, bsz=128, num_updates=489, lr=4.89e-05, gnorm=3.124, loss_scale=2, train_wall=10, gb_free=2.8, wall=5534 2021-06-18 20:11:11 | INFO | train_inner | epoch 001: 498 / 3002 loss=3.037, ppl=8.21, wps=5928.4, ups=0.09, wpb=64880, bsz=128, num_updates=490, lr=4.9e-05, gnorm=2.945, loss_scale=2, train_wall=10, gb_free=2.8, wall=5545 2021-06-18 20:11:22 | INFO | train_inner | epoch 001: 499 / 3002 loss=3.218, ppl=9.31, wps=5858, ups=0.09, wpb=64906, bsz=128, num_updates=491, lr=4.91e-05, gnorm=3.344, loss_scale=2, train_wall=11, gb_free=2.8, wall=5556 2021-06-18 20:11:33 | INFO | train_inner | epoch 001: 500 / 3002 loss=3.314, ppl=9.95, wps=5884.2, ups=0.09, wpb=64816, bsz=128, num_updates=492, lr=4.92e-05, gnorm=2.869, loss_scale=2, train_wall=11, gb_free=2.8, wall=5567 2021-06-18 20:11:44 | INFO | train_inner | epoch 001: 501 / 3002 loss=3.322, ppl=10, wps=5862.9, ups=0.09, wpb=64727, bsz=128, num_updates=493, lr=4.93e-05, gnorm=2.815, loss_scale=2, train_wall=11, gb_free=2.8, wall=5578 2021-06-18 20:11:55 | INFO | train_inner | epoch 001: 502 / 3002 loss=3.175, ppl=9.03, wps=5768.8, ups=0.09, wpb=64788, bsz=128, num_updates=494, lr=4.94e-05, gnorm=2.859, loss_scale=2, train_wall=11, gb_free=2.8, wall=5589 2021-06-18 20:12:06 | INFO | train_inner | epoch 001: 503 / 3002 loss=3.417, ppl=10.68, wps=5832.7, ups=0.09, wpb=64782, bsz=128, num_updates=495, lr=4.95e-05, gnorm=2.981, loss_scale=2, train_wall=11, gb_free=2.8, wall=5601 2021-06-18 20:12:17 | INFO | train_inner | epoch 001: 504 / 3002 loss=3.31, ppl=9.92, wps=5781.5, ups=0.09, wpb=64912, bsz=128, num_updates=496, lr=4.96e-05, gnorm=2.829, loss_scale=2, train_wall=11, gb_free=2.8, wall=5612 2021-06-18 20:12:29 | INFO | train_inner | epoch 001: 505 / 3002 loss=3.175, ppl=9.03, wps=5809.5, ups=0.09, wpb=64912, bsz=128, num_updates=497, lr=4.97e-05, gnorm=2.882, loss_scale=2, train_wall=11, gb_free=2.8, wall=5623 2021-06-18 20:12:40 | INFO | train_inner | epoch 001: 506 / 3002 loss=3.112, ppl=8.64, wps=5816.8, ups=0.09, wpb=64838, bsz=128, num_updates=498, lr=4.98e-05, gnorm=2.81, loss_scale=2, train_wall=11, gb_free=2.8, wall=5634 2021-06-18 20:12:51 | INFO | train_inner | epoch 001: 507 / 3002 loss=3.276, ppl=9.68, wps=5758.9, ups=0.09, wpb=64780, bsz=128, num_updates=499, lr=4.99e-05, gnorm=2.965, loss_scale=2, train_wall=11, gb_free=2.8, wall=5645 2021-06-18 20:13:02 | INFO | train_inner | epoch 001: 508 / 3002 loss=3.179, ppl=9.05, wps=5882.4, ups=0.09, wpb=64816, bsz=128, num_updates=500, lr=5e-05, gnorm=2.835, loss_scale=2, train_wall=11, gb_free=2.8, wall=5656 2021-06-18 20:13:13 | INFO | train_inner | epoch 001: 509 / 3002 loss=3.221, ppl=9.33, wps=5791.5, ups=0.09, wpb=64803, bsz=128, num_updates=501, lr=5.01e-05, gnorm=3.09, loss_scale=2, train_wall=11, gb_free=2.8, wall=5668 2021-06-18 20:13:24 | INFO | train_inner | epoch 001: 510 / 3002 loss=3.156, ppl=8.91, wps=5900.4, ups=0.09, wpb=64827, bsz=128, num_updates=502, lr=5.02e-05, gnorm=2.705, loss_scale=2, train_wall=11, gb_free=2.8, wall=5679 2021-06-18 20:13:35 | INFO | train_inner | epoch 001: 511 / 3002 loss=3.249, ppl=9.51, wps=5809.4, ups=0.09, wpb=64747, bsz=128, num_updates=503, lr=5.03e-05, gnorm=14.574, loss_scale=2, train_wall=11, gb_free=2.8, wall=5690 2021-06-18 20:13:46 | INFO | train_inner | epoch 001: 512 / 3002 loss=3.316, ppl=9.96, wps=5856.9, ups=0.09, wpb=64841, bsz=128, num_updates=504, lr=5.04e-05, gnorm=2.853, loss_scale=2, train_wall=11, gb_free=2.8, wall=5701 2021-06-18 20:13:58 | INFO | train_inner | epoch 001: 513 / 3002 loss=3.406, ppl=10.6, wps=5809.8, ups=0.09, wpb=64850, bsz=128, num_updates=505, lr=5.05e-05, gnorm=3.878, loss_scale=2, train_wall=11, gb_free=2.8, wall=5712 2021-06-18 20:14:09 | INFO | train_inner | epoch 001: 514 / 3002 loss=3.394, ppl=10.51, wps=5816.2, ups=0.09, wpb=64788, bsz=128, num_updates=506, lr=5.06e-05, gnorm=8.85, loss_scale=2, train_wall=11, gb_free=2.8, wall=5723 2021-06-18 20:14:20 | INFO | train_inner | epoch 001: 515 / 3002 loss=3.115, ppl=8.67, wps=5872.2, ups=0.09, wpb=64827, bsz=128, num_updates=507, lr=5.07e-05, gnorm=3.178, loss_scale=2, train_wall=11, gb_free=2.8, wall=5734 2021-06-18 20:14:31 | INFO | train_inner | epoch 001: 516 / 3002 loss=3.193, ppl=9.15, wps=5918.9, ups=0.09, wpb=64853, bsz=128, num_updates=508, lr=5.08e-05, gnorm=2.97, loss_scale=2, train_wall=10, gb_free=2.8, wall=5745 2021-06-18 20:14:42 | INFO | train_inner | epoch 001: 517 / 3002 loss=3.267, ppl=9.63, wps=5901.5, ups=0.09, wpb=64892, bsz=128, num_updates=509, lr=5.09e-05, gnorm=3.14, loss_scale=2, train_wall=11, gb_free=2.8, wall=5756 2021-06-18 20:14:53 | INFO | train_inner | epoch 001: 518 / 3002 loss=3.277, ppl=9.7, wps=5751.3, ups=0.09, wpb=64813, bsz=128, num_updates=510, lr=5.1e-05, gnorm=2.96, loss_scale=2, train_wall=11, gb_free=2.8, wall=5767 2021-06-18 20:15:04 | INFO | train_inner | epoch 001: 519 / 3002 loss=3.232, ppl=9.39, wps=5758.7, ups=0.09, wpb=64781, bsz=128, num_updates=511, lr=5.11e-05, gnorm=3.077, loss_scale=2, train_wall=11, gb_free=2.8, wall=5779 2021-06-18 20:15:15 | INFO | train_inner | epoch 001: 520 / 3002 loss=3.053, ppl=8.3, wps=5998.9, ups=0.09, wpb=64870, bsz=128, num_updates=512, lr=5.12e-05, gnorm=16.632, loss_scale=2, train_wall=10, gb_free=2.8, wall=5789 2021-06-18 20:15:26 | INFO | train_inner | epoch 001: 521 / 3002 loss=3.146, ppl=8.85, wps=5830.4, ups=0.09, wpb=64898, bsz=128, num_updates=513, lr=5.13e-05, gnorm=2.988, loss_scale=2, train_wall=11, gb_free=2.8, wall=5801 2021-06-18 20:15:37 | INFO | train_inner | epoch 001: 522 / 3002 loss=3.294, ppl=9.81, wps=5809.8, ups=0.09, wpb=64858, bsz=128, num_updates=514, lr=5.14e-05, gnorm=3.026, loss_scale=2, train_wall=11, gb_free=2.8, wall=5812 2021-06-18 20:15:48 | INFO | train_inner | epoch 001: 523 / 3002 loss=3.316, ppl=9.96, wps=5861.5, ups=0.09, wpb=64844, bsz=128, num_updates=515, lr=5.15e-05, gnorm=3.289, loss_scale=2, train_wall=11, gb_free=2.8, wall=5823 2021-06-18 20:16:00 | INFO | train_inner | epoch 001: 524 / 3002 loss=3.493, ppl=11.26, wps=5762.6, ups=0.09, wpb=64878, bsz=128, num_updates=516, lr=5.16e-05, gnorm=3.441, loss_scale=2, train_wall=11, gb_free=2.8, wall=5834 2021-06-18 20:16:11 | INFO | train_inner | epoch 001: 525 / 3002 loss=3.187, ppl=9.1, wps=5843.5, ups=0.09, wpb=64792, bsz=128, num_updates=517, lr=5.17e-05, gnorm=3.038, loss_scale=2, train_wall=11, gb_free=2.8, wall=5845 2021-06-18 20:16:22 | INFO | train_inner | epoch 001: 526 / 3002 loss=3.269, ppl=9.64, wps=5908.8, ups=0.09, wpb=64861, bsz=128, num_updates=518, lr=5.18e-05, gnorm=3.689, loss_scale=2, train_wall=11, gb_free=2.8, wall=5856 2021-06-18 20:16:33 | INFO | train_inner | epoch 001: 527 / 3002 loss=3.109, ppl=8.63, wps=5904.2, ups=0.09, wpb=64772, bsz=128, num_updates=519, lr=5.19e-05, gnorm=3.609, loss_scale=2, train_wall=11, gb_free=2.8, wall=5867 2021-06-18 20:16:44 | INFO | train_inner | epoch 001: 528 / 3002 loss=3.188, ppl=9.11, wps=5886.5, ups=0.09, wpb=64797, bsz=128, num_updates=520, lr=5.2e-05, gnorm=2.875, loss_scale=2, train_wall=11, gb_free=2.8, wall=5878 2021-06-18 20:16:55 | INFO | train_inner | epoch 001: 529 / 3002 loss=3.187, ppl=9.11, wps=5761.1, ups=0.09, wpb=64822, bsz=128, num_updates=521, lr=5.21e-05, gnorm=4.894, loss_scale=2, train_wall=11, gb_free=2.8, wall=5889 2021-06-18 20:17:06 | INFO | train_inner | epoch 001: 530 / 3002 loss=3.25, ppl=9.52, wps=6036.9, ups=0.09, wpb=64859, bsz=128, num_updates=522, lr=5.22e-05, gnorm=2.886, loss_scale=2, train_wall=10, gb_free=2.8, wall=5900 2021-06-18 20:17:17 | INFO | train_inner | epoch 001: 531 / 3002 loss=3.153, ppl=8.9, wps=5837, ups=0.09, wpb=64819, bsz=128, num_updates=523, lr=5.23e-05, gnorm=3.111, loss_scale=2, train_wall=11, gb_free=2.8, wall=5911 2021-06-18 20:17:28 | INFO | train_inner | epoch 001: 532 / 3002 loss=3.376, ppl=10.38, wps=5807.3, ups=0.09, wpb=64876, bsz=128, num_updates=524, lr=5.24e-05, gnorm=3.212, loss_scale=2, train_wall=11, gb_free=2.8, wall=5922 2021-06-18 20:17:39 | INFO | train_inner | epoch 001: 533 / 3002 loss=3.303, ppl=9.87, wps=5857.7, ups=0.09, wpb=64775, bsz=128, num_updates=525, lr=5.25e-05, gnorm=3.166, loss_scale=2, train_wall=11, gb_free=2.8, wall=5933 2021-06-18 20:17:50 | INFO | train_inner | epoch 001: 534 / 3002 loss=3.157, ppl=8.92, wps=5947.9, ups=0.09, wpb=64870, bsz=128, num_updates=526, lr=5.26e-05, gnorm=2.896, loss_scale=2, train_wall=10, gb_free=2.8, wall=5944 2021-06-18 20:18:01 | INFO | train_inner | epoch 001: 535 / 3002 loss=3.419, ppl=10.7, wps=5901.9, ups=0.09, wpb=64855, bsz=128, num_updates=527, lr=5.27e-05, gnorm=3.03, loss_scale=2, train_wall=11, gb_free=2.8, wall=5955 2021-06-18 20:18:12 | INFO | train_inner | epoch 001: 536 / 3002 loss=3.17, ppl=9, wps=5728.1, ups=0.09, wpb=64854, bsz=128, num_updates=528, lr=5.28e-05, gnorm=3.124, loss_scale=2, train_wall=11, gb_free=2.8, wall=5967 2021-06-18 20:18:23 | INFO | train_inner | epoch 001: 537 / 3002 loss=3.263, ppl=9.6, wps=5888.2, ups=0.09, wpb=64819, bsz=128, num_updates=529, lr=5.29e-05, gnorm=3.958, loss_scale=2, train_wall=11, gb_free=2.8, wall=5978 2021-06-18 20:18:34 | INFO | train_inner | epoch 001: 538 / 3002 loss=3.347, ppl=10.17, wps=5877.1, ups=0.09, wpb=64814, bsz=128, num_updates=530, lr=5.3e-05, gnorm=3.115, loss_scale=2, train_wall=11, gb_free=2.8, wall=5989 2021-06-18 20:18:45 | INFO | train_inner | epoch 001: 539 / 3002 loss=3.065, ppl=8.37, wps=5886.4, ups=0.09, wpb=64778, bsz=128, num_updates=531, lr=5.31e-05, gnorm=2.942, loss_scale=2, train_wall=11, gb_free=2.8, wall=6000 2021-06-18 20:18:56 | INFO | train_inner | epoch 001: 540 / 3002 loss=3.235, ppl=9.42, wps=5854.9, ups=0.09, wpb=64732, bsz=128, num_updates=532, lr=5.32e-05, gnorm=3.013, loss_scale=2, train_wall=11, gb_free=2.8, wall=6011 2021-06-18 20:19:07 | INFO | train_inner | epoch 001: 541 / 3002 loss=3.025, ppl=8.14, wps=5828.5, ups=0.09, wpb=64856, bsz=128, num_updates=533, lr=5.33e-05, gnorm=10.122, loss_scale=2, train_wall=11, gb_free=2.8, wall=6022 2021-06-18 20:19:18 | INFO | train_inner | epoch 001: 542 / 3002 loss=2.996, ppl=7.98, wps=5967.4, ups=0.09, wpb=64890, bsz=128, num_updates=534, lr=5.34e-05, gnorm=7.986, loss_scale=2, train_wall=10, gb_free=2.8, wall=6033 2021-06-18 20:19:29 | INFO | train_inner | epoch 001: 543 / 3002 loss=3.208, ppl=9.24, wps=5945.8, ups=0.09, wpb=64790, bsz=128, num_updates=535, lr=5.35e-05, gnorm=2.896, loss_scale=2, train_wall=10, gb_free=2.8, wall=6044 2021-06-18 20:19:40 | INFO | train_inner | epoch 001: 544 / 3002 loss=3.387, ppl=10.46, wps=5922.3, ups=0.09, wpb=64819, bsz=128, num_updates=536, lr=5.36e-05, gnorm=3.156, loss_scale=2, train_wall=10, gb_free=2.8, wall=6055 2021-06-18 20:19:51 | INFO | train_inner | epoch 001: 545 / 3002 loss=3.398, ppl=10.54, wps=5900.8, ups=0.09, wpb=64813, bsz=128, num_updates=537, lr=5.37e-05, gnorm=3.065, loss_scale=2, train_wall=11, gb_free=2.8, wall=6066 2021-06-18 20:20:02 | INFO | train_inner | epoch 001: 546 / 3002 loss=3.294, ppl=9.81, wps=6007.1, ups=0.09, wpb=64897, bsz=128, num_updates=538, lr=5.38e-05, gnorm=3.037, loss_scale=2, train_wall=10, gb_free=2.8, wall=6076 2021-06-18 20:20:13 | INFO | train_inner | epoch 001: 547 / 3002 loss=3.27, ppl=9.64, wps=5869, ups=0.09, wpb=64846, bsz=128, num_updates=539, lr=5.39e-05, gnorm=14.902, loss_scale=2, train_wall=11, gb_free=2.8, wall=6087 2021-06-18 20:20:24 | INFO | train_inner | epoch 001: 548 / 3002 loss=3.301, ppl=9.86, wps=5767, ups=0.09, wpb=64825, bsz=128, num_updates=540, lr=5.4e-05, gnorm=3.099, loss_scale=2, train_wall=11, gb_free=2.8, wall=6099 2021-06-18 20:20:35 | INFO | train_inner | epoch 001: 549 / 3002 loss=3.034, ppl=8.19, wps=5927.1, ups=0.09, wpb=64844, bsz=128, num_updates=541, lr=5.41e-05, gnorm=5.075, loss_scale=2, train_wall=10, gb_free=2.8, wall=6110 2021-06-18 20:20:46 | INFO | train_inner | epoch 001: 550 / 3002 loss=3.223, ppl=9.34, wps=5915.9, ups=0.09, wpb=64870, bsz=128, num_updates=542, lr=5.42e-05, gnorm=3.035, loss_scale=2, train_wall=11, gb_free=2.8, wall=6121 2021-06-18 20:20:57 | INFO | train_inner | epoch 001: 551 / 3002 loss=3.377, ppl=10.39, wps=5839, ups=0.09, wpb=64756, bsz=128, num_updates=543, lr=5.43e-05, gnorm=2.929, loss_scale=2, train_wall=11, gb_free=2.8, wall=6132 2021-06-18 20:21:09 | INFO | train_inner | epoch 001: 552 / 3002 loss=3.188, ppl=9.11, wps=5773, ups=0.09, wpb=64778, bsz=128, num_updates=544, lr=5.44e-05, gnorm=3.057, loss_scale=2, train_wall=11, gb_free=2.8, wall=6143 2021-06-18 20:21:20 | INFO | train_inner | epoch 001: 553 / 3002 loss=3.217, ppl=9.3, wps=5765.1, ups=0.09, wpb=64838, bsz=128, num_updates=545, lr=5.45e-05, gnorm=3.309, loss_scale=4, train_wall=11, gb_free=2.8, wall=6154 2021-06-18 20:21:31 | INFO | train_inner | epoch 001: 554 / 3002 loss=3.288, ppl=9.77, wps=5802.9, ups=0.09, wpb=64849, bsz=128, num_updates=546, lr=5.46e-05, gnorm=3.052, loss_scale=4, train_wall=11, gb_free=2.8, wall=6165 2021-06-18 20:21:42 | INFO | train_inner | epoch 001: 555 / 3002 loss=3.157, ppl=8.92, wps=5840.7, ups=0.09, wpb=64820, bsz=128, num_updates=547, lr=5.47e-05, gnorm=3.442, loss_scale=4, train_wall=11, gb_free=2.8, wall=6176 2021-06-18 20:21:53 | INFO | train_inner | epoch 001: 556 / 3002 loss=3.136, ppl=8.79, wps=5838.9, ups=0.09, wpb=64771, bsz=128, num_updates=548, lr=5.48e-05, gnorm=2.907, loss_scale=4, train_wall=11, gb_free=2.8, wall=6187 2021-06-18 20:22:04 | INFO | train_inner | epoch 001: 557 / 3002 loss=3.162, ppl=8.95, wps=5854.7, ups=0.09, wpb=64817, bsz=128, num_updates=549, lr=5.49e-05, gnorm=3.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=6199 2021-06-18 20:22:15 | INFO | train_inner | epoch 001: 558 / 3002 loss=3.261, ppl=9.58, wps=5940.7, ups=0.09, wpb=64868, bsz=128, num_updates=550, lr=5.5e-05, gnorm=6.265, loss_scale=4, train_wall=10, gb_free=2.8, wall=6209 2021-06-18 20:22:26 | INFO | train_inner | epoch 001: 559 / 3002 loss=3.227, ppl=9.36, wps=5943.1, ups=0.09, wpb=64861, bsz=128, num_updates=551, lr=5.51e-05, gnorm=3.628, loss_scale=4, train_wall=10, gb_free=2.8, wall=6220 2021-06-18 20:22:37 | INFO | train_inner | epoch 001: 560 / 3002 loss=3.071, ppl=8.41, wps=5878.5, ups=0.09, wpb=64846, bsz=128, num_updates=552, lr=5.52e-05, gnorm=2.761, loss_scale=4, train_wall=11, gb_free=2.8, wall=6231 2021-06-18 20:22:48 | INFO | train_inner | epoch 001: 561 / 3002 loss=3.243, ppl=9.46, wps=5922.6, ups=0.09, wpb=64899, bsz=128, num_updates=553, lr=5.53e-05, gnorm=8.329, loss_scale=4, train_wall=10, gb_free=2.8, wall=6242 2021-06-18 20:22:59 | INFO | train_inner | epoch 001: 562 / 3002 loss=3.151, ppl=8.88, wps=5865.3, ups=0.09, wpb=64865, bsz=128, num_updates=554, lr=5.54e-05, gnorm=2.81, loss_scale=4, train_wall=11, gb_free=2.8, wall=6253 2021-06-18 20:23:10 | INFO | train_inner | epoch 001: 563 / 3002 loss=3.375, ppl=10.38, wps=6051.9, ups=0.09, wpb=64839, bsz=128, num_updates=555, lr=5.55e-05, gnorm=2.838, loss_scale=4, train_wall=10, gb_free=2.8, wall=6264 2021-06-18 20:23:21 | INFO | train_inner | epoch 001: 564 / 3002 loss=3.358, ppl=10.25, wps=5812, ups=0.09, wpb=64812, bsz=128, num_updates=556, lr=5.56e-05, gnorm=3.056, loss_scale=4, train_wall=11, gb_free=2.8, wall=6275 2021-06-18 20:23:32 | INFO | train_inner | epoch 001: 565 / 3002 loss=3.439, ppl=10.84, wps=5804.3, ups=0.09, wpb=64792, bsz=128, num_updates=557, lr=5.57e-05, gnorm=3.908, loss_scale=4, train_wall=11, gb_free=2.8, wall=6286 2021-06-18 20:23:43 | INFO | train_inner | epoch 001: 566 / 3002 loss=3.249, ppl=9.51, wps=5882.8, ups=0.09, wpb=64826, bsz=128, num_updates=558, lr=5.58e-05, gnorm=3.835, loss_scale=4, train_wall=11, gb_free=2.8, wall=6297 2021-06-18 20:23:54 | INFO | train_inner | epoch 001: 567 / 3002 loss=3.294, ppl=9.81, wps=6011.5, ups=0.09, wpb=64950, bsz=128, num_updates=559, lr=5.59e-05, gnorm=2.798, loss_scale=4, train_wall=10, gb_free=2.8, wall=6308 2021-06-18 20:24:05 | INFO | train_inner | epoch 001: 568 / 3002 loss=3.177, ppl=9.04, wps=5771, ups=0.09, wpb=64750, bsz=128, num_updates=560, lr=5.6e-05, gnorm=2.867, loss_scale=4, train_wall=11, gb_free=2.8, wall=6319 2021-06-18 20:24:16 | INFO | train_inner | epoch 001: 569 / 3002 loss=3.262, ppl=9.59, wps=5838.2, ups=0.09, wpb=64919, bsz=128, num_updates=561, lr=5.61e-05, gnorm=2.947, loss_scale=4, train_wall=11, gb_free=2.8, wall=6331 2021-06-18 20:24:27 | INFO | train_inner | epoch 001: 570 / 3002 loss=3.061, ppl=8.35, wps=5838, ups=0.09, wpb=64797, bsz=128, num_updates=562, lr=5.62e-05, gnorm=3.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=6342 2021-06-18 20:24:38 | INFO | train_inner | epoch 001: 571 / 3002 loss=3.259, ppl=9.58, wps=5879.3, ups=0.09, wpb=64901, bsz=128, num_updates=563, lr=5.63e-05, gnorm=3.041, loss_scale=4, train_wall=11, gb_free=2.8, wall=6353 2021-06-18 20:24:49 | INFO | train_inner | epoch 001: 572 / 3002 loss=3.112, ppl=8.65, wps=5921.2, ups=0.09, wpb=64794, bsz=128, num_updates=564, lr=5.64e-05, gnorm=2.768, loss_scale=4, train_wall=10, gb_free=2.8, wall=6364 2021-06-18 20:25:00 | INFO | train_inner | epoch 001: 573 / 3002 loss=3.238, ppl=9.44, wps=5953.9, ups=0.09, wpb=64900, bsz=128, num_updates=565, lr=5.65e-05, gnorm=2.937, loss_scale=4, train_wall=10, gb_free=2.8, wall=6375 2021-06-18 20:25:11 | INFO | train_inner | epoch 001: 574 / 3002 loss=3.243, ppl=9.47, wps=5772.2, ups=0.09, wpb=64849, bsz=128, num_updates=566, lr=5.66e-05, gnorm=2.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=6386 2021-06-18 20:25:22 | INFO | train_inner | epoch 001: 575 / 3002 loss=3.413, ppl=10.65, wps=5882.7, ups=0.09, wpb=64779, bsz=128, num_updates=567, lr=5.67e-05, gnorm=2.976, loss_scale=4, train_wall=11, gb_free=2.8, wall=6397 2021-06-18 20:25:33 | INFO | train_inner | epoch 001: 576 / 3002 loss=3.272, ppl=9.66, wps=5963.7, ups=0.09, wpb=64811, bsz=128, num_updates=568, lr=5.68e-05, gnorm=2.837, loss_scale=4, train_wall=10, gb_free=2.8, wall=6408 2021-06-18 20:25:44 | INFO | train_inner | epoch 001: 577 / 3002 loss=3.217, ppl=9.3, wps=5909.3, ups=0.09, wpb=64817, bsz=128, num_updates=569, lr=5.69e-05, gnorm=3.178, loss_scale=4, train_wall=11, gb_free=2.8, wall=6419 2021-06-18 20:25:56 | INFO | train_inner | epoch 001: 578 / 3002 loss=3.044, ppl=8.25, wps=5762, ups=0.09, wpb=64821, bsz=128, num_updates=570, lr=5.7e-05, gnorm=2.857, loss_scale=4, train_wall=11, gb_free=2.8, wall=6430 2021-06-18 20:26:07 | INFO | train_inner | epoch 001: 579 / 3002 loss=3.225, ppl=9.35, wps=5902.5, ups=0.09, wpb=64763, bsz=128, num_updates=571, lr=5.71e-05, gnorm=5.052, loss_scale=4, train_wall=11, gb_free=2.8, wall=6441 2021-06-18 20:26:17 | INFO | train_inner | epoch 001: 580 / 3002 loss=3.294, ppl=9.81, wps=6051, ups=0.09, wpb=64890, bsz=128, num_updates=572, lr=5.72e-05, gnorm=2.871, loss_scale=4, train_wall=10, gb_free=2.8, wall=6452 2021-06-18 20:26:28 | INFO | train_inner | epoch 001: 581 / 3002 loss=3.231, ppl=9.39, wps=5911.5, ups=0.09, wpb=64832, bsz=128, num_updates=573, lr=5.73e-05, gnorm=2.763, loss_scale=4, train_wall=11, gb_free=2.8, wall=6463 2021-06-18 20:26:39 | INFO | train_inner | epoch 001: 582 / 3002 loss=3.197, ppl=9.17, wps=5834.6, ups=0.09, wpb=64865, bsz=128, num_updates=574, lr=5.74e-05, gnorm=2.927, loss_scale=4, train_wall=11, gb_free=2.8, wall=6474 2021-06-18 20:26:51 | INFO | train_inner | epoch 001: 583 / 3002 loss=3.339, ppl=10.12, wps=5748.7, ups=0.09, wpb=64888, bsz=128, num_updates=575, lr=5.75e-05, gnorm=2.713, loss_scale=4, train_wall=11, gb_free=2.8, wall=6485 2021-06-18 20:27:02 | INFO | train_inner | epoch 001: 584 / 3002 loss=3.172, ppl=9.01, wps=5839.6, ups=0.09, wpb=64877, bsz=128, num_updates=576, lr=5.76e-05, gnorm=2.984, loss_scale=4, train_wall=11, gb_free=2.8, wall=6496 2021-06-18 20:27:13 | INFO | train_inner | epoch 001: 585 / 3002 loss=3.207, ppl=9.23, wps=5857.8, ups=0.09, wpb=64884, bsz=128, num_updates=577, lr=5.77e-05, gnorm=3.035, loss_scale=4, train_wall=11, gb_free=2.8, wall=6507 2021-06-18 20:27:24 | INFO | train_inner | epoch 001: 586 / 3002 loss=3.187, ppl=9.11, wps=5915.3, ups=0.09, wpb=64884, bsz=128, num_updates=578, lr=5.78e-05, gnorm=3.26, loss_scale=4, train_wall=10, gb_free=2.8, wall=6518 2021-06-18 20:27:35 | INFO | train_inner | epoch 001: 587 / 3002 loss=3.236, ppl=9.42, wps=5965.1, ups=0.09, wpb=64899, bsz=128, num_updates=579, lr=5.79e-05, gnorm=2.958, loss_scale=4, train_wall=10, gb_free=2.8, wall=6529 2021-06-18 20:27:46 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-18 20:27:57 | INFO | train_inner | epoch 001: 589 / 3002 loss=3.149, ppl=8.87, wps=2919, ups=0.04, wpb=64881, bsz=128, num_updates=580, lr=5.8e-05, gnorm=2.987, loss_scale=2, train_wall=21, gb_free=2.8, wall=6551 2021-06-18 20:28:08 | INFO | train_inner | epoch 001: 590 / 3002 loss=3.171, ppl=9.01, wps=5729.7, ups=0.09, wpb=64868, bsz=128, num_updates=581, lr=5.81e-05, gnorm=2.803, loss_scale=2, train_wall=11, gb_free=2.8, wall=6563 2021-06-18 20:28:19 | INFO | train_inner | epoch 001: 591 / 3002 loss=3.116, ppl=8.67, wps=5985.5, ups=0.09, wpb=64813, bsz=128, num_updates=582, lr=5.82e-05, gnorm=3.839, loss_scale=2, train_wall=10, gb_free=2.8, wall=6573 2021-06-18 20:28:30 | INFO | train_inner | epoch 001: 592 / 3002 loss=3.401, ppl=10.57, wps=5846, ups=0.09, wpb=64806, bsz=128, num_updates=583, lr=5.83e-05, gnorm=3.548, loss_scale=2, train_wall=11, gb_free=2.8, wall=6584 2021-06-18 20:28:41 | INFO | train_inner | epoch 001: 593 / 3002 loss=3.292, ppl=9.8, wps=5832.1, ups=0.09, wpb=64898, bsz=128, num_updates=584, lr=5.84e-05, gnorm=2.936, loss_scale=2, train_wall=11, gb_free=2.8, wall=6596 2021-06-18 20:28:52 | INFO | train_inner | epoch 001: 594 / 3002 loss=3.202, ppl=9.2, wps=5809.9, ups=0.09, wpb=64828, bsz=128, num_updates=585, lr=5.85e-05, gnorm=8.991, loss_scale=2, train_wall=11, gb_free=2.8, wall=6607 2021-06-18 20:29:03 | INFO | train_inner | epoch 001: 595 / 3002 loss=3.373, ppl=10.36, wps=5868, ups=0.09, wpb=64871, bsz=128, num_updates=586, lr=5.86e-05, gnorm=3.011, loss_scale=2, train_wall=11, gb_free=2.8, wall=6618 2021-06-18 20:29:15 | INFO | train_inner | epoch 001: 596 / 3002 loss=3.246, ppl=9.49, wps=5879.4, ups=0.09, wpb=64838, bsz=128, num_updates=587, lr=5.87e-05, gnorm=2.918, loss_scale=2, train_wall=11, gb_free=2.8, wall=6629 2021-06-18 20:29:26 | INFO | train_inner | epoch 001: 597 / 3002 loss=3.314, ppl=9.95, wps=5876.7, ups=0.09, wpb=64844, bsz=128, num_updates=588, lr=5.88e-05, gnorm=2.865, loss_scale=2, train_wall=11, gb_free=2.8, wall=6640 2021-06-18 20:29:37 | INFO | train_inner | epoch 001: 598 / 3002 loss=3.482, ppl=11.17, wps=5744.1, ups=0.09, wpb=64847, bsz=128, num_updates=589, lr=5.89e-05, gnorm=2.981, loss_scale=2, train_wall=11, gb_free=2.8, wall=6651 2021-06-18 20:29:48 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 2021-06-18 20:29:59 | INFO | train_inner | epoch 001: 600 / 3002 loss=3.201, ppl=9.19, wps=2931.2, ups=0.05, wpb=64767, bsz=128, num_updates=590, lr=5.9e-05, gnorm=3.393, loss_scale=1, train_wall=21, gb_free=2.8, wall=6673 2021-06-18 20:30:10 | INFO | train_inner | epoch 001: 601 / 3002 loss=2.959, ppl=7.78, wps=5843.6, ups=0.09, wpb=64847, bsz=128, num_updates=591, lr=5.91e-05, gnorm=21.991, loss_scale=1, train_wall=11, gb_free=2.8, wall=6684 2021-06-18 20:30:21 | INFO | train_inner | epoch 001: 602 / 3002 loss=3.355, ppl=10.23, wps=5917.9, ups=0.09, wpb=64796, bsz=128, num_updates=592, lr=5.92e-05, gnorm=3.072, loss_scale=1, train_wall=10, gb_free=2.8, wall=6695 2021-06-18 20:30:32 | INFO | train_inner | epoch 001: 603 / 3002 loss=3.154, ppl=8.9, wps=5924.1, ups=0.09, wpb=64903, bsz=128, num_updates=593, lr=5.93e-05, gnorm=2.748, loss_scale=1, train_wall=10, gb_free=2.8, wall=6706 2021-06-18 20:30:43 | INFO | train_inner | epoch 001: 604 / 3002 loss=3.237, ppl=9.43, wps=5975.5, ups=0.09, wpb=64812, bsz=128, num_updates=594, lr=5.94e-05, gnorm=2.769, loss_scale=1, train_wall=10, gb_free=2.8, wall=6717 2021-06-18 20:30:54 | INFO | train_inner | epoch 001: 605 / 3002 loss=3.132, ppl=8.77, wps=5839.5, ups=0.09, wpb=64888, bsz=128, num_updates=595, lr=5.95e-05, gnorm=2.706, loss_scale=1, train_wall=11, gb_free=2.8, wall=6728 2021-06-18 20:31:05 | INFO | train_inner | epoch 001: 606 / 3002 loss=3.174, ppl=9.02, wps=5788.6, ups=0.09, wpb=64842, bsz=128, num_updates=596, lr=5.96e-05, gnorm=2.918, loss_scale=1, train_wall=11, gb_free=2.8, wall=6739 2021-06-18 20:31:16 | INFO | train_inner | epoch 001: 607 / 3002 loss=3.051, ppl=8.29, wps=5879.4, ups=0.09, wpb=64868, bsz=128, num_updates=597, lr=5.97e-05, gnorm=3.023, loss_scale=1, train_wall=11, gb_free=2.8, wall=6750 2021-06-18 20:31:27 | INFO | train_inner | epoch 001: 608 / 3002 loss=3.061, ppl=8.35, wps=5855.4, ups=0.09, wpb=64791, bsz=128, num_updates=598, lr=5.98e-05, gnorm=2.822, loss_scale=1, train_wall=11, gb_free=2.8, wall=6762 2021-06-18 20:31:38 | INFO | train_inner | epoch 001: 609 / 3002 loss=3.226, ppl=9.36, wps=5861.9, ups=0.09, wpb=64855, bsz=128, num_updates=599, lr=5.99e-05, gnorm=2.868, loss_scale=1, train_wall=11, gb_free=2.8, wall=6773 2021-06-18 20:31:49 | INFO | train_inner | epoch 001: 610 / 3002 loss=3.145, ppl=8.84, wps=5817.7, ups=0.09, wpb=64800, bsz=128, num_updates=600, lr=6e-05, gnorm=4.338, loss_scale=1, train_wall=11, gb_free=2.8, wall=6784 2021-06-18 20:32:01 | INFO | train_inner | epoch 001: 611 / 3002 loss=3.231, ppl=9.39, wps=5775.8, ups=0.09, wpb=64805, bsz=128, num_updates=601, lr=6.01e-05, gnorm=2.892, loss_scale=1, train_wall=11, gb_free=2.8, wall=6795 2021-06-18 20:32:12 | INFO | train_inner | epoch 001: 612 / 3002 loss=3.196, ppl=9.17, wps=5787.7, ups=0.09, wpb=64814, bsz=128, num_updates=602, lr=6.02e-05, gnorm=3.278, loss_scale=1, train_wall=11, gb_free=2.8, wall=6806 2021-06-18 20:32:23 | INFO | train_inner | epoch 001: 613 / 3002 loss=3.149, ppl=8.87, wps=5918, ups=0.09, wpb=64838, bsz=128, num_updates=603, lr=6.03e-05, gnorm=2.912, loss_scale=1, train_wall=11, gb_free=2.8, wall=6817 2021-06-18 20:32:34 | INFO | train_inner | epoch 001: 614 / 3002 loss=3.17, ppl=9, wps=5890.3, ups=0.09, wpb=64935, bsz=128, num_updates=604, lr=6.04e-05, gnorm=3.102, loss_scale=1, train_wall=11, gb_free=2.8, wall=6828 2021-06-18 20:32:45 | INFO | train_inner | epoch 001: 615 / 3002 loss=3.262, ppl=9.59, wps=5942.9, ups=0.09, wpb=64808, bsz=128, num_updates=605, lr=6.05e-05, gnorm=3.583, loss_scale=1, train_wall=10, gb_free=2.8, wall=6839 2021-06-18 20:32:56 | INFO | train_inner | epoch 001: 616 / 3002 loss=3.358, ppl=10.25, wps=5930, ups=0.09, wpb=64843, bsz=128, num_updates=606, lr=6.06e-05, gnorm=2.972, loss_scale=1, train_wall=11, gb_free=2.8, wall=6850 2021-06-18 20:33:07 | INFO | train_inner | epoch 001: 617 / 3002 loss=3.273, ppl=9.67, wps=5765.3, ups=0.09, wpb=64797, bsz=128, num_updates=607, lr=6.07e-05, gnorm=2.805, loss_scale=1, train_wall=11, gb_free=2.8, wall=6861 2021-06-18 20:33:18 | INFO | train_inner | epoch 001: 618 / 3002 loss=3.158, ppl=8.92, wps=5888.8, ups=0.09, wpb=64959, bsz=128, num_updates=608, lr=6.08e-05, gnorm=9.323, loss_scale=1, train_wall=11, gb_free=2.8, wall=6872 2021-06-18 20:33:29 | INFO | train_inner | epoch 001: 619 / 3002 loss=3.071, ppl=8.4, wps=5769.5, ups=0.09, wpb=64907, bsz=128, num_updates=609, lr=6.09e-05, gnorm=3.385, loss_scale=1, train_wall=11, gb_free=2.8, wall=6884 2021-06-18 20:33:40 | INFO | train_inner | epoch 001: 620 / 3002 loss=3.172, ppl=9.01, wps=5866.8, ups=0.09, wpb=64857, bsz=128, num_updates=610, lr=6.1e-05, gnorm=3.007, loss_scale=1, train_wall=11, gb_free=2.8, wall=6895 2021-06-18 20:33:51 | INFO | train_inner | epoch 001: 621 / 3002 loss=3.061, ppl=8.34, wps=5786.6, ups=0.09, wpb=64817, bsz=128, num_updates=611, lr=6.11e-05, gnorm=3.173, loss_scale=1, train_wall=11, gb_free=2.8, wall=6906 2021-06-18 20:34:03 | INFO | train_inner | epoch 001: 622 / 3002 loss=3.093, ppl=8.53, wps=5846.3, ups=0.09, wpb=64870, bsz=128, num_updates=612, lr=6.12e-05, gnorm=2.729, loss_scale=1, train_wall=11, gb_free=2.8, wall=6917 2021-06-18 20:34:14 | INFO | train_inner | epoch 001: 623 / 3002 loss=3.284, ppl=9.74, wps=5839.1, ups=0.09, wpb=64845, bsz=128, num_updates=613, lr=6.13e-05, gnorm=4.951, loss_scale=1, train_wall=11, gb_free=2.8, wall=6928 2021-06-18 20:34:25 | INFO | train_inner | epoch 001: 624 / 3002 loss=3.173, ppl=9.02, wps=5941.4, ups=0.09, wpb=64839, bsz=128, num_updates=614, lr=6.14e-05, gnorm=5.23, loss_scale=1, train_wall=10, gb_free=2.8, wall=6939 2021-06-18 20:34:36 | INFO | train_inner | epoch 001: 625 / 3002 loss=3.094, ppl=8.54, wps=5888.2, ups=0.09, wpb=64843, bsz=128, num_updates=615, lr=6.15e-05, gnorm=4.035, loss_scale=1, train_wall=11, gb_free=2.8, wall=6950 2021-06-18 20:34:47 | INFO | train_inner | epoch 001: 626 / 3002 loss=3.223, ppl=9.34, wps=5831.4, ups=0.09, wpb=64761, bsz=128, num_updates=616, lr=6.16e-05, gnorm=2.962, loss_scale=1, train_wall=11, gb_free=2.8, wall=6961 2021-06-18 20:34:58 | INFO | train_inner | epoch 001: 627 / 3002 loss=3.23, ppl=9.38, wps=5675.9, ups=0.09, wpb=64744, bsz=128, num_updates=617, lr=6.17e-05, gnorm=3.173, loss_scale=1, train_wall=11, gb_free=2.8, wall=6972 2021-06-18 20:35:09 | INFO | train_inner | epoch 001: 628 / 3002 loss=3.15, ppl=8.88, wps=5985.1, ups=0.09, wpb=64768, bsz=128, num_updates=618, lr=6.18e-05, gnorm=2.837, loss_scale=1, train_wall=10, gb_free=2.8, wall=6983 2021-06-18 20:35:20 | INFO | train_inner | epoch 001: 629 / 3002 loss=3.269, ppl=9.64, wps=5906.5, ups=0.09, wpb=64790, bsz=128, num_updates=619, lr=6.19e-05, gnorm=2.875, loss_scale=1, train_wall=11, gb_free=2.8, wall=6994 2021-06-18 20:35:31 | INFO | train_inner | epoch 001: 630 / 3002 loss=3.226, ppl=9.35, wps=5851.9, ups=0.09, wpb=64806, bsz=128, num_updates=620, lr=6.2e-05, gnorm=3.137, loss_scale=1, train_wall=11, gb_free=2.8, wall=7005 2021-06-18 20:35:42 | INFO | train_inner | epoch 001: 631 / 3002 loss=3.003, ppl=8.02, wps=5901.9, ups=0.09, wpb=64890, bsz=128, num_updates=621, lr=6.21e-05, gnorm=3.313, loss_scale=1, train_wall=11, gb_free=2.8, wall=7016 2021-06-18 20:35:53 | INFO | train_inner | epoch 001: 632 / 3002 loss=3.102, ppl=8.58, wps=5838.7, ups=0.09, wpb=64826, bsz=128, num_updates=622, lr=6.22e-05, gnorm=2.706, loss_scale=1, train_wall=11, gb_free=2.8, wall=7027 2021-06-18 20:36:04 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 2021-06-18 20:36:15 | INFO | train_inner | epoch 001: 634 / 3002 loss=3.032, ppl=8.18, wps=2926.2, ups=0.05, wpb=64876, bsz=128, num_updates=623, lr=6.23e-05, gnorm=3.166, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=7050 2021-06-18 20:36:26 | INFO | train_inner | epoch 001: 635 / 3002 loss=3.372, ppl=10.35, wps=5874.7, ups=0.09, wpb=64879, bsz=128, num_updates=624, lr=6.24e-05, gnorm=2.928, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7061 2021-06-18 20:36:37 | INFO | train_inner | epoch 001: 636 / 3002 loss=3.108, ppl=8.62, wps=5944.3, ups=0.09, wpb=64879, bsz=128, num_updates=625, lr=6.25e-05, gnorm=2.847, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7071 2021-06-18 20:36:48 | INFO | train_inner | epoch 001: 637 / 3002 loss=3.262, ppl=9.6, wps=5736.4, ups=0.09, wpb=64880, bsz=128, num_updates=626, lr=6.26e-05, gnorm=2.948, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7083 2021-06-18 20:36:59 | INFO | train_inner | epoch 001: 638 / 3002 loss=3.01, ppl=8.06, wps=5941.2, ups=0.09, wpb=64871, bsz=128, num_updates=627, lr=6.27e-05, gnorm=4.344, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7094 2021-06-18 20:37:11 | INFO | train_inner | epoch 001: 639 / 3002 loss=3.179, ppl=9.06, wps=5829.3, ups=0.09, wpb=64913, bsz=128, num_updates=628, lr=6.28e-05, gnorm=3.03, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7105 2021-06-18 20:37:22 | INFO | train_inner | epoch 001: 640 / 3002 loss=3.394, ppl=10.51, wps=5870, ups=0.09, wpb=64826, bsz=128, num_updates=629, lr=6.29e-05, gnorm=2.897, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7116 2021-06-18 20:37:33 | INFO | train_inner | epoch 001: 641 / 3002 loss=3.352, ppl=10.21, wps=5860.7, ups=0.09, wpb=64752, bsz=128, num_updates=630, lr=6.3e-05, gnorm=2.924, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7127 2021-06-18 20:37:44 | INFO | train_inner | epoch 001: 642 / 3002 loss=3.18, ppl=9.07, wps=5945.2, ups=0.09, wpb=64823, bsz=128, num_updates=631, lr=6.31e-05, gnorm=2.878, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7138 2021-06-18 20:37:54 | INFO | train_inner | epoch 001: 643 / 3002 loss=3.164, ppl=8.96, wps=5912.6, ups=0.09, wpb=64875, bsz=128, num_updates=632, lr=6.32e-05, gnorm=2.868, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7149 2021-06-18 20:38:06 | INFO | train_inner | epoch 001: 644 / 3002 loss=3.242, ppl=9.46, wps=5824.1, ups=0.09, wpb=64807, bsz=128, num_updates=633, lr=6.33e-05, gnorm=2.889, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7160 2021-06-18 20:38:17 | INFO | train_inner | epoch 001: 645 / 3002 loss=3.236, ppl=9.42, wps=5892.6, ups=0.09, wpb=64854, bsz=128, num_updates=634, lr=6.34e-05, gnorm=4.238, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7171 2021-06-18 20:38:28 | INFO | train_inner | epoch 001: 646 / 3002 loss=3.112, ppl=8.65, wps=5840.4, ups=0.09, wpb=64845, bsz=128, num_updates=635, lr=6.35e-05, gnorm=2.886, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7182 2021-06-18 20:38:39 | INFO | train_inner | epoch 001: 647 / 3002 loss=3.22, ppl=9.32, wps=5758.4, ups=0.09, wpb=64829, bsz=128, num_updates=636, lr=6.36e-05, gnorm=2.931, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7193 2021-06-18 20:38:50 | INFO | train_inner | epoch 001: 648 / 3002 loss=3.16, ppl=8.94, wps=5866.7, ups=0.09, wpb=64761, bsz=128, num_updates=637, lr=6.37e-05, gnorm=3.12, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7204 2021-06-18 20:39:01 | INFO | train_inner | epoch 001: 649 / 3002 loss=3.277, ppl=9.69, wps=5765.6, ups=0.09, wpb=64786, bsz=128, num_updates=638, lr=6.38e-05, gnorm=3.102, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7216 2021-06-18 20:39:12 | INFO | train_inner | epoch 001: 650 / 3002 loss=3.217, ppl=9.3, wps=5894.6, ups=0.09, wpb=64793, bsz=128, num_updates=639, lr=6.39e-05, gnorm=2.841, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7227 2021-06-18 20:39:23 | INFO | train_inner | epoch 001: 651 / 3002 loss=3.298, ppl=9.84, wps=5826.2, ups=0.09, wpb=64807, bsz=128, num_updates=640, lr=6.4e-05, gnorm=2.756, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7238 2021-06-18 20:39:34 | INFO | train_inner | epoch 001: 652 / 3002 loss=3.238, ppl=9.44, wps=5918.9, ups=0.09, wpb=64868, bsz=128, num_updates=641, lr=6.41e-05, gnorm=3.006, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7249 2021-06-18 20:39:45 | INFO | train_inner | epoch 001: 653 / 3002 loss=3.141, ppl=8.82, wps=6007.8, ups=0.09, wpb=64868, bsz=128, num_updates=642, lr=6.42e-05, gnorm=3.279, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7259 2021-06-18 20:39:56 | INFO | train_inner | epoch 001: 654 / 3002 loss=3.261, ppl=9.58, wps=5859.1, ups=0.09, wpb=64825, bsz=128, num_updates=643, lr=6.43e-05, gnorm=2.918, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7271 2021-06-18 20:40:07 | INFO | train_inner | epoch 001: 655 / 3002 loss=3.163, ppl=8.96, wps=5889.8, ups=0.09, wpb=64883, bsz=128, num_updates=644, lr=6.44e-05, gnorm=2.802, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7282 2021-06-18 20:40:18 | INFO | train_inner | epoch 001: 656 / 3002 loss=3.145, ppl=8.85, wps=5867.1, ups=0.09, wpb=64765, bsz=128, num_updates=645, lr=6.45e-05, gnorm=2.828, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7293 2021-06-18 20:40:29 | INFO | train_inner | epoch 001: 657 / 3002 loss=3.235, ppl=9.41, wps=5865.8, ups=0.09, wpb=64876, bsz=128, num_updates=646, lr=6.46e-05, gnorm=3.04, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7304 2021-06-18 20:40:41 | INFO | train_inner | epoch 001: 658 / 3002 loss=3.271, ppl=9.65, wps=5725.3, ups=0.09, wpb=64839, bsz=128, num_updates=647, lr=6.47e-05, gnorm=2.954, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7315 2021-06-18 20:40:52 | INFO | train_inner | epoch 001: 659 / 3002 loss=3.368, ppl=10.32, wps=5867.1, ups=0.09, wpb=64847, bsz=128, num_updates=648, lr=6.48e-05, gnorm=2.81, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7326 2021-06-18 20:41:03 | INFO | train_inner | epoch 001: 660 / 3002 loss=2.982, ppl=7.9, wps=5802.6, ups=0.09, wpb=64790, bsz=128, num_updates=649, lr=6.49e-05, gnorm=3.077, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7337 2021-06-18 20:41:14 | INFO | train_inner | epoch 001: 661 / 3002 loss=3.242, ppl=9.46, wps=5782.4, ups=0.09, wpb=64737, bsz=128, num_updates=650, lr=6.5e-05, gnorm=3.166, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7348 2021-06-18 20:41:25 | INFO | train_inner | epoch 001: 662 / 3002 loss=3.128, ppl=8.74, wps=5860.5, ups=0.09, wpb=64831, bsz=128, num_updates=651, lr=6.51e-05, gnorm=2.799, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7359 2021-06-18 20:41:36 | INFO | train_inner | epoch 001: 663 / 3002 loss=3.258, ppl=9.56, wps=5863.8, ups=0.09, wpb=64850, bsz=128, num_updates=652, lr=6.52e-05, gnorm=2.941, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7371 2021-06-18 20:41:47 | INFO | train_inner | epoch 001: 664 / 3002 loss=3.25, ppl=9.51, wps=5887.3, ups=0.09, wpb=64731, bsz=128, num_updates=653, lr=6.53e-05, gnorm=6.886, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7382 2021-06-18 20:41:58 | INFO | train_inner | epoch 001: 665 / 3002 loss=3.227, ppl=9.36, wps=5856.4, ups=0.09, wpb=64858, bsz=128, num_updates=654, lr=6.54e-05, gnorm=2.908, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7393 2021-06-18 20:42:09 | INFO | train_inner | epoch 001: 666 / 3002 loss=3.123, ppl=8.71, wps=5787.8, ups=0.09, wpb=64776, bsz=128, num_updates=655, lr=6.55e-05, gnorm=2.911, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7404 2021-06-18 20:42:20 | INFO | train_inner | epoch 001: 667 / 3002 loss=3.239, ppl=9.44, wps=5955.9, ups=0.09, wpb=64871, bsz=128, num_updates=656, lr=6.56e-05, gnorm=2.853, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7415 2021-06-18 20:42:32 | INFO | train_inner | epoch 001: 668 / 3002 loss=3.219, ppl=9.31, wps=5792.4, ups=0.09, wpb=64817, bsz=128, num_updates=657, lr=6.57e-05, gnorm=2.992, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7426 2021-06-18 20:42:43 | INFO | train_inner | epoch 001: 669 / 3002 loss=3.061, ppl=8.34, wps=5880.2, ups=0.09, wpb=64851, bsz=128, num_updates=658, lr=6.58e-05, gnorm=2.803, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7437 2021-06-18 20:42:53 | INFO | train_inner | epoch 001: 670 / 3002 loss=3.154, ppl=8.9, wps=6013.9, ups=0.09, wpb=64864, bsz=128, num_updates=659, lr=6.59e-05, gnorm=2.828, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7448 2021-06-18 20:43:05 | INFO | train_inner | epoch 001: 671 / 3002 loss=3.096, ppl=8.55, wps=5783.7, ups=0.09, wpb=64790, bsz=128, num_updates=660, lr=6.6e-05, gnorm=4.359, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7459 2021-06-18 20:43:16 | INFO | train_inner | epoch 001: 672 / 3002 loss=3.129, ppl=8.75, wps=5852.2, ups=0.09, wpb=64841, bsz=128, num_updates=661, lr=6.61e-05, gnorm=3.983, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7470 2021-06-18 20:43:27 | INFO | train_inner | epoch 001: 673 / 3002 loss=3.207, ppl=9.23, wps=5870.5, ups=0.09, wpb=64869, bsz=128, num_updates=662, lr=6.62e-05, gnorm=4.59, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7481 2021-06-18 20:43:38 | INFO | train_inner | epoch 001: 674 / 3002 loss=3.064, ppl=8.36, wps=5834.9, ups=0.09, wpb=64786, bsz=128, num_updates=663, lr=6.63e-05, gnorm=2.899, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7492 2021-06-18 20:43:49 | INFO | train_inner | epoch 001: 675 / 3002 loss=3.234, ppl=9.41, wps=5888.1, ups=0.09, wpb=64803, bsz=128, num_updates=664, lr=6.64e-05, gnorm=2.998, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7503 2021-06-18 20:44:00 | INFO | train_inner | epoch 001: 676 / 3002 loss=3.3, ppl=9.85, wps=5756, ups=0.09, wpb=64795, bsz=128, num_updates=665, lr=6.65e-05, gnorm=2.846, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7514 2021-06-18 20:44:11 | INFO | train_inner | epoch 001: 677 / 3002 loss=3.069, ppl=8.39, wps=5925.3, ups=0.09, wpb=64797, bsz=128, num_updates=666, lr=6.66e-05, gnorm=2.82, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7525 2021-06-18 20:44:22 | INFO | train_inner | epoch 001: 678 / 3002 loss=3.05, ppl=8.28, wps=5877.2, ups=0.09, wpb=64859, bsz=128, num_updates=667, lr=6.67e-05, gnorm=2.787, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7536 2021-06-18 20:44:33 | INFO | train_inner | epoch 001: 679 / 3002 loss=3.141, ppl=8.82, wps=5942.5, ups=0.09, wpb=64905, bsz=128, num_updates=668, lr=6.68e-05, gnorm=2.813, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7547 2021-06-18 20:44:44 | INFO | train_inner | epoch 001: 680 / 3002 loss=3.082, ppl=8.47, wps=5953.7, ups=0.09, wpb=64874, bsz=128, num_updates=669, lr=6.69e-05, gnorm=3.867, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7558 2021-06-18 20:44:55 | INFO | train_inner | epoch 001: 681 / 3002 loss=3.199, ppl=9.18, wps=5767.7, ups=0.09, wpb=64778, bsz=128, num_updates=670, lr=6.7e-05, gnorm=2.848, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7569 2021-06-18 20:45:06 | INFO | train_inner | epoch 001: 682 / 3002 loss=3.172, ppl=9.01, wps=5734.4, ups=0.09, wpb=64882, bsz=128, num_updates=671, lr=6.71e-05, gnorm=2.841, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7581 2021-06-18 20:45:17 | INFO | train_inner | epoch 001: 683 / 3002 loss=3.213, ppl=9.27, wps=5959.8, ups=0.09, wpb=64883, bsz=128, num_updates=672, lr=6.72e-05, gnorm=3.05, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7592 2021-06-18 20:45:28 | INFO | train_inner | epoch 001: 684 / 3002 loss=3.144, ppl=8.84, wps=5786.3, ups=0.09, wpb=64774, bsz=128, num_updates=673, lr=6.73e-05, gnorm=2.834, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7603 2021-06-18 20:45:39 | INFO | train_inner | epoch 001: 685 / 3002 loss=3.224, ppl=9.34, wps=5877.9, ups=0.09, wpb=64769, bsz=128, num_updates=674, lr=6.74e-05, gnorm=2.829, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7614 2021-06-18 20:45:51 | INFO | train_inner | epoch 001: 686 / 3002 loss=3.102, ppl=8.58, wps=5771.3, ups=0.09, wpb=64840, bsz=128, num_updates=675, lr=6.75e-05, gnorm=2.865, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7625 2021-06-18 20:46:02 | INFO | train_inner | epoch 001: 687 / 3002 loss=3.068, ppl=8.39, wps=5946.3, ups=0.09, wpb=64863, bsz=128, num_updates=676, lr=6.76e-05, gnorm=3.233, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7636 2021-06-18 20:46:13 | INFO | train_inner | epoch 001: 688 / 3002 loss=3.149, ppl=8.87, wps=5758, ups=0.09, wpb=64697, bsz=128, num_updates=677, lr=6.77e-05, gnorm=2.712, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7647 2021-06-18 20:46:24 | INFO | train_inner | epoch 001: 689 / 3002 loss=3.171, ppl=9.01, wps=5906.1, ups=0.09, wpb=64806, bsz=128, num_updates=678, lr=6.78e-05, gnorm=2.878, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7658 2021-06-18 20:46:35 | INFO | train_inner | epoch 001: 690 / 3002 loss=3.216, ppl=9.29, wps=5941.4, ups=0.09, wpb=64902, bsz=128, num_updates=679, lr=6.79e-05, gnorm=3.439, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7669 2021-06-18 20:46:46 | INFO | train_inner | epoch 001: 691 / 3002 loss=3.161, ppl=8.95, wps=5810.1, ups=0.09, wpb=64833, bsz=128, num_updates=680, lr=6.8e-05, gnorm=2.952, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7680 2021-06-18 20:46:57 | INFO | train_inner | epoch 001: 692 / 3002 loss=3.125, ppl=8.73, wps=5949.5, ups=0.09, wpb=64774, bsz=128, num_updates=681, lr=6.81e-05, gnorm=2.81, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7691 2021-06-18 20:47:08 | INFO | train_inner | epoch 001: 693 / 3002 loss=3.163, ppl=8.95, wps=5909, ups=0.09, wpb=64911, bsz=128, num_updates=682, lr=6.82e-05, gnorm=2.831, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7702 2021-06-18 20:47:19 | INFO | train_inner | epoch 001: 694 / 3002 loss=3.308, ppl=9.91, wps=5923.1, ups=0.09, wpb=64785, bsz=128, num_updates=683, lr=6.83e-05, gnorm=3.142, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7713 2021-06-18 20:47:30 | INFO | train_inner | epoch 001: 695 / 3002 loss=3.111, ppl=8.64, wps=5802.6, ups=0.09, wpb=64869, bsz=128, num_updates=684, lr=6.84e-05, gnorm=2.987, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7724 2021-06-18 20:47:41 | INFO | train_inner | epoch 001: 696 / 3002 loss=3.068, ppl=8.39, wps=5952.3, ups=0.09, wpb=64817, bsz=128, num_updates=685, lr=6.85e-05, gnorm=2.761, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7735 2021-06-18 20:47:52 | INFO | train_inner | epoch 001: 697 / 3002 loss=3.057, ppl=8.32, wps=5850.6, ups=0.09, wpb=64796, bsz=128, num_updates=686, lr=6.86e-05, gnorm=31.392, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7746 2021-06-18 20:48:03 | INFO | train_inner | epoch 001: 698 / 3002 loss=3.213, ppl=9.28, wps=5781.1, ups=0.09, wpb=64851, bsz=128, num_updates=687, lr=6.87e-05, gnorm=3.07, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7757 2021-06-18 20:48:14 | INFO | train_inner | epoch 001: 699 / 3002 loss=3.167, ppl=8.98, wps=5791.4, ups=0.09, wpb=64815, bsz=128, num_updates=688, lr=6.88e-05, gnorm=7.673, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7769 2021-06-18 20:48:25 | INFO | train_inner | epoch 001: 700 / 3002 loss=3.156, ppl=8.91, wps=5812.8, ups=0.09, wpb=64876, bsz=128, num_updates=689, lr=6.89e-05, gnorm=2.828, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7780 2021-06-18 20:48:37 | INFO | train_inner | epoch 001: 701 / 3002 loss=3.066, ppl=8.38, wps=5841.2, ups=0.09, wpb=64801, bsz=128, num_updates=690, lr=6.9e-05, gnorm=3.218, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7791 2021-06-18 20:48:48 | INFO | train_inner | epoch 001: 702 / 3002 loss=3.086, ppl=8.49, wps=5828.1, ups=0.09, wpb=64781, bsz=128, num_updates=691, lr=6.91e-05, gnorm=2.915, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7802 2021-06-18 20:48:59 | INFO | train_inner | epoch 001: 703 / 3002 loss=3.129, ppl=8.75, wps=5935.5, ups=0.09, wpb=64874, bsz=128, num_updates=692, lr=6.92e-05, gnorm=2.908, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7813 2021-06-18 20:49:09 | INFO | train_inner | epoch 001: 704 / 3002 loss=3.186, ppl=9.1, wps=5965.1, ups=0.09, wpb=64780, bsz=128, num_updates=693, lr=6.93e-05, gnorm=2.941, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7824 2021-06-18 20:49:21 | INFO | train_inner | epoch 001: 705 / 3002 loss=3.326, ppl=10.03, wps=5852.5, ups=0.09, wpb=64856, bsz=128, num_updates=694, lr=6.94e-05, gnorm=4.869, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7835 2021-06-18 20:49:32 | INFO | train_inner | epoch 001: 706 / 3002 loss=3.174, ppl=9.02, wps=5902.8, ups=0.09, wpb=64892, bsz=128, num_updates=695, lr=6.95e-05, gnorm=4.206, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7846 2021-06-18 20:49:43 | INFO | train_inner | epoch 001: 707 / 3002 loss=3.328, ppl=10.04, wps=5865.9, ups=0.09, wpb=64802, bsz=128, num_updates=696, lr=6.96e-05, gnorm=3.16, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7857 2021-06-18 20:49:54 | INFO | train_inner | epoch 001: 708 / 3002 loss=3.121, ppl=8.7, wps=5907.7, ups=0.09, wpb=64854, bsz=128, num_updates=697, lr=6.97e-05, gnorm=2.975, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7868 2021-06-18 20:50:05 | INFO | train_inner | epoch 001: 709 / 3002 loss=3.306, ppl=9.89, wps=5874.5, ups=0.09, wpb=64871, bsz=128, num_updates=698, lr=6.98e-05, gnorm=2.894, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7879 2021-06-18 20:50:16 | INFO | train_inner | epoch 001: 710 / 3002 loss=3.227, ppl=9.36, wps=5810.7, ups=0.09, wpb=64884, bsz=128, num_updates=699, lr=6.99e-05, gnorm=6.945, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7890 2021-06-18 20:50:27 | INFO | train_inner | epoch 001: 711 / 3002 loss=3.237, ppl=9.43, wps=5921.9, ups=0.09, wpb=64827, bsz=128, num_updates=700, lr=7e-05, gnorm=2.993, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7901 2021-06-18 20:50:38 | INFO | train_inner | epoch 001: 712 / 3002 loss=3.31, ppl=9.92, wps=5878, ups=0.09, wpb=64851, bsz=128, num_updates=701, lr=7.01e-05, gnorm=3.111, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7912 2021-06-18 20:50:49 | INFO | train_inner | epoch 001: 713 / 3002 loss=3.351, ppl=10.2, wps=5929.9, ups=0.09, wpb=64805, bsz=128, num_updates=702, lr=7.02e-05, gnorm=2.923, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7923 2021-06-18 20:51:00 | INFO | train_inner | epoch 001: 714 / 3002 loss=3.022, ppl=8.12, wps=5872.7, ups=0.09, wpb=64865, bsz=128, num_updates=703, lr=7.03e-05, gnorm=3.013, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7934 2021-06-18 20:51:11 | INFO | train_inner | epoch 001: 715 / 3002 loss=3.319, ppl=9.98, wps=5870.2, ups=0.09, wpb=64787, bsz=128, num_updates=704, lr=7.04e-05, gnorm=2.994, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7945 2021-06-18 20:51:22 | INFO | train_inner | epoch 001: 716 / 3002 loss=3.2, ppl=9.19, wps=5918.9, ups=0.09, wpb=64902, bsz=128, num_updates=705, lr=7.05e-05, gnorm=2.839, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7956 2021-06-18 20:51:33 | INFO | train_inner | epoch 001: 717 / 3002 loss=3.247, ppl=9.5, wps=5787.4, ups=0.09, wpb=64749, bsz=128, num_updates=706, lr=7.06e-05, gnorm=3.924, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7967 2021-06-18 20:51:44 | INFO | train_inner | epoch 001: 718 / 3002 loss=3.423, ppl=10.73, wps=5863.6, ups=0.09, wpb=64767, bsz=128, num_updates=707, lr=7.07e-05, gnorm=52.345, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7978 2021-06-18 20:51:55 | INFO | train_inner | epoch 001: 719 / 3002 loss=3.446, ppl=10.9, wps=5864.6, ups=0.09, wpb=64761, bsz=128, num_updates=708, lr=7.08e-05, gnorm=3.014, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7989 2021-06-18 20:52:06 | INFO | train_inner | epoch 001: 720 / 3002 loss=3.237, ppl=9.43, wps=5868, ups=0.09, wpb=64844, bsz=128, num_updates=709, lr=7.09e-05, gnorm=2.908, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8000 2021-06-18 20:52:17 | INFO | train_inner | epoch 001: 721 / 3002 loss=3.195, ppl=9.16, wps=5772.3, ups=0.09, wpb=64852, bsz=128, num_updates=710, lr=7.1e-05, gnorm=2.891, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8012 2021-06-18 20:52:28 | INFO | train_inner | epoch 001: 722 / 3002 loss=3.128, ppl=8.74, wps=5844.3, ups=0.09, wpb=64848, bsz=128, num_updates=711, lr=7.11e-05, gnorm=3.158, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8023 2021-06-18 20:52:39 | INFO | train_inner | epoch 001: 723 / 3002 loss=3.224, ppl=9.34, wps=5997, ups=0.09, wpb=64876, bsz=128, num_updates=712, lr=7.12e-05, gnorm=2.799, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=8034 2021-06-18 20:52:50 | INFO | train_inner | epoch 001: 724 / 3002 loss=3.295, ppl=9.81, wps=5863.6, ups=0.09, wpb=64819, bsz=128, num_updates=713, lr=7.13e-05, gnorm=2.761, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8045 2021-06-18 20:53:01 | INFO | train_inner | epoch 001: 725 / 3002 loss=3.089, ppl=8.51, wps=5934.2, ups=0.09, wpb=64868, bsz=128, num_updates=714, lr=7.14e-05, gnorm=2.7, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=8056 2021-06-18 20:53:12 | INFO | train_inner | epoch 001: 726 / 3002 loss=3.214, ppl=9.28, wps=5787.3, ups=0.09, wpb=64791, bsz=128, num_updates=715, lr=7.15e-05, gnorm=3.467, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8067 2021-06-18 20:53:23 | INFO | train_inner | epoch 001: 727 / 3002 loss=3.134, ppl=8.78, wps=5857.7, ups=0.09, wpb=64826, bsz=128, num_updates=716, lr=7.16e-05, gnorm=2.804, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8078 2021-06-18 20:53:35 | INFO | train_inner | epoch 001: 728 / 3002 loss=3.085, ppl=8.48, wps=5760.7, ups=0.09, wpb=64790, bsz=128, num_updates=717, lr=7.17e-05, gnorm=2.987, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8089 2021-06-18 20:53:46 | INFO | train_inner | epoch 001: 729 / 3002 loss=3.2, ppl=9.19, wps=5843.9, ups=0.09, wpb=64856, bsz=128, num_updates=718, lr=7.18e-05, gnorm=2.901, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8100 2021-06-18 20:53:57 | INFO | train_inner | epoch 001: 730 / 3002 loss=3.286, ppl=9.76, wps=5923, ups=0.09, wpb=64868, bsz=128, num_updates=719, lr=7.19e-05, gnorm=2.782, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8111 2021-06-18 20:54:07 | INFO | train_inner | epoch 001: 731 / 3002 loss=3.116, ppl=8.67, wps=6021.3, ups=0.09, wpb=64808, bsz=128, num_updates=720, lr=7.2e-05, gnorm=3.261, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=8122 2021-06-18 20:54:18 | INFO | train_inner | epoch 001: 732 / 3002 loss=3.053, ppl=8.3, wps=5983.7, ups=0.09, wpb=64817, bsz=128, num_updates=721, lr=7.21e-05, gnorm=2.855, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=8133 2021-06-18 20:54:29 | INFO | train_inner | epoch 001: 733 / 3002 loss=3.016, ppl=8.09, wps=5886.2, ups=0.09, wpb=64749, bsz=128, num_updates=722, lr=7.22e-05, gnorm=5.082, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8144 2021-06-18 20:54:40 | INFO | train_inner | epoch 001: 734 / 3002 loss=3.221, ppl=9.32, wps=5970.6, ups=0.09, wpb=64807, bsz=128, num_updates=723, lr=7.23e-05, gnorm=2.893, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=8155 2021-06-18 20:54:51 | INFO | train_inner | epoch 001: 735 / 3002 loss=3.302, ppl=9.86, wps=5874.2, ups=0.09, wpb=64844, bsz=128, num_updates=724, lr=7.24e-05, gnorm=3.024, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8166 2021-06-18 20:55:02 | INFO | train_inner | epoch 001: 736 / 3002 loss=3.245, ppl=9.48, wps=5843.2, ups=0.09, wpb=64844, bsz=128, num_updates=725, lr=7.25e-05, gnorm=6.746, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8177 2021-06-18 20:55:13 | INFO | train_inner | epoch 001: 737 / 3002 loss=3.015, ppl=8.08, wps=5803.9, ups=0.09, wpb=64852, bsz=128, num_updates=726, lr=7.26e-05, gnorm=3.44, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8188 2021-06-18 20:55:25 | INFO | train_inner | epoch 001: 738 / 3002 loss=3.033, ppl=8.19, wps=5890.2, ups=0.09, wpb=64925, bsz=128, num_updates=727, lr=7.27e-05, gnorm=2.76, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8199 2021-06-18 20:55:36 | INFO | train_inner | epoch 001: 739 / 3002 loss=3.321, ppl=9.99, wps=5814.2, ups=0.09, wpb=64775, bsz=128, num_updates=728, lr=7.28e-05, gnorm=2.786, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8210 2021-06-18 20:55:47 | INFO | train_inner | epoch 001: 740 / 3002 loss=2.972, ppl=7.85, wps=5760.6, ups=0.09, wpb=64842, bsz=128, num_updates=729, lr=7.29e-05, gnorm=2.745, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8221 2021-06-18 20:55:58 | INFO | train_inner | epoch 001: 741 / 3002 loss=3.136, ppl=8.79, wps=5826.8, ups=0.09, wpb=64836, bsz=128, num_updates=730, lr=7.3e-05, gnorm=2.769, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8232 2021-06-18 20:56:09 | INFO | train_inner | epoch 001: 742 / 3002 loss=3.051, ppl=8.29, wps=5823.7, ups=0.09, wpb=64840, bsz=128, num_updates=731, lr=7.31e-05, gnorm=2.815, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8244 2021-06-18 20:56:20 | INFO | train_inner | epoch 001: 743 / 3002 loss=3.12, ppl=8.7, wps=5802.6, ups=0.09, wpb=64828, bsz=128, num_updates=732, lr=7.32e-05, gnorm=2.796, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8255 2021-06-18 20:56:31 | INFO | train_inner | epoch 001: 744 / 3002 loss=3.151, ppl=8.88, wps=5910.6, ups=0.09, wpb=64781, bsz=128, num_updates=733, lr=7.33e-05, gnorm=2.775, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8266 2021-06-18 20:56:42 | INFO | train_inner | epoch 001: 745 / 3002 loss=3.223, ppl=9.34, wps=5838.7, ups=0.09, wpb=64845, bsz=128, num_updates=734, lr=7.34e-05, gnorm=2.766, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8277 2021-06-18 20:56:54 | INFO | train_inner | epoch 001: 746 / 3002 loss=3.142, ppl=8.83, wps=5845.6, ups=0.09, wpb=64825, bsz=128, num_updates=735, lr=7.35e-05, gnorm=2.721, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8288 2021-06-18 20:57:05 | INFO | train_inner | epoch 001: 747 / 3002 loss=3.129, ppl=8.75, wps=5828.6, ups=0.09, wpb=64813, bsz=128, num_updates=736, lr=7.36e-05, gnorm=2.703, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8299 2021-06-18 20:57:16 | INFO | train_inner | epoch 001: 748 / 3002 loss=3.334, ppl=10.09, wps=5868.7, ups=0.09, wpb=64908, bsz=128, num_updates=737, lr=7.37e-05, gnorm=2.794, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8310 2021-06-18 20:57:27 | INFO | train_inner | epoch 001: 749 / 3002 loss=3.028, ppl=8.16, wps=5891.6, ups=0.09, wpb=64816, bsz=128, num_updates=738, lr=7.38e-05, gnorm=2.708, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8321 2021-06-18 20:57:38 | INFO | train_inner | epoch 001: 750 / 3002 loss=3.124, ppl=8.72, wps=5984.2, ups=0.09, wpb=64840, bsz=128, num_updates=739, lr=7.39e-05, gnorm=2.776, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=8332 2021-06-18 20:57:49 | INFO | train_inner | epoch 001: 751 / 3002 loss=3.338, ppl=10.11, wps=5875, ups=0.09, wpb=64819, bsz=128, num_updates=740, lr=7.4e-05, gnorm=3.54, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8343 2021-06-18 20:58:00 | INFO | train_inner | epoch 001: 752 / 3002 loss=3.052, ppl=8.29, wps=5783.5, ups=0.09, wpb=64843, bsz=128, num_updates=741, lr=7.41e-05, gnorm=3.001, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8354 2021-06-18 20:58:11 | INFO | train_inner | epoch 001: 753 / 3002 loss=3.327, ppl=10.04, wps=5986.6, ups=0.09, wpb=64838, bsz=128, num_updates=742, lr=7.42e-05, gnorm=2.96, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=8365 2021-06-18 20:58:22 | INFO | train_inner | epoch 001: 754 / 3002 loss=3.353, ppl=10.22, wps=5877.6, ups=0.09, wpb=64722, bsz=128, num_updates=743, lr=7.43e-05, gnorm=3.033, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8376 2021-06-18 20:58:33 | INFO | train_inner | epoch 001: 755 / 3002 loss=3.005, ppl=8.03, wps=5766.7, ups=0.09, wpb=64881, bsz=128, num_updates=744, lr=7.44e-05, gnorm=2.889, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8387 2021-06-18 20:58:44 | INFO | train_inner | epoch 001: 756 / 3002 loss=3.18, ppl=9.06, wps=5849.9, ups=0.09, wpb=64818, bsz=128, num_updates=745, lr=7.45e-05, gnorm=2.777, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8398 2021-06-18 20:58:55 | INFO | train_inner | epoch 001: 757 / 3002 loss=3.217, ppl=9.3, wps=5800, ups=0.09, wpb=64769, bsz=128, num_updates=746, lr=7.46e-05, gnorm=2.754, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8409 2021-06-18 20:59:06 | INFO | train_inner | epoch 001: 758 / 3002 loss=3.207, ppl=9.24, wps=5946.1, ups=0.09, wpb=64789, bsz=128, num_updates=747, lr=7.47e-05, gnorm=2.864, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=8420 2021-06-18 20:59:17 | INFO | train_inner | epoch 001: 759 / 3002 loss=3.21, ppl=9.25, wps=5811.2, ups=0.09, wpb=64804, bsz=128, num_updates=748, lr=7.48e-05, gnorm=3.135, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8431 2021-06-18 20:59:28 | INFO | train_inner | epoch 001: 760 / 3002 loss=3.182, ppl=9.08, wps=5801.2, ups=0.09, wpb=64840, bsz=128, num_updates=749, lr=7.49e-05, gnorm=3.007, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8443 2021-06-18 20:59:39 | INFO | train_inner | epoch 001: 761 / 3002 loss=3.333, ppl=10.08, wps=5856.1, ups=0.09, wpb=64883, bsz=128, num_updates=750, lr=7.5e-05, gnorm=3.762, loss_scale=1, train_wall=11, gb_free=2.8, wall=8454 2021-06-18 20:59:51 | INFO | train_inner | epoch 001: 762 / 3002 loss=3.055, ppl=8.31, wps=5776.9, ups=0.09, wpb=64854, bsz=128, num_updates=751, lr=7.51e-05, gnorm=2.723, loss_scale=1, train_wall=11, gb_free=2.8, wall=8465 2021-06-18 21:00:02 | INFO | train_inner | epoch 001: 763 / 3002 loss=3.143, ppl=8.83, wps=5829.3, ups=0.09, wpb=64827, bsz=128, num_updates=752, lr=7.52e-05, gnorm=2.832, loss_scale=1, train_wall=11, gb_free=2.8, wall=8476 2021-06-18 21:00:13 | INFO | train_inner | epoch 001: 764 / 3002 loss=3.126, ppl=8.73, wps=5836.1, ups=0.09, wpb=64878, bsz=128, num_updates=753, lr=7.53e-05, gnorm=2.885, loss_scale=1, train_wall=11, gb_free=2.8, wall=8487 2021-06-18 21:00:24 | INFO | train_inner | epoch 001: 765 / 3002 loss=3.321, ppl=9.99, wps=5800.2, ups=0.09, wpb=64772, bsz=128, num_updates=754, lr=7.54e-05, gnorm=2.864, loss_scale=1, train_wall=11, gb_free=2.8, wall=8498 2021-06-18 21:00:35 | INFO | train_inner | epoch 001: 766 / 3002 loss=3.183, ppl=9.08, wps=5854.5, ups=0.09, wpb=64851, bsz=128, num_updates=755, lr=7.55e-05, gnorm=2.748, loss_scale=1, train_wall=11, gb_free=2.8, wall=8509 2021-06-18 21:00:46 | INFO | train_inner | epoch 001: 767 / 3002 loss=3.251, ppl=9.52, wps=5716, ups=0.09, wpb=64821, bsz=128, num_updates=756, lr=7.56e-05, gnorm=2.77, loss_scale=1, train_wall=11, gb_free=2.8, wall=8521 2021-06-18 21:00:57 | INFO | train_inner | epoch 001: 768 / 3002 loss=3.122, ppl=8.7, wps=5900.4, ups=0.09, wpb=64854, bsz=128, num_updates=757, lr=7.57e-05, gnorm=2.847, loss_scale=1, train_wall=11, gb_free=2.8, wall=8532 2021-06-18 21:01:09 | INFO | train_inner | epoch 001: 769 / 3002 loss=3.298, ppl=9.83, wps=5795.6, ups=0.09, wpb=64821, bsz=128, num_updates=758, lr=7.58e-05, gnorm=2.807, loss_scale=1, train_wall=11, gb_free=2.8, wall=8543 2021-06-18 21:01:20 | INFO | train_inner | epoch 001: 770 / 3002 loss=3.06, ppl=8.34, wps=5932.6, ups=0.09, wpb=64895, bsz=128, num_updates=759, lr=7.59e-05, gnorm=3.049, loss_scale=1, train_wall=10, gb_free=2.8, wall=8554 2021-06-18 21:01:31 | INFO | train_inner | epoch 001: 771 / 3002 loss=3.135, ppl=8.79, wps=5875, ups=0.09, wpb=64832, bsz=128, num_updates=760, lr=7.6e-05, gnorm=2.706, loss_scale=1, train_wall=11, gb_free=2.8, wall=8565 2021-06-18 21:01:42 | INFO | train_inner | epoch 001: 772 / 3002 loss=3.269, ppl=9.64, wps=5922.7, ups=0.09, wpb=64784, bsz=128, num_updates=761, lr=7.61e-05, gnorm=2.821, loss_scale=1, train_wall=10, gb_free=2.8, wall=8576 2021-06-18 21:01:53 | INFO | train_inner | epoch 001: 773 / 3002 loss=3.293, ppl=9.8, wps=5893.6, ups=0.09, wpb=64777, bsz=128, num_updates=762, lr=7.62e-05, gnorm=4.36, loss_scale=1, train_wall=11, gb_free=2.8, wall=8587 2021-06-18 21:02:04 | INFO | train_inner | epoch 001: 774 / 3002 loss=3.201, ppl=9.2, wps=5878.2, ups=0.09, wpb=64832, bsz=128, num_updates=763, lr=7.63e-05, gnorm=2.721, loss_scale=1, train_wall=11, gb_free=2.8, wall=8598 2021-06-18 21:02:14 | INFO | train_inner | epoch 001: 775 / 3002 loss=3.196, ppl=9.17, wps=5959.1, ups=0.09, wpb=64759, bsz=128, num_updates=764, lr=7.64e-05, gnorm=2.749, loss_scale=1, train_wall=10, gb_free=2.8, wall=8609 2021-06-18 21:02:26 | INFO | train_inner | epoch 001: 776 / 3002 loss=3.22, ppl=9.32, wps=5767.1, ups=0.09, wpb=64747, bsz=128, num_updates=765, lr=7.65e-05, gnorm=2.804, loss_scale=1, train_wall=11, gb_free=2.8, wall=8620 2021-06-18 21:02:37 | INFO | train_inner | epoch 001: 777 / 3002 loss=3.152, ppl=8.89, wps=5887, ups=0.09, wpb=64786, bsz=128, num_updates=766, lr=7.66e-05, gnorm=2.632, loss_scale=1, train_wall=11, gb_free=2.8, wall=8631 2021-06-18 21:02:48 | INFO | train_inner | epoch 001: 778 / 3002 loss=3.16, ppl=8.94, wps=5862.3, ups=0.09, wpb=64744, bsz=128, num_updates=767, lr=7.67e-05, gnorm=2.695, loss_scale=1, train_wall=11, gb_free=2.8, wall=8642 2021-06-18 21:02:58 | INFO | train_inner | epoch 001: 779 / 3002 loss=3.331, ppl=10.06, wps=6078.5, ups=0.09, wpb=64822, bsz=128, num_updates=768, lr=7.68e-05, gnorm=3.652, loss_scale=1, train_wall=10, gb_free=2.8, wall=8653 2021-06-18 21:03:10 | INFO | train_inner | epoch 001: 780 / 3002 loss=3.079, ppl=8.45, wps=5805.3, ups=0.09, wpb=64830, bsz=128, num_updates=769, lr=7.69e-05, gnorm=2.885, loss_scale=1, train_wall=11, gb_free=2.8, wall=8664 2021-06-18 21:03:21 | INFO | train_inner | epoch 001: 781 / 3002 loss=3.312, ppl=9.93, wps=5765, ups=0.09, wpb=64769, bsz=128, num_updates=770, lr=7.7e-05, gnorm=2.863, loss_scale=1, train_wall=11, gb_free=2.8, wall=8675 2021-06-18 21:03:32 | INFO | train_inner | epoch 001: 782 / 3002 loss=3.203, ppl=9.21, wps=5877.3, ups=0.09, wpb=64836, bsz=128, num_updates=771, lr=7.71e-05, gnorm=2.724, loss_scale=1, train_wall=11, gb_free=2.8, wall=8686 2021-06-18 21:03:43 | INFO | train_inner | epoch 001: 783 / 3002 loss=3.034, ppl=8.19, wps=5862.7, ups=0.09, wpb=64886, bsz=128, num_updates=772, lr=7.72e-05, gnorm=2.724, loss_scale=1, train_wall=11, gb_free=2.8, wall=8697 2021-06-18 21:03:54 | INFO | train_inner | epoch 001: 784 / 3002 loss=3.165, ppl=8.97, wps=5776.4, ups=0.09, wpb=64769, bsz=128, num_updates=773, lr=7.73e-05, gnorm=2.661, loss_scale=1, train_wall=11, gb_free=2.8, wall=8708 2021-06-18 21:04:05 | INFO | train_inner | epoch 001: 785 / 3002 loss=3.127, ppl=8.74, wps=5952.2, ups=0.09, wpb=64848, bsz=128, num_updates=774, lr=7.74e-05, gnorm=4.675, loss_scale=1, train_wall=10, gb_free=2.8, wall=8719 2021-06-18 21:04:16 | INFO | train_inner | epoch 001: 786 / 3002 loss=3.279, ppl=9.71, wps=5835.4, ups=0.09, wpb=64851, bsz=128, num_updates=775, lr=7.75e-05, gnorm=2.86, loss_scale=1, train_wall=11, gb_free=2.8, wall=8730 2021-06-18 21:04:27 | INFO | train_inner | epoch 001: 787 / 3002 loss=3.324, ppl=10.01, wps=5918.9, ups=0.09, wpb=64806, bsz=128, num_updates=776, lr=7.76e-05, gnorm=2.804, loss_scale=1, train_wall=10, gb_free=2.8, wall=8741 2021-06-18 21:04:38 | INFO | train_inner | epoch 001: 788 / 3002 loss=3.191, ppl=9.13, wps=5960.9, ups=0.09, wpb=64828, bsz=128, num_updates=777, lr=7.77e-05, gnorm=2.895, loss_scale=1, train_wall=10, gb_free=2.8, wall=8752 2021-06-18 21:04:49 | INFO | train_inner | epoch 001: 789 / 3002 loss=3.261, ppl=9.59, wps=5831.9, ups=0.09, wpb=64762, bsz=128, num_updates=778, lr=7.78e-05, gnorm=2.844, loss_scale=1, train_wall=11, gb_free=2.8, wall=8763 2021-06-18 21:05:00 | INFO | train_inner | epoch 001: 790 / 3002 loss=3.169, ppl=8.99, wps=5768.9, ups=0.09, wpb=64794, bsz=128, num_updates=779, lr=7.79e-05, gnorm=2.751, loss_scale=1, train_wall=11, gb_free=2.8, wall=8775 2021-06-18 21:05:11 | INFO | train_inner | epoch 001: 791 / 3002 loss=3.342, ppl=10.14, wps=5882.5, ups=0.09, wpb=64814, bsz=128, num_updates=780, lr=7.8e-05, gnorm=2.762, loss_scale=1, train_wall=11, gb_free=2.8, wall=8786 2021-06-18 21:05:22 | INFO | train_inner | epoch 001: 792 / 3002 loss=3.255, ppl=9.55, wps=5844.1, ups=0.09, wpb=64775, bsz=128, num_updates=781, lr=7.81e-05, gnorm=11.176, loss_scale=1, train_wall=11, gb_free=2.8, wall=8797 2021-06-18 21:05:33 | INFO | train_inner | epoch 001: 793 / 3002 loss=3.044, ppl=8.25, wps=5936.2, ups=0.09, wpb=64849, bsz=128, num_updates=782, lr=7.82e-05, gnorm=2.782, loss_scale=1, train_wall=10, gb_free=2.8, wall=8808 2021-06-18 21:05:44 | INFO | train_inner | epoch 001: 794 / 3002 loss=3.144, ppl=8.84, wps=5885.4, ups=0.09, wpb=64829, bsz=128, num_updates=783, lr=7.83e-05, gnorm=2.671, loss_scale=1, train_wall=11, gb_free=2.8, wall=8819 2021-06-18 21:05:55 | INFO | train_inner | epoch 001: 795 / 3002 loss=3.189, ppl=9.12, wps=5816.5, ups=0.09, wpb=64800, bsz=128, num_updates=784, lr=7.84e-05, gnorm=2.812, loss_scale=1, train_wall=11, gb_free=2.8, wall=8830 2021-06-18 21:06:06 | INFO | train_inner | epoch 001: 796 / 3002 loss=3.034, ppl=8.19, wps=5898.3, ups=0.09, wpb=64843, bsz=128, num_updates=785, lr=7.85e-05, gnorm=2.771, loss_scale=1, train_wall=11, gb_free=2.8, wall=8841 2021-06-18 21:06:17 | INFO | train_inner | epoch 001: 797 / 3002 loss=3.215, ppl=9.29, wps=5889.9, ups=0.09, wpb=64856, bsz=128, num_updates=786, lr=7.86e-05, gnorm=3.548, loss_scale=1, train_wall=11, gb_free=2.8, wall=8852 2021-06-18 21:06:28 | INFO | train_inner | epoch 001: 798 / 3002 loss=3.161, ppl=8.94, wps=5941.9, ups=0.09, wpb=64890, bsz=128, num_updates=787, lr=7.87e-05, gnorm=2.729, loss_scale=1, train_wall=10, gb_free=2.8, wall=8863 2021-06-18 21:06:39 | INFO | train_inner | epoch 001: 799 / 3002 loss=3.195, ppl=9.16, wps=5849.4, ups=0.09, wpb=64769, bsz=128, num_updates=788, lr=7.88e-05, gnorm=3.249, loss_scale=1, train_wall=11, gb_free=2.8, wall=8874 2021-06-18 21:06:51 | INFO | train_inner | epoch 001: 800 / 3002 loss=3.26, ppl=9.58, wps=5864.4, ups=0.09, wpb=64828, bsz=128, num_updates=789, lr=7.89e-05, gnorm=2.79, loss_scale=1, train_wall=11, gb_free=2.8, wall=8885 2021-06-18 21:07:01 | INFO | train_inner | epoch 001: 801 / 3002 loss=3.18, ppl=9.06, wps=5924.5, ups=0.09, wpb=64769, bsz=128, num_updates=790, lr=7.9e-05, gnorm=2.784, loss_scale=1, train_wall=10, gb_free=2.8, wall=8896 2021-06-18 21:07:12 | INFO | train_inner | epoch 001: 802 / 3002 loss=3.198, ppl=9.18, wps=5886.9, ups=0.09, wpb=64857, bsz=128, num_updates=791, lr=7.91e-05, gnorm=2.733, loss_scale=1, train_wall=11, gb_free=2.8, wall=8907 2021-06-18 21:07:23 | INFO | train_inner | epoch 001: 803 / 3002 loss=3.109, ppl=8.63, wps=5925.7, ups=0.09, wpb=64895, bsz=128, num_updates=792, lr=7.92e-05, gnorm=2.715, loss_scale=1, train_wall=10, gb_free=2.8, wall=8918 2021-06-18 21:07:34 | INFO | train_inner | epoch 001: 804 / 3002 loss=3.279, ppl=9.71, wps=5887.8, ups=0.09, wpb=64782, bsz=128, num_updates=793, lr=7.93e-05, gnorm=2.885, loss_scale=1, train_wall=11, gb_free=2.8, wall=8929 2021-06-18 21:07:45 | INFO | train_inner | epoch 001: 805 / 3002 loss=2.98, ppl=7.89, wps=5863.8, ups=0.09, wpb=64906, bsz=128, num_updates=794, lr=7.94e-05, gnorm=2.762, loss_scale=1, train_wall=11, gb_free=2.8, wall=8940 2021-06-18 21:07:57 | INFO | train_inner | epoch 001: 806 / 3002 loss=2.944, ppl=7.7, wps=5876.8, ups=0.09, wpb=64839, bsz=128, num_updates=795, lr=7.95e-05, gnorm=2.859, loss_scale=1, train_wall=11, gb_free=2.8, wall=8951 2021-06-18 21:08:08 | INFO | train_inner | epoch 001: 807 / 3002 loss=3.179, ppl=9.06, wps=5865.5, ups=0.09, wpb=64848, bsz=128, num_updates=796, lr=7.96e-05, gnorm=2.744, loss_scale=1, train_wall=11, gb_free=2.8, wall=8962 2021-06-18 21:08:19 | INFO | train_inner | epoch 001: 808 / 3002 loss=3.167, ppl=8.98, wps=5857, ups=0.09, wpb=64833, bsz=128, num_updates=797, lr=7.97e-05, gnorm=2.687, loss_scale=1, train_wall=11, gb_free=2.8, wall=8973 2021-06-18 21:08:30 | INFO | train_inner | epoch 001: 809 / 3002 loss=3.111, ppl=8.64, wps=5810.6, ups=0.09, wpb=64835, bsz=128, num_updates=798, lr=7.98e-05, gnorm=2.721, loss_scale=1, train_wall=11, gb_free=2.8, wall=8984 2021-06-18 21:08:41 | INFO | train_inner | epoch 001: 810 / 3002 loss=3.018, ppl=8.1, wps=5876.8, ups=0.09, wpb=64808, bsz=128, num_updates=799, lr=7.99e-05, gnorm=2.747, loss_scale=1, train_wall=11, gb_free=2.8, wall=8995 2021-06-18 21:08:52 | INFO | train_inner | epoch 001: 811 / 3002 loss=3.114, ppl=8.66, wps=6001.2, ups=0.09, wpb=64896, bsz=128, num_updates=800, lr=8e-05, gnorm=3.18, loss_scale=1, train_wall=10, gb_free=2.8, wall=9006 2021-06-18 21:09:03 | INFO | train_inner | epoch 001: 812 / 3002 loss=3.055, ppl=8.31, wps=5930.6, ups=0.09, wpb=64817, bsz=128, num_updates=801, lr=8.01e-05, gnorm=2.888, loss_scale=1, train_wall=10, gb_free=2.8, wall=9017 2021-06-18 21:09:14 | INFO | train_inner | epoch 001: 813 / 3002 loss=3.108, ppl=8.62, wps=5803.4, ups=0.09, wpb=64762, bsz=128, num_updates=802, lr=8.02e-05, gnorm=2.641, loss_scale=1, train_wall=11, gb_free=2.8, wall=9028 2021-06-18 21:09:25 | INFO | train_inner | epoch 001: 814 / 3002 loss=3.007, ppl=8.04, wps=5789, ups=0.09, wpb=64876, bsz=128, num_updates=803, lr=8.03e-05, gnorm=2.78, loss_scale=1, train_wall=11, gb_free=2.8, wall=9039 2021-06-18 21:09:36 | INFO | train_inner | epoch 001: 815 / 3002 loss=3.182, ppl=9.08, wps=5906.7, ups=0.09, wpb=64780, bsz=128, num_updates=804, lr=8.04e-05, gnorm=2.987, loss_scale=1, train_wall=11, gb_free=2.8, wall=9050 2021-06-18 21:09:47 | INFO | train_inner | epoch 001: 816 / 3002 loss=3.158, ppl=8.93, wps=5851.8, ups=0.09, wpb=64754, bsz=128, num_updates=805, lr=8.05e-05, gnorm=2.91, loss_scale=1, train_wall=11, gb_free=2.8, wall=9061 2021-06-18 21:09:58 | INFO | train_inner | epoch 001: 817 / 3002 loss=3.139, ppl=8.81, wps=5822.2, ups=0.09, wpb=64869, bsz=128, num_updates=806, lr=8.06e-05, gnorm=2.696, loss_scale=1, train_wall=11, gb_free=2.8, wall=9072 2021-06-18 21:10:09 | INFO | train_inner | epoch 001: 818 / 3002 loss=3.08, ppl=8.46, wps=5908.3, ups=0.09, wpb=64766, bsz=128, num_updates=807, lr=8.07e-05, gnorm=2.742, loss_scale=1, train_wall=11, gb_free=2.8, wall=9083 2021-06-18 21:10:20 | INFO | train_inner | epoch 001: 819 / 3002 loss=3.24, ppl=9.45, wps=5859.6, ups=0.09, wpb=64724, bsz=128, num_updates=808, lr=8.08e-05, gnorm=2.821, loss_scale=1, train_wall=11, gb_free=2.8, wall=9094 2021-06-18 21:10:31 | INFO | train_inner | epoch 001: 820 / 3002 loss=3.092, ppl=8.53, wps=5913.5, ups=0.09, wpb=64881, bsz=128, num_updates=809, lr=8.09e-05, gnorm=2.677, loss_scale=1, train_wall=11, gb_free=2.8, wall=9105 2021-06-18 21:10:42 | INFO | train_inner | epoch 001: 821 / 3002 loss=3.242, ppl=9.46, wps=5932.7, ups=0.09, wpb=64770, bsz=128, num_updates=810, lr=8.1e-05, gnorm=2.733, loss_scale=1, train_wall=10, gb_free=2.8, wall=9116 2021-06-18 21:10:53 | INFO | train_inner | epoch 001: 822 / 3002 loss=3.008, ppl=8.04, wps=5836.9, ups=0.09, wpb=64877, bsz=128, num_updates=811, lr=8.11e-05, gnorm=2.882, loss_scale=1, train_wall=11, gb_free=2.8, wall=9127 2021-06-18 21:11:04 | INFO | train_inner | epoch 001: 823 / 3002 loss=3.284, ppl=9.74, wps=5997.1, ups=0.09, wpb=64818, bsz=128, num_updates=812, lr=8.12e-05, gnorm=3.021, loss_scale=1, train_wall=10, gb_free=2.8, wall=9138 2021-06-18 21:11:15 | INFO | train_inner | epoch 001: 824 / 3002 loss=3.135, ppl=8.78, wps=5808.7, ups=0.09, wpb=64898, bsz=128, num_updates=813, lr=8.13e-05, gnorm=2.811, loss_scale=1, train_wall=11, gb_free=2.8, wall=9149 2021-06-18 21:11:27 | INFO | train_inner | epoch 001: 825 / 3002 loss=3.247, ppl=9.49, wps=5640.3, ups=0.09, wpb=64760, bsz=128, num_updates=814, lr=8.14e-05, gnorm=5.567, loss_scale=1, train_wall=11, gb_free=2.8, wall=9161 2021-06-18 21:11:38 | INFO | train_inner | epoch 001: 826 / 3002 loss=2.953, ppl=7.74, wps=5931.3, ups=0.09, wpb=64822, bsz=128, num_updates=815, lr=8.15e-05, gnorm=2.872, loss_scale=1, train_wall=10, gb_free=2.8, wall=9172 2021-06-18 21:11:49 | INFO | train_inner | epoch 001: 827 / 3002 loss=3.279, ppl=9.71, wps=5815.4, ups=0.09, wpb=64827, bsz=128, num_updates=816, lr=8.16e-05, gnorm=2.802, loss_scale=1, train_wall=11, gb_free=2.8, wall=9183 2021-06-18 21:12:00 | INFO | train_inner | epoch 001: 828 / 3002 loss=3.152, ppl=8.89, wps=5813.3, ups=0.09, wpb=64884, bsz=128, num_updates=817, lr=8.17e-05, gnorm=2.747, loss_scale=1, train_wall=11, gb_free=2.8, wall=9194 2021-06-18 21:12:11 | INFO | train_inner | epoch 001: 829 / 3002 loss=3.121, ppl=8.7, wps=5840.9, ups=0.09, wpb=64755, bsz=128, num_updates=818, lr=8.18e-05, gnorm=3.632, loss_scale=1, train_wall=11, gb_free=2.8, wall=9205 2021-06-18 21:12:22 | INFO | train_inner | epoch 001: 830 / 3002 loss=3.162, ppl=8.95, wps=5813, ups=0.09, wpb=64863, bsz=128, num_updates=819, lr=8.19e-05, gnorm=2.943, loss_scale=1, train_wall=11, gb_free=2.8, wall=9216 2021-06-18 21:12:33 | INFO | train_inner | epoch 001: 831 / 3002 loss=3.182, ppl=9.07, wps=5896.2, ups=0.09, wpb=64778, bsz=128, num_updates=820, lr=8.2e-05, gnorm=2.788, loss_scale=1, train_wall=11, gb_free=2.8, wall=9227 2021-06-18 21:12:44 | INFO | train_inner | epoch 001: 832 / 3002 loss=3.216, ppl=9.29, wps=6019.2, ups=0.09, wpb=64903, bsz=128, num_updates=821, lr=8.21e-05, gnorm=2.718, loss_scale=1, train_wall=10, gb_free=2.8, wall=9238 2021-06-18 21:12:55 | INFO | train_inner | epoch 001: 833 / 3002 loss=3.087, ppl=8.5, wps=6021.6, ups=0.09, wpb=64892, bsz=128, num_updates=822, lr=8.22e-05, gnorm=2.864, loss_scale=1, train_wall=10, gb_free=2.8, wall=9249 2021-06-18 21:13:06 | INFO | train_inner | epoch 001: 834 / 3002 loss=3.271, ppl=9.66, wps=5802.9, ups=0.09, wpb=64875, bsz=128, num_updates=823, lr=8.23e-05, gnorm=2.944, loss_scale=1, train_wall=11, gb_free=2.8, wall=9260 2021-06-18 21:13:17 | INFO | train_inner | epoch 001: 835 / 3002 loss=3.193, ppl=9.15, wps=5765.8, ups=0.09, wpb=64784, bsz=128, num_updates=824, lr=8.24e-05, gnorm=2.73, loss_scale=1, train_wall=11, gb_free=2.8, wall=9271 2021-06-18 21:13:28 | INFO | train_inner | epoch 001: 836 / 3002 loss=3.159, ppl=8.93, wps=5823.7, ups=0.09, wpb=64837, bsz=128, num_updates=825, lr=8.25e-05, gnorm=2.599, loss_scale=1, train_wall=11, gb_free=2.8, wall=9283 2021-06-18 21:13:39 | INFO | train_inner | epoch 001: 837 / 3002 loss=2.895, ppl=7.44, wps=5857, ups=0.09, wpb=64809, bsz=128, num_updates=826, lr=8.26e-05, gnorm=2.75, loss_scale=1, train_wall=11, gb_free=2.8, wall=9294 2021-06-18 21:13:50 | INFO | train_inner | epoch 001: 838 / 3002 loss=3.139, ppl=8.81, wps=5856.8, ups=0.09, wpb=64781, bsz=128, num_updates=827, lr=8.27e-05, gnorm=2.766, loss_scale=1, train_wall=11, gb_free=2.8, wall=9305 2021-06-18 21:14:01 | INFO | train_inner | epoch 001: 839 / 3002 loss=3.184, ppl=9.09, wps=5803.3, ups=0.09, wpb=64902, bsz=128, num_updates=828, lr=8.28e-05, gnorm=2.835, loss_scale=1, train_wall=11, gb_free=2.8, wall=9316 2021-06-18 21:14:12 | INFO | train_inner | epoch 001: 840 / 3002 loss=3.097, ppl=8.55, wps=5918, ups=0.09, wpb=64890, bsz=128, num_updates=829, lr=8.29e-05, gnorm=2.672, loss_scale=1, train_wall=11, gb_free=2.8, wall=9327 2021-06-18 21:14:23 | INFO | train_inner | epoch 001: 841 / 3002 loss=3.16, ppl=8.94, wps=5889.3, ups=0.09, wpb=64863, bsz=128, num_updates=830, lr=8.3e-05, gnorm=2.727, loss_scale=1, train_wall=11, gb_free=2.8, wall=9338 2021-06-18 21:14:34 | INFO | train_inner | epoch 001: 842 / 3002 loss=3.16, ppl=8.94, wps=5929.2, ups=0.09, wpb=64751, bsz=128, num_updates=831, lr=8.31e-05, gnorm=2.671, loss_scale=1, train_wall=10, gb_free=2.8, wall=9349 2021-06-18 21:14:45 | INFO | train_inner | epoch 001: 843 / 3002 loss=3.235, ppl=9.42, wps=5939.8, ups=0.09, wpb=64794, bsz=128, num_updates=832, lr=8.32e-05, gnorm=2.799, loss_scale=1, train_wall=10, gb_free=2.8, wall=9360 2021-06-18 21:14:56 | INFO | train_inner | epoch 001: 844 / 3002 loss=3.09, ppl=8.52, wps=5867.6, ups=0.09, wpb=64857, bsz=128, num_updates=833, lr=8.33e-05, gnorm=2.786, loss_scale=1, train_wall=11, gb_free=2.8, wall=9371 2021-06-18 21:15:07 | INFO | train_inner | epoch 001: 845 / 3002 loss=3.305, ppl=9.89, wps=5937.9, ups=0.09, wpb=64765, bsz=128, num_updates=834, lr=8.34e-05, gnorm=2.991, loss_scale=1, train_wall=10, gb_free=2.8, wall=9382 2021-06-18 21:15:19 | INFO | train_inner | epoch 001: 846 / 3002 loss=2.988, ppl=7.93, wps=5730.9, ups=0.09, wpb=64790, bsz=128, num_updates=835, lr=8.35e-05, gnorm=3.035, loss_scale=1, train_wall=11, gb_free=2.8, wall=9393 2021-06-18 21:15:30 | INFO | train_inner | epoch 001: 847 / 3002 loss=3.122, ppl=8.7, wps=5910, ups=0.09, wpb=64857, bsz=128, num_updates=836, lr=8.36e-05, gnorm=2.791, loss_scale=1, train_wall=10, gb_free=2.8, wall=9404 2021-06-18 21:15:41 | INFO | train_inner | epoch 001: 848 / 3002 loss=3.051, ppl=8.29, wps=5864.3, ups=0.09, wpb=64849, bsz=128, num_updates=837, lr=8.37e-05, gnorm=2.681, loss_scale=1, train_wall=11, gb_free=2.8, wall=9415 2021-06-18 21:15:51 | INFO | train_inner | epoch 001: 849 / 3002 loss=3.2, ppl=9.19, wps=5975.5, ups=0.09, wpb=64853, bsz=128, num_updates=838, lr=8.38e-05, gnorm=2.65, loss_scale=1, train_wall=10, gb_free=2.8, wall=9426 2021-06-18 21:16:02 | INFO | train_inner | epoch 001: 850 / 3002 loss=3.057, ppl=8.32, wps=5908, ups=0.09, wpb=64806, bsz=128, num_updates=839, lr=8.39e-05, gnorm=2.596, loss_scale=1, train_wall=11, gb_free=2.8, wall=9437 2021-06-18 21:16:13 | INFO | train_inner | epoch 001: 851 / 3002 loss=2.903, ppl=7.48, wps=5911.1, ups=0.09, wpb=64757, bsz=128, num_updates=840, lr=8.4e-05, gnorm=2.783, loss_scale=1, train_wall=11, gb_free=2.8, wall=9448 2021-06-18 21:16:24 | INFO | train_inner | epoch 001: 852 / 3002 loss=3.042, ppl=8.24, wps=5927.9, ups=0.09, wpb=64825, bsz=128, num_updates=841, lr=8.41e-05, gnorm=2.627, loss_scale=1, train_wall=11, gb_free=2.8, wall=9459 2021-06-18 21:16:35 | INFO | train_inner | epoch 001: 853 / 3002 loss=3.194, ppl=9.15, wps=5946.4, ups=0.09, wpb=64754, bsz=128, num_updates=842, lr=8.42e-05, gnorm=2.871, loss_scale=1, train_wall=10, gb_free=2.8, wall=9470 2021-06-18 21:16:46 | INFO | train_inner | epoch 001: 854 / 3002 loss=3.053, ppl=8.3, wps=5853.5, ups=0.09, wpb=64812, bsz=128, num_updates=843, lr=8.43e-05, gnorm=2.636, loss_scale=1, train_wall=11, gb_free=2.8, wall=9481 2021-06-18 21:16:57 | INFO | train_inner | epoch 001: 855 / 3002 loss=3.035, ppl=8.2, wps=5787, ups=0.09, wpb=64800, bsz=128, num_updates=844, lr=8.44e-05, gnorm=2.609, loss_scale=1, train_wall=11, gb_free=2.8, wall=9492 2021-06-18 21:17:08 | INFO | train_inner | epoch 001: 856 / 3002 loss=3.151, ppl=8.88, wps=5948.5, ups=0.09, wpb=64806, bsz=128, num_updates=845, lr=8.45e-05, gnorm=2.629, loss_scale=1, train_wall=10, gb_free=2.8, wall=9503 2021-06-18 21:17:19 | INFO | train_inner | epoch 001: 857 / 3002 loss=3.058, ppl=8.33, wps=5885.3, ups=0.09, wpb=64861, bsz=128, num_updates=846, lr=8.46e-05, gnorm=2.768, loss_scale=1, train_wall=11, gb_free=2.8, wall=9514 2021-06-18 21:17:30 | INFO | train_inner | epoch 001: 858 / 3002 loss=3.149, ppl=8.87, wps=5843.8, ups=0.09, wpb=64805, bsz=128, num_updates=847, lr=8.47e-05, gnorm=2.815, loss_scale=1, train_wall=11, gb_free=2.8, wall=9525 2021-06-18 21:17:42 | INFO | train_inner | epoch 001: 859 / 3002 loss=3.306, ppl=9.89, wps=5765, ups=0.09, wpb=64777, bsz=128, num_updates=848, lr=8.48e-05, gnorm=3.08, loss_scale=1, train_wall=11, gb_free=2.8, wall=9536 2021-06-18 21:17:53 | INFO | train_inner | epoch 001: 860 / 3002 loss=3.176, ppl=9.04, wps=5857.2, ups=0.09, wpb=64774, bsz=128, num_updates=849, lr=8.49e-05, gnorm=2.618, loss_scale=1, train_wall=11, gb_free=2.8, wall=9547 2021-06-18 21:18:04 | INFO | train_inner | epoch 001: 861 / 3002 loss=3.012, ppl=8.07, wps=5852.8, ups=0.09, wpb=64872, bsz=128, num_updates=850, lr=8.5e-05, gnorm=2.719, loss_scale=1, train_wall=11, gb_free=2.8, wall=9558 2021-06-18 21:18:15 | INFO | train_inner | epoch 001: 862 / 3002 loss=3.261, ppl=9.59, wps=5814.8, ups=0.09, wpb=64807, bsz=128, num_updates=851, lr=8.51e-05, gnorm=2.658, loss_scale=1, train_wall=11, gb_free=2.8, wall=9569 2021-06-18 21:18:26 | INFO | train_inner | epoch 001: 863 / 3002 loss=3.234, ppl=9.41, wps=5846.5, ups=0.09, wpb=64857, bsz=128, num_updates=852, lr=8.52e-05, gnorm=4.327, loss_scale=1, train_wall=11, gb_free=2.8, wall=9580 2021-06-18 21:18:37 | INFO | train_inner | epoch 001: 864 / 3002 loss=3.353, ppl=10.22, wps=5971.1, ups=0.09, wpb=64752, bsz=128, num_updates=853, lr=8.53e-05, gnorm=2.888, loss_scale=1, train_wall=10, gb_free=2.8, wall=9591 2021-06-18 21:18:48 | INFO | train_inner | epoch 001: 865 / 3002 loss=3.093, ppl=8.53, wps=5708.5, ups=0.09, wpb=64831, bsz=128, num_updates=854, lr=8.54e-05, gnorm=2.739, loss_scale=1, train_wall=11, gb_free=2.8, wall=9603 2021-06-18 21:19:00 | INFO | train_inner | epoch 001: 866 / 3002 loss=3.23, ppl=9.38, wps=5776.6, ups=0.09, wpb=64751, bsz=128, num_updates=855, lr=8.55e-05, gnorm=2.882, loss_scale=1, train_wall=11, gb_free=2.8, wall=9614 2021-06-18 21:19:10 | INFO | train_inner | epoch 001: 867 / 3002 loss=3.249, ppl=9.51, wps=5970.4, ups=0.09, wpb=64796, bsz=128, num_updates=856, lr=8.56e-05, gnorm=2.827, loss_scale=1, train_wall=10, gb_free=2.8, wall=9625 2021-06-18 21:19:21 | INFO | train_inner | epoch 001: 868 / 3002 loss=3.22, ppl=9.32, wps=5822.1, ups=0.09, wpb=64882, bsz=128, num_updates=857, lr=8.57e-05, gnorm=2.581, loss_scale=1, train_wall=11, gb_free=2.8, wall=9636 2021-06-18 21:19:33 | INFO | train_inner | epoch 001: 869 / 3002 loss=3.184, ppl=9.09, wps=5819.2, ups=0.09, wpb=64859, bsz=128, num_updates=858, lr=8.58e-05, gnorm=2.751, loss_scale=1, train_wall=11, gb_free=2.8, wall=9647 2021-06-18 21:19:44 | INFO | train_inner | epoch 001: 870 / 3002 loss=3.13, ppl=8.75, wps=5854.4, ups=0.09, wpb=64851, bsz=128, num_updates=859, lr=8.59e-05, gnorm=2.663, loss_scale=1, train_wall=11, gb_free=2.8, wall=9658 2021-06-18 21:19:55 | INFO | train_inner | epoch 001: 871 / 3002 loss=3.115, ppl=8.66, wps=5877.7, ups=0.09, wpb=64858, bsz=128, num_updates=860, lr=8.6e-05, gnorm=2.856, loss_scale=1, train_wall=11, gb_free=2.8, wall=9669 2021-06-18 21:20:06 | INFO | train_inner | epoch 001: 872 / 3002 loss=2.993, ppl=7.96, wps=5922.8, ups=0.09, wpb=64755, bsz=128, num_updates=861, lr=8.61e-05, gnorm=2.836, loss_scale=1, train_wall=10, gb_free=2.8, wall=9680 2021-06-18 21:20:17 | INFO | train_inner | epoch 001: 873 / 3002 loss=3.074, ppl=8.42, wps=5827.5, ups=0.09, wpb=64824, bsz=128, num_updates=862, lr=8.62e-05, gnorm=2.752, loss_scale=1, train_wall=11, gb_free=2.8, wall=9691 2021-06-18 21:20:28 | INFO | train_inner | epoch 001: 874 / 3002 loss=3.055, ppl=8.31, wps=5820.7, ups=0.09, wpb=64937, bsz=128, num_updates=863, lr=8.63e-05, gnorm=2.642, loss_scale=1, train_wall=11, gb_free=2.8, wall=9702 2021-06-18 21:20:39 | INFO | train_inner | epoch 001: 875 / 3002 loss=3.029, ppl=8.16, wps=5806.8, ups=0.09, wpb=64863, bsz=128, num_updates=864, lr=8.64e-05, gnorm=2.728, loss_scale=1, train_wall=11, gb_free=2.8, wall=9713 2021-06-18 21:20:50 | INFO | train_inner | epoch 001: 876 / 3002 loss=3.175, ppl=9.03, wps=5906.9, ups=0.09, wpb=64874, bsz=128, num_updates=865, lr=8.65e-05, gnorm=2.7, loss_scale=1, train_wall=11, gb_free=2.8, wall=9724 2021-06-18 21:21:01 | INFO | train_inner | epoch 001: 877 / 3002 loss=3.225, ppl=9.35, wps=5728.4, ups=0.09, wpb=64868, bsz=128, num_updates=866, lr=8.66e-05, gnorm=3.004, loss_scale=1, train_wall=11, gb_free=2.8, wall=9736 2021-06-18 21:21:13 | INFO | train_inner | epoch 001: 878 / 3002 loss=3.156, ppl=8.91, wps=5755, ups=0.09, wpb=64773, bsz=128, num_updates=867, lr=8.67e-05, gnorm=2.645, loss_scale=1, train_wall=11, gb_free=2.8, wall=9747 2021-06-18 21:21:24 | INFO | train_inner | epoch 001: 879 / 3002 loss=3.027, ppl=8.15, wps=5861.4, ups=0.09, wpb=64883, bsz=128, num_updates=868, lr=8.68e-05, gnorm=2.687, loss_scale=1, train_wall=11, gb_free=2.8, wall=9758 2021-06-18 21:21:35 | INFO | train_inner | epoch 001: 880 / 3002 loss=3.156, ppl=8.91, wps=5901.1, ups=0.09, wpb=64815, bsz=128, num_updates=869, lr=8.69e-05, gnorm=2.81, loss_scale=1, train_wall=11, gb_free=2.8, wall=9769 2021-06-18 21:21:46 | INFO | train_inner | epoch 001: 881 / 3002 loss=2.964, ppl=7.8, wps=5829.3, ups=0.09, wpb=64805, bsz=128, num_updates=870, lr=8.7e-05, gnorm=2.689, loss_scale=1, train_wall=11, gb_free=2.8, wall=9780 2021-06-18 21:21:57 | INFO | train_inner | epoch 001: 882 / 3002 loss=3.105, ppl=8.6, wps=5869.8, ups=0.09, wpb=64816, bsz=128, num_updates=871, lr=8.71e-05, gnorm=2.775, loss_scale=1, train_wall=11, gb_free=2.8, wall=9791 2021-06-18 21:22:08 | INFO | train_inner | epoch 001: 883 / 3002 loss=3.24, ppl=9.45, wps=5787.8, ups=0.09, wpb=64825, bsz=128, num_updates=872, lr=8.72e-05, gnorm=2.9, loss_scale=1, train_wall=11, gb_free=2.8, wall=9802 2021-06-18 21:22:19 | INFO | train_inner | epoch 001: 884 / 3002 loss=3.111, ppl=8.64, wps=5866.8, ups=0.09, wpb=64797, bsz=128, num_updates=873, lr=8.73e-05, gnorm=2.857, loss_scale=1, train_wall=11, gb_free=2.8, wall=9813 2021-06-18 21:22:30 | INFO | train_inner | epoch 001: 885 / 3002 loss=3.275, ppl=9.68, wps=5742.7, ups=0.09, wpb=64835, bsz=128, num_updates=874, lr=8.74e-05, gnorm=2.788, loss_scale=1, train_wall=11, gb_free=2.8, wall=9825 2021-06-18 21:22:41 | INFO | train_inner | epoch 001: 886 / 3002 loss=3.193, ppl=9.14, wps=5885.1, ups=0.09, wpb=64781, bsz=128, num_updates=875, lr=8.75e-05, gnorm=2.716, loss_scale=1, train_wall=11, gb_free=2.8, wall=9836 2021-06-18 21:22:52 | INFO | train_inner | epoch 001: 887 / 3002 loss=3.172, ppl=9.01, wps=5949.7, ups=0.09, wpb=64896, bsz=128, num_updates=876, lr=8.76e-05, gnorm=2.701, loss_scale=1, train_wall=10, gb_free=2.8, wall=9847 2021-06-18 21:23:03 | INFO | train_inner | epoch 001: 888 / 3002 loss=3.056, ppl=8.32, wps=5880.5, ups=0.09, wpb=64889, bsz=128, num_updates=877, lr=8.77e-05, gnorm=2.837, loss_scale=1, train_wall=11, gb_free=2.8, wall=9858 2021-06-18 21:23:14 | INFO | train_inner | epoch 001: 889 / 3002 loss=2.954, ppl=7.75, wps=5841.1, ups=0.09, wpb=64735, bsz=128, num_updates=878, lr=8.78e-05, gnorm=2.735, loss_scale=2, train_wall=11, gb_free=2.8, wall=9869 2021-06-18 21:23:26 | INFO | train_inner | epoch 001: 890 / 3002 loss=3.315, ppl=9.95, wps=5862, ups=0.09, wpb=64844, bsz=128, num_updates=879, lr=8.79e-05, gnorm=2.818, loss_scale=2, train_wall=11, gb_free=2.8, wall=9880 2021-06-18 21:23:37 | INFO | train_inner | epoch 001: 891 / 3002 loss=3.14, ppl=8.82, wps=5868, ups=0.09, wpb=64780, bsz=128, num_updates=880, lr=8.8e-05, gnorm=2.689, loss_scale=2, train_wall=11, gb_free=2.8, wall=9891 2021-06-18 21:23:48 | INFO | train_inner | epoch 001: 892 / 3002 loss=3.036, ppl=8.2, wps=5889.1, ups=0.09, wpb=64859, bsz=128, num_updates=881, lr=8.81e-05, gnorm=2.709, loss_scale=2, train_wall=11, gb_free=2.8, wall=9902 2021-06-18 21:23:59 | INFO | train_inner | epoch 001: 893 / 3002 loss=3.004, ppl=8.02, wps=5844, ups=0.09, wpb=64850, bsz=128, num_updates=882, lr=8.82e-05, gnorm=2.767, loss_scale=2, train_wall=11, gb_free=2.8, wall=9913 2021-06-18 21:24:10 | INFO | train_inner | epoch 001: 894 / 3002 loss=3.028, ppl=8.15, wps=5930.8, ups=0.09, wpb=64832, bsz=128, num_updates=883, lr=8.83e-05, gnorm=8.107, loss_scale=2, train_wall=10, gb_free=2.8, wall=9924 2021-06-18 21:24:20 | INFO | train_inner | epoch 001: 895 / 3002 loss=3.231, ppl=9.39, wps=6002.3, ups=0.09, wpb=64853, bsz=128, num_updates=884, lr=8.84e-05, gnorm=2.654, loss_scale=2, train_wall=10, gb_free=2.8, wall=9935 2021-06-18 21:24:32 | INFO | train_inner | epoch 001: 896 / 3002 loss=3.145, ppl=8.84, wps=5840.2, ups=0.09, wpb=64791, bsz=128, num_updates=885, lr=8.85e-05, gnorm=2.7, loss_scale=2, train_wall=11, gb_free=2.8, wall=9946 2021-06-18 21:24:43 | INFO | train_inner | epoch 001: 897 / 3002 loss=3.135, ppl=8.78, wps=5902, ups=0.09, wpb=64769, bsz=128, num_updates=886, lr=8.86e-05, gnorm=2.57, loss_scale=2, train_wall=11, gb_free=2.8, wall=9957 2021-06-18 21:24:54 | INFO | train_inner | epoch 001: 898 / 3002 loss=3.073, ppl=8.42, wps=5846.8, ups=0.09, wpb=64853, bsz=128, num_updates=887, lr=8.87e-05, gnorm=2.656, loss_scale=2, train_wall=11, gb_free=2.8, wall=9968 2021-06-18 21:25:05 | INFO | train_inner | epoch 001: 899 / 3002 loss=3.105, ppl=8.6, wps=5848.7, ups=0.09, wpb=64827, bsz=128, num_updates=888, lr=8.88e-05, gnorm=2.768, loss_scale=2, train_wall=11, gb_free=2.8, wall=9979 2021-06-18 21:25:16 | INFO | train_inner | epoch 001: 900 / 3002 loss=3.252, ppl=9.53, wps=5819.1, ups=0.09, wpb=64850, bsz=128, num_updates=889, lr=8.89e-05, gnorm=2.86, loss_scale=2, train_wall=11, gb_free=2.8, wall=9990 2021-06-18 21:25:27 | INFO | train_inner | epoch 001: 901 / 3002 loss=3.131, ppl=8.76, wps=5964.5, ups=0.09, wpb=64834, bsz=128, num_updates=890, lr=8.9e-05, gnorm=2.683, loss_scale=2, train_wall=10, gb_free=2.8, wall=10001 2021-06-18 21:25:38 | INFO | train_inner | epoch 001: 902 / 3002 loss=3.191, ppl=9.13, wps=5835.4, ups=0.09, wpb=64811, bsz=128, num_updates=891, lr=8.91e-05, gnorm=2.726, loss_scale=2, train_wall=11, gb_free=2.8, wall=10012 2021-06-18 21:25:49 | INFO | train_inner | epoch 001: 903 / 3002 loss=3.319, ppl=9.98, wps=5887.7, ups=0.09, wpb=64806, bsz=128, num_updates=892, lr=8.92e-05, gnorm=2.747, loss_scale=2, train_wall=11, gb_free=2.8, wall=10023 2021-06-18 21:26:00 | INFO | train_inner | epoch 001: 904 / 3002 loss=3.134, ppl=8.78, wps=5846.9, ups=0.09, wpb=64821, bsz=128, num_updates=893, lr=8.93e-05, gnorm=2.684, loss_scale=2, train_wall=11, gb_free=2.8, wall=10034 2021-06-18 21:26:11 | INFO | train_inner | epoch 001: 905 / 3002 loss=2.981, ppl=7.9, wps=5771.1, ups=0.09, wpb=64886, bsz=128, num_updates=894, lr=8.94e-05, gnorm=2.743, loss_scale=2, train_wall=11, gb_free=2.8, wall=10045 2021-06-18 21:26:22 | INFO | train_inner | epoch 001: 906 / 3002 loss=3.085, ppl=8.49, wps=5892.9, ups=0.09, wpb=64828, bsz=128, num_updates=895, lr=8.95e-05, gnorm=3.008, loss_scale=2, train_wall=11, gb_free=2.8, wall=10056 2021-06-18 21:26:33 | INFO | train_inner | epoch 001: 907 / 3002 loss=3.013, ppl=8.07, wps=5865, ups=0.09, wpb=64891, bsz=128, num_updates=896, lr=8.96e-05, gnorm=2.629, loss_scale=2, train_wall=11, gb_free=2.8, wall=10068 2021-06-18 21:26:44 | INFO | train_inner | epoch 001: 908 / 3002 loss=3.128, ppl=8.75, wps=5936.5, ups=0.09, wpb=64861, bsz=128, num_updates=897, lr=8.97e-05, gnorm=2.765, loss_scale=2, train_wall=10, gb_free=2.8, wall=10078 2021-06-18 21:26:55 | INFO | train_inner | epoch 001: 909 / 3002 loss=3.033, ppl=8.19, wps=5872.1, ups=0.09, wpb=64857, bsz=128, num_updates=898, lr=8.98e-05, gnorm=2.55, loss_scale=2, train_wall=11, gb_free=2.8, wall=10090 2021-06-18 21:27:06 | INFO | train_inner | epoch 001: 910 / 3002 loss=3.098, ppl=8.56, wps=5812.2, ups=0.09, wpb=64815, bsz=128, num_updates=899, lr=8.99e-05, gnorm=2.829, loss_scale=2, train_wall=11, gb_free=2.8, wall=10101 2021-06-18 21:27:17 | INFO | train_inner | epoch 001: 911 / 3002 loss=2.946, ppl=7.71, wps=5875.9, ups=0.09, wpb=64809, bsz=128, num_updates=900, lr=9e-05, gnorm=2.722, loss_scale=2, train_wall=11, gb_free=2.8, wall=10112 2021-06-18 21:27:29 | INFO | train_inner | epoch 001: 912 / 3002 loss=3.186, ppl=9.1, wps=5806.5, ups=0.09, wpb=64825, bsz=128, num_updates=901, lr=9.01e-05, gnorm=2.771, loss_scale=2, train_wall=11, gb_free=2.8, wall=10123 2021-06-18 21:27:40 | INFO | train_inner | epoch 001: 913 / 3002 loss=3.1, ppl=8.58, wps=5869.1, ups=0.09, wpb=64860, bsz=128, num_updates=902, lr=9.02e-05, gnorm=2.722, loss_scale=2, train_wall=11, gb_free=2.8, wall=10134 2021-06-18 21:27:50 | INFO | train_inner | epoch 001: 914 / 3002 loss=3.218, ppl=9.31, wps=5966.3, ups=0.09, wpb=64781, bsz=128, num_updates=903, lr=9.03e-05, gnorm=2.775, loss_scale=2, train_wall=10, gb_free=2.8, wall=10145 2021-06-18 21:28:02 | INFO | train_inner | epoch 001: 915 / 3002 loss=3.352, ppl=10.21, wps=5814.7, ups=0.09, wpb=64834, bsz=128, num_updates=904, lr=9.04e-05, gnorm=2.766, loss_scale=2, train_wall=11, gb_free=2.8, wall=10156 2021-06-18 21:28:13 | INFO | train_inner | epoch 001: 916 / 3002 loss=2.982, ppl=7.9, wps=5852.9, ups=0.09, wpb=64858, bsz=128, num_updates=905, lr=9.05e-05, gnorm=2.598, loss_scale=2, train_wall=11, gb_free=2.8, wall=10167 2021-06-18 21:28:23 | INFO | train_inner | epoch 001: 917 / 3002 loss=2.982, ppl=7.9, wps=6001, ups=0.09, wpb=64851, bsz=128, num_updates=906, lr=9.06e-05, gnorm=2.757, loss_scale=2, train_wall=10, gb_free=2.8, wall=10178 2021-06-18 21:28:35 | INFO | train_inner | epoch 001: 918 / 3002 loss=3.188, ppl=9.11, wps=5872.1, ups=0.09, wpb=64847, bsz=128, num_updates=907, lr=9.07e-05, gnorm=2.931, loss_scale=2, train_wall=11, gb_free=2.8, wall=10189 2021-06-18 21:28:46 | INFO | train_inner | epoch 001: 919 / 3002 loss=3.139, ppl=8.81, wps=5839.6, ups=0.09, wpb=64869, bsz=128, num_updates=908, lr=9.08e-05, gnorm=2.946, loss_scale=2, train_wall=11, gb_free=2.8, wall=10200 2021-06-18 21:28:57 | INFO | train_inner | epoch 001: 920 / 3002 loss=3.196, ppl=9.16, wps=5841.8, ups=0.09, wpb=64787, bsz=128, num_updates=909, lr=9.09e-05, gnorm=2.932, loss_scale=2, train_wall=11, gb_free=2.8, wall=10211 2021-06-18 21:29:08 | INFO | train_inner | epoch 001: 921 / 3002 loss=3.228, ppl=9.37, wps=5885.1, ups=0.09, wpb=64835, bsz=128, num_updates=910, lr=9.1e-05, gnorm=2.815, loss_scale=2, train_wall=11, gb_free=2.8, wall=10222 2021-06-18 21:29:19 | INFO | train_inner | epoch 001: 922 / 3002 loss=3.166, ppl=8.97, wps=5862.7, ups=0.09, wpb=64826, bsz=128, num_updates=911, lr=9.11e-05, gnorm=2.745, loss_scale=2, train_wall=11, gb_free=2.8, wall=10233 2021-06-18 21:29:30 | INFO | train_inner | epoch 001: 923 / 3002 loss=3.085, ppl=8.49, wps=5906.8, ups=0.09, wpb=64916, bsz=128, num_updates=912, lr=9.12e-05, gnorm=2.779, loss_scale=2, train_wall=11, gb_free=2.8, wall=10244 2021-06-18 21:29:41 | INFO | train_inner | epoch 001: 924 / 3002 loss=3.181, ppl=9.07, wps=5782.5, ups=0.09, wpb=64748, bsz=128, num_updates=913, lr=9.13e-05, gnorm=2.704, loss_scale=2, train_wall=11, gb_free=2.8, wall=10255 2021-06-18 21:29:52 | INFO | train_inner | epoch 001: 925 / 3002 loss=3.105, ppl=8.6, wps=5847.1, ups=0.09, wpb=64835, bsz=128, num_updates=914, lr=9.14e-05, gnorm=2.598, loss_scale=2, train_wall=11, gb_free=2.8, wall=10266 2021-06-18 21:30:03 | INFO | train_inner | epoch 001: 926 / 3002 loss=2.991, ppl=7.95, wps=5883, ups=0.09, wpb=64791, bsz=128, num_updates=915, lr=9.15e-05, gnorm=2.782, loss_scale=2, train_wall=11, gb_free=2.8, wall=10277 2021-06-18 21:30:14 | INFO | train_inner | epoch 001: 927 / 3002 loss=3, ppl=8, wps=5819.4, ups=0.09, wpb=64756, bsz=128, num_updates=916, lr=9.16e-05, gnorm=2.531, loss_scale=2, train_wall=11, gb_free=2.8, wall=10289 2021-06-18 21:30:25 | INFO | train_inner | epoch 001: 928 / 3002 loss=3.085, ppl=8.48, wps=5978.1, ups=0.09, wpb=64934, bsz=128, num_updates=917, lr=9.17e-05, gnorm=2.824, loss_scale=2, train_wall=10, gb_free=2.8, wall=10299 2021-06-18 21:30:36 | INFO | train_inner | epoch 001: 929 / 3002 loss=3.074, ppl=8.42, wps=5682, ups=0.09, wpb=64327, bsz=128, num_updates=918, lr=9.18e-05, gnorm=2.894, loss_scale=2, train_wall=11, gb_free=2.8, wall=10311 2021-06-18 21:30:47 | INFO | train_inner | epoch 001: 930 / 3002 loss=3.306, ppl=9.89, wps=5835.6, ups=0.09, wpb=64772, bsz=128, num_updates=919, lr=9.19e-05, gnorm=2.888, loss_scale=2, train_wall=11, gb_free=2.8, wall=10322 2021-06-18 21:30:59 | INFO | train_inner | epoch 001: 931 / 3002 loss=2.906, ppl=7.5, wps=5838.2, ups=0.09, wpb=64812, bsz=128, num_updates=920, lr=9.2e-05, gnorm=2.705, loss_scale=2, train_wall=11, gb_free=2.8, wall=10333 2021-06-18 21:31:10 | INFO | train_inner | epoch 001: 932 / 3002 loss=2.987, ppl=7.93, wps=5868.3, ups=0.09, wpb=64884, bsz=128, num_updates=921, lr=9.21e-05, gnorm=2.775, loss_scale=2, train_wall=11, gb_free=2.8, wall=10344 2021-06-18 21:31:21 | INFO | train_inner | epoch 001: 933 / 3002 loss=3.204, ppl=9.21, wps=5908.6, ups=0.09, wpb=64848, bsz=128, num_updates=922, lr=9.22e-05, gnorm=2.786, loss_scale=2, train_wall=11, gb_free=2.8, wall=10355 2021-06-18 21:31:32 | INFO | train_inner | epoch 001: 934 / 3002 loss=3.128, ppl=8.74, wps=5840.3, ups=0.09, wpb=64750, bsz=128, num_updates=923, lr=9.23e-05, gnorm=2.614, loss_scale=2, train_wall=11, gb_free=2.8, wall=10366 2021-06-18 21:31:43 | INFO | train_inner | epoch 001: 935 / 3002 loss=3.138, ppl=8.8, wps=5851.3, ups=0.09, wpb=64826, bsz=128, num_updates=924, lr=9.24e-05, gnorm=3.273, loss_scale=2, train_wall=11, gb_free=2.8, wall=10377 2021-06-18 21:31:54 | INFO | train_inner | epoch 001: 936 / 3002 loss=3.162, ppl=8.95, wps=5803.3, ups=0.09, wpb=64859, bsz=128, num_updates=925, lr=9.25e-05, gnorm=2.72, loss_scale=2, train_wall=11, gb_free=2.8, wall=10388 2021-06-18 21:32:05 | INFO | train_inner | epoch 001: 937 / 3002 loss=3.074, ppl=8.42, wps=5890.3, ups=0.09, wpb=64863, bsz=128, num_updates=926, lr=9.26e-05, gnorm=2.884, loss_scale=2, train_wall=11, gb_free=2.8, wall=10399 2021-06-18 21:32:16 | INFO | train_inner | epoch 001: 938 / 3002 loss=3.236, ppl=9.42, wps=5783.1, ups=0.09, wpb=64851, bsz=128, num_updates=927, lr=9.27e-05, gnorm=2.833, loss_scale=2, train_wall=11, gb_free=2.8, wall=10411 2021-06-18 21:32:27 | INFO | train_inner | epoch 001: 939 / 3002 loss=3.353, ppl=10.21, wps=5900.6, ups=0.09, wpb=64803, bsz=128, num_updates=928, lr=9.28e-05, gnorm=2.777, loss_scale=2, train_wall=11, gb_free=2.8, wall=10422 2021-06-18 21:32:38 | INFO | train_inner | epoch 001: 940 / 3002 loss=3.127, ppl=8.74, wps=5842, ups=0.09, wpb=64781, bsz=128, num_updates=929, lr=9.29e-05, gnorm=2.819, loss_scale=2, train_wall=11, gb_free=2.8, wall=10433 2021-06-18 21:32:50 | INFO | train_inner | epoch 001: 941 / 3002 loss=3.112, ppl=8.65, wps=5768.9, ups=0.09, wpb=64839, bsz=128, num_updates=930, lr=9.3e-05, gnorm=2.906, loss_scale=2, train_wall=11, gb_free=2.8, wall=10444 2021-06-18 21:33:01 | INFO | train_inner | epoch 001: 942 / 3002 loss=3.111, ppl=8.64, wps=5876.8, ups=0.09, wpb=64910, bsz=128, num_updates=931, lr=9.31e-05, gnorm=2.841, loss_scale=2, train_wall=11, gb_free=2.8, wall=10455 2021-06-18 21:33:12 | INFO | train_inner | epoch 001: 943 / 3002 loss=2.986, ppl=7.92, wps=5916.9, ups=0.09, wpb=64912, bsz=128, num_updates=932, lr=9.32e-05, gnorm=2.714, loss_scale=2, train_wall=10, gb_free=2.8, wall=10466 2021-06-18 21:33:23 | INFO | train_inner | epoch 001: 944 / 3002 loss=3.212, ppl=9.27, wps=5836.7, ups=0.09, wpb=64796, bsz=128, num_updates=933, lr=9.33e-05, gnorm=2.922, loss_scale=2, train_wall=11, gb_free=2.8, wall=10477 2021-06-18 21:33:34 | INFO | train_inner | epoch 001: 945 / 3002 loss=3.211, ppl=9.26, wps=5959.3, ups=0.09, wpb=64838, bsz=128, num_updates=934, lr=9.34e-05, gnorm=2.82, loss_scale=2, train_wall=10, gb_free=2.8, wall=10488 2021-06-18 21:33:44 | INFO | train_inner | epoch 001: 946 / 3002 loss=3.078, ppl=8.44, wps=5943, ups=0.09, wpb=64882, bsz=128, num_updates=935, lr=9.35e-05, gnorm=2.855, loss_scale=2, train_wall=10, gb_free=2.8, wall=10499 2021-06-18 21:33:55 | INFO | train_inner | epoch 001: 947 / 3002 loss=3.127, ppl=8.74, wps=5850.6, ups=0.09, wpb=64754, bsz=128, num_updates=936, lr=9.36e-05, gnorm=2.698, loss_scale=2, train_wall=11, gb_free=2.8, wall=10510 2021-06-18 21:34:06 | INFO | train_inner | epoch 001: 948 / 3002 loss=3.072, ppl=8.41, wps=5989.2, ups=0.09, wpb=64817, bsz=128, num_updates=937, lr=9.37e-05, gnorm=2.974, loss_scale=2, train_wall=10, gb_free=2.8, wall=10521 2021-06-18 21:34:17 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 2021-06-18 21:34:28 | INFO | train_inner | epoch 001: 950 / 3002 loss=2.964, ppl=7.8, wps=2956.2, ups=0.05, wpb=64863, bsz=128, num_updates=938, lr=9.38e-05, gnorm=2.655, loss_scale=1, train_wall=21, gb_free=2.8, wall=10543 2021-06-18 21:34:39 | INFO | train_inner | epoch 001: 951 / 3002 loss=2.974, ppl=7.86, wps=5898.7, ups=0.09, wpb=64851, bsz=128, num_updates=939, lr=9.39e-05, gnorm=2.704, loss_scale=1, train_wall=11, gb_free=2.8, wall=10554 2021-06-18 21:34:50 | INFO | train_inner | epoch 001: 952 / 3002 loss=3.013, ppl=8.07, wps=5896.2, ups=0.09, wpb=64851, bsz=128, num_updates=940, lr=9.4e-05, gnorm=2.607, loss_scale=1, train_wall=11, gb_free=2.8, wall=10565 2021-06-18 21:35:01 | INFO | train_inner | epoch 001: 953 / 3002 loss=2.988, ppl=7.93, wps=5890.2, ups=0.09, wpb=64892, bsz=128, num_updates=941, lr=9.41e-05, gnorm=2.753, loss_scale=1, train_wall=11, gb_free=2.8, wall=10576 2021-06-18 21:35:12 | INFO | train_inner | epoch 001: 954 / 3002 loss=3.11, ppl=8.63, wps=5890.3, ups=0.09, wpb=64900, bsz=128, num_updates=942, lr=9.42e-05, gnorm=2.674, loss_scale=1, train_wall=11, gb_free=2.8, wall=10587 2021-06-18 21:35:23 | INFO | train_inner | epoch 001: 955 / 3002 loss=3.167, ppl=8.98, wps=5806.1, ups=0.09, wpb=64761, bsz=128, num_updates=943, lr=9.43e-05, gnorm=3.364, loss_scale=1, train_wall=11, gb_free=2.8, wall=10598 2021-06-18 21:35:35 | INFO | train_inner | epoch 001: 956 / 3002 loss=3.055, ppl=8.31, wps=5795.9, ups=0.09, wpb=64848, bsz=128, num_updates=944, lr=9.44e-05, gnorm=2.777, loss_scale=1, train_wall=11, gb_free=2.8, wall=10609 2021-06-18 21:35:46 | INFO | train_inner | epoch 001: 957 / 3002 loss=3.282, ppl=9.73, wps=5898.6, ups=0.09, wpb=64876, bsz=128, num_updates=945, lr=9.45e-05, gnorm=2.812, loss_scale=1, train_wall=11, gb_free=2.8, wall=10620 2021-06-18 21:35:57 | INFO | train_inner | epoch 001: 958 / 3002 loss=3.099, ppl=8.57, wps=5777.5, ups=0.09, wpb=64835, bsz=128, num_updates=946, lr=9.46e-05, gnorm=2.774, loss_scale=1, train_wall=11, gb_free=2.8, wall=10631 2021-06-18 21:36:08 | INFO | train_inner | epoch 001: 959 / 3002 loss=3.179, ppl=9.06, wps=5834.1, ups=0.09, wpb=64867, bsz=128, num_updates=947, lr=9.47e-05, gnorm=2.819, loss_scale=1, train_wall=11, gb_free=2.8, wall=10642 2021-06-18 21:36:19 | INFO | train_inner | epoch 001: 960 / 3002 loss=3.264, ppl=9.61, wps=6018.3, ups=0.09, wpb=64825, bsz=128, num_updates=948, lr=9.48e-05, gnorm=2.756, loss_scale=1, train_wall=10, gb_free=2.8, wall=10653 2021-06-18 21:36:30 | INFO | train_inner | epoch 001: 961 / 3002 loss=3.176, ppl=9.04, wps=5777.2, ups=0.09, wpb=64823, bsz=128, num_updates=949, lr=9.49e-05, gnorm=2.686, loss_scale=1, train_wall=11, gb_free=2.8, wall=10664 2021-06-18 21:36:41 | INFO | train_inner | epoch 001: 962 / 3002 loss=3.232, ppl=9.39, wps=5827.2, ups=0.09, wpb=64728, bsz=128, num_updates=950, lr=9.5e-05, gnorm=2.909, loss_scale=1, train_wall=11, gb_free=2.8, wall=10675 2021-06-18 21:36:52 | INFO | train_inner | epoch 001: 963 / 3002 loss=3.164, ppl=8.96, wps=5797, ups=0.09, wpb=64762, bsz=128, num_updates=951, lr=9.51e-05, gnorm=2.75, loss_scale=1, train_wall=11, gb_free=2.8, wall=10687 2021-06-18 21:37:03 | INFO | train_inner | epoch 001: 964 / 3002 loss=3.084, ppl=8.48, wps=5947.3, ups=0.09, wpb=64933, bsz=128, num_updates=952, lr=9.52e-05, gnorm=4.081, loss_scale=1, train_wall=10, gb_free=2.8, wall=10697 2021-06-18 21:37:14 | INFO | train_inner | epoch 001: 965 / 3002 loss=3.049, ppl=8.27, wps=5879.2, ups=0.09, wpb=64832, bsz=128, num_updates=953, lr=9.53e-05, gnorm=2.679, loss_scale=1, train_wall=11, gb_free=2.8, wall=10709 2021-06-18 21:37:25 | INFO | train_inner | epoch 001: 966 / 3002 loss=3.224, ppl=9.34, wps=5947.5, ups=0.09, wpb=64805, bsz=128, num_updates=954, lr=9.54e-05, gnorm=3.124, loss_scale=1, train_wall=10, gb_free=2.8, wall=10719 2021-06-18 21:37:36 | INFO | train_inner | epoch 001: 967 / 3002 loss=3.117, ppl=8.67, wps=5834.8, ups=0.09, wpb=64908, bsz=128, num_updates=955, lr=9.55e-05, gnorm=2.712, loss_scale=1, train_wall=11, gb_free=2.8, wall=10731 2021-06-18 21:37:47 | INFO | train_inner | epoch 001: 968 / 3002 loss=2.96, ppl=7.78, wps=5857.5, ups=0.09, wpb=64858, bsz=128, num_updates=956, lr=9.56e-05, gnorm=2.675, loss_scale=1, train_wall=11, gb_free=2.8, wall=10742 2021-06-18 21:37:58 | INFO | train_inner | epoch 001: 969 / 3002 loss=3.221, ppl=9.32, wps=5969.5, ups=0.09, wpb=64871, bsz=128, num_updates=957, lr=9.57e-05, gnorm=2.895, loss_scale=1, train_wall=10, gb_free=2.8, wall=10752 2021-06-18 21:38:09 | INFO | train_inner | epoch 001: 970 / 3002 loss=2.94, ppl=7.67, wps=5761.2, ups=0.09, wpb=64814, bsz=128, num_updates=958, lr=9.58e-05, gnorm=2.672, loss_scale=1, train_wall=11, gb_free=2.8, wall=10764 2021-06-18 21:38:20 | INFO | train_inner | epoch 001: 971 / 3002 loss=3.031, ppl=8.17, wps=5867.9, ups=0.09, wpb=64827, bsz=128, num_updates=959, lr=9.59e-05, gnorm=14.176, loss_scale=1, train_wall=11, gb_free=2.8, wall=10775 2021-06-18 21:38:31 | INFO | train_inner | epoch 001: 972 / 3002 loss=3.141, ppl=8.82, wps=5928.9, ups=0.09, wpb=64897, bsz=128, num_updates=960, lr=9.6e-05, gnorm=2.769, loss_scale=1, train_wall=10, gb_free=2.8, wall=10786 2021-06-18 21:38:42 | INFO | train_inner | epoch 001: 973 / 3002 loss=2.979, ppl=7.89, wps=5941.6, ups=0.09, wpb=64846, bsz=128, num_updates=961, lr=9.61e-05, gnorm=2.619, loss_scale=1, train_wall=10, gb_free=2.8, wall=10797 2021-06-18 21:38:53 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 2021-06-18 21:39:04 | INFO | train_inner | epoch 001: 975 / 3002 loss=3.129, ppl=8.75, wps=2940.6, ups=0.05, wpb=64758, bsz=128, num_updates=962, lr=9.62e-05, gnorm=2.753, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=10819 2021-06-18 21:39:16 | INFO | train_inner | epoch 001: 976 / 3002 loss=3.236, ppl=9.43, wps=5796.6, ups=0.09, wpb=64793, bsz=128, num_updates=963, lr=9.63e-05, gnorm=2.633, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10830 2021-06-18 21:39:27 | INFO | train_inner | epoch 001: 977 / 3002 loss=3.124, ppl=8.72, wps=5861.2, ups=0.09, wpb=64852, bsz=128, num_updates=964, lr=9.64e-05, gnorm=2.656, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10841 2021-06-18 21:39:38 | INFO | train_inner | epoch 001: 978 / 3002 loss=3.203, ppl=9.21, wps=5854.4, ups=0.09, wpb=64792, bsz=128, num_updates=965, lr=9.65e-05, gnorm=2.611, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10852 2021-06-18 21:39:49 | INFO | train_inner | epoch 001: 979 / 3002 loss=3.024, ppl=8.13, wps=5866.2, ups=0.09, wpb=64773, bsz=128, num_updates=966, lr=9.66e-05, gnorm=2.595, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10863 2021-06-18 21:39:59 | INFO | train_inner | epoch 001: 980 / 3002 loss=3.288, ppl=9.76, wps=6087.4, ups=0.09, wpb=64882, bsz=128, num_updates=967, lr=9.67e-05, gnorm=2.708, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=10874 2021-06-18 21:40:10 | INFO | train_inner | epoch 001: 981 / 3002 loss=3.036, ppl=8.2, wps=5953.9, ups=0.09, wpb=64829, bsz=128, num_updates=968, lr=9.68e-05, gnorm=2.488, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=10885 2021-06-18 21:40:21 | INFO | train_inner | epoch 001: 982 / 3002 loss=3.16, ppl=8.94, wps=5829.9, ups=0.09, wpb=64861, bsz=128, num_updates=969, lr=9.69e-05, gnorm=2.708, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10896 2021-06-18 21:40:33 | INFO | train_inner | epoch 001: 983 / 3002 loss=3.293, ppl=9.8, wps=5743, ups=0.09, wpb=64817, bsz=128, num_updates=970, lr=9.7e-05, gnorm=2.782, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10907 2021-06-18 21:40:44 | INFO | train_inner | epoch 001: 984 / 3002 loss=3.211, ppl=9.26, wps=5933.7, ups=0.09, wpb=64826, bsz=128, num_updates=971, lr=9.71e-05, gnorm=2.593, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=10918 2021-06-18 21:40:55 | INFO | train_inner | epoch 001: 985 / 3002 loss=3.101, ppl=8.58, wps=5899, ups=0.09, wpb=64890, bsz=128, num_updates=972, lr=9.72e-05, gnorm=2.593, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10929 2021-06-18 21:41:06 | INFO | train_inner | epoch 001: 986 / 3002 loss=3.229, ppl=9.38, wps=5884.3, ups=0.09, wpb=64860, bsz=128, num_updates=973, lr=9.73e-05, gnorm=5.495, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10940 2021-06-18 21:41:16 | INFO | train_inner | epoch 001: 987 / 3002 loss=3.056, ppl=8.31, wps=5943.9, ups=0.09, wpb=64828, bsz=128, num_updates=974, lr=9.74e-05, gnorm=2.59, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=10951 2021-06-18 21:41:28 | INFO | train_inner | epoch 001: 988 / 3002 loss=2.948, ppl=7.72, wps=5837.3, ups=0.09, wpb=64737, bsz=128, num_updates=975, lr=9.75e-05, gnorm=2.608, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10962 2021-06-18 21:41:39 | INFO | train_inner | epoch 001: 989 / 3002 loss=3.152, ppl=8.89, wps=5857.8, ups=0.09, wpb=64831, bsz=128, num_updates=976, lr=9.76e-05, gnorm=2.957, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10973 2021-06-18 21:41:50 | INFO | train_inner | epoch 001: 990 / 3002 loss=2.964, ppl=7.81, wps=5790.9, ups=0.09, wpb=64860, bsz=128, num_updates=977, lr=9.77e-05, gnorm=2.743, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10984 2021-06-18 21:42:01 | INFO | train_inner | epoch 001: 991 / 3002 loss=3.225, ppl=9.35, wps=5963.9, ups=0.09, wpb=64828, bsz=128, num_updates=978, lr=9.78e-05, gnorm=2.606, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=10995 2021-06-18 21:42:12 | INFO | train_inner | epoch 001: 992 / 3002 loss=3.192, ppl=9.14, wps=5916.4, ups=0.09, wpb=64818, bsz=128, num_updates=979, lr=9.79e-05, gnorm=2.728, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11006 2021-06-18 21:42:23 | INFO | train_inner | epoch 001: 993 / 3002 loss=3.085, ppl=8.48, wps=5870.3, ups=0.09, wpb=64805, bsz=128, num_updates=980, lr=9.8e-05, gnorm=2.746, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11017 2021-06-18 21:42:34 | INFO | train_inner | epoch 001: 994 / 3002 loss=3.064, ppl=8.36, wps=5835.2, ups=0.09, wpb=64778, bsz=128, num_updates=981, lr=9.81e-05, gnorm=2.586, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11028 2021-06-18 21:42:45 | INFO | train_inner | epoch 001: 995 / 3002 loss=3.118, ppl=8.68, wps=5890.1, ups=0.09, wpb=64729, bsz=128, num_updates=982, lr=9.82e-05, gnorm=2.73, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11039 2021-06-18 21:42:56 | INFO | train_inner | epoch 001: 996 / 3002 loss=3.345, ppl=10.16, wps=5925.4, ups=0.09, wpb=64855, bsz=128, num_updates=983, lr=9.83e-05, gnorm=2.888, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11050 2021-06-18 21:43:07 | INFO | train_inner | epoch 001: 997 / 3002 loss=3.071, ppl=8.41, wps=5797.6, ups=0.09, wpb=64822, bsz=128, num_updates=984, lr=9.84e-05, gnorm=2.692, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11061 2021-06-18 21:43:18 | INFO | train_inner | epoch 001: 998 / 3002 loss=3.229, ppl=9.38, wps=5871.5, ups=0.09, wpb=64832, bsz=128, num_updates=985, lr=9.85e-05, gnorm=2.808, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11072 2021-06-18 21:43:29 | INFO | train_inner | epoch 001: 999 / 3002 loss=3.149, ppl=8.87, wps=5925.8, ups=0.09, wpb=64862, bsz=128, num_updates=986, lr=9.86e-05, gnorm=2.739, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11083 2021-06-18 21:43:40 | INFO | train_inner | epoch 001: 1000 / 3002 loss=3.111, ppl=8.64, wps=5815.6, ups=0.09, wpb=64827, bsz=128, num_updates=987, lr=9.87e-05, gnorm=2.712, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11094 2021-06-18 21:43:51 | INFO | train_inner | epoch 001: 1001 / 3002 loss=3.292, ppl=9.79, wps=5915.3, ups=0.09, wpb=64752, bsz=128, num_updates=988, lr=9.88e-05, gnorm=2.789, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11105 2021-06-18 21:44:02 | INFO | train_inner | epoch 001: 1002 / 3002 loss=3.267, ppl=9.62, wps=5834.2, ups=0.09, wpb=64806, bsz=128, num_updates=989, lr=9.89e-05, gnorm=2.77, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11116 2021-06-18 21:44:13 | INFO | train_inner | epoch 001: 1003 / 3002 loss=3.151, ppl=8.88, wps=5817.7, ups=0.09, wpb=64802, bsz=128, num_updates=990, lr=9.9e-05, gnorm=2.861, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11128 2021-06-18 21:44:24 | INFO | train_inner | epoch 001: 1004 / 3002 loss=3.131, ppl=8.76, wps=5954.7, ups=0.09, wpb=64943, bsz=128, num_updates=991, lr=9.91e-05, gnorm=2.751, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11139 2021-06-18 21:44:35 | INFO | train_inner | epoch 001: 1005 / 3002 loss=2.959, ppl=7.78, wps=5893.1, ups=0.09, wpb=64900, bsz=128, num_updates=992, lr=9.92e-05, gnorm=2.653, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11150 2021-06-18 21:44:46 | INFO | train_inner | epoch 001: 1006 / 3002 loss=3.136, ppl=8.79, wps=5881.3, ups=0.09, wpb=64889, bsz=128, num_updates=993, lr=9.93e-05, gnorm=2.789, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11161 2021-06-18 21:44:57 | INFO | train_inner | epoch 001: 1007 / 3002 loss=3.059, ppl=8.34, wps=5930.2, ups=0.09, wpb=64771, bsz=128, num_updates=994, lr=9.94e-05, gnorm=2.71, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11171 2021-06-18 21:45:08 | INFO | train_inner | epoch 001: 1008 / 3002 loss=3.043, ppl=8.24, wps=5905.7, ups=0.09, wpb=64853, bsz=128, num_updates=995, lr=9.95e-05, gnorm=2.651, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11182 2021-06-18 21:45:19 | INFO | train_inner | epoch 001: 1009 / 3002 loss=3.201, ppl=9.2, wps=5777.7, ups=0.09, wpb=64849, bsz=128, num_updates=996, lr=9.96e-05, gnorm=2.797, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11194 2021-06-18 21:45:31 | INFO | train_inner | epoch 001: 1010 / 3002 loss=3.063, ppl=8.36, wps=5798.2, ups=0.09, wpb=64820, bsz=128, num_updates=997, lr=9.97e-05, gnorm=2.679, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11205 2021-06-18 21:45:42 | INFO | train_inner | epoch 001: 1011 / 3002 loss=3.131, ppl=8.76, wps=5878.4, ups=0.09, wpb=64837, bsz=128, num_updates=998, lr=9.98e-05, gnorm=2.632, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11216 2021-06-18 21:45:53 | INFO | train_inner | epoch 001: 1012 / 3002 loss=3.085, ppl=8.49, wps=5825, ups=0.09, wpb=64760, bsz=128, num_updates=999, lr=9.99e-05, gnorm=2.694, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11227 2021-06-18 21:46:04 | INFO | train_inner | epoch 001: 1013 / 3002 loss=3.131, ppl=8.76, wps=5960.3, ups=0.09, wpb=64806, bsz=128, num_updates=1000, lr=0.0001, gnorm=2.837, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11238 2021-06-18 21:46:15 | INFO | train_inner | epoch 001: 1014 / 3002 loss=3.196, ppl=9.16, wps=5874.9, ups=0.09, wpb=64806, bsz=128, num_updates=1001, lr=0.0001, gnorm=2.637, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11249 2021-06-18 21:46:26 | INFO | train_inner | epoch 001: 1015 / 3002 loss=3.072, ppl=8.41, wps=5783.2, ups=0.09, wpb=64865, bsz=128, num_updates=1002, lr=0.0001, gnorm=2.665, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11260 2021-06-18 21:46:37 | INFO | train_inner | epoch 001: 1016 / 3002 loss=3.073, ppl=8.42, wps=5853.4, ups=0.09, wpb=64773, bsz=128, num_updates=1003, lr=0.0001, gnorm=2.6, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11271 2021-06-18 21:46:48 | INFO | train_inner | epoch 001: 1017 / 3002 loss=3.027, ppl=8.15, wps=5972, ups=0.09, wpb=64779, bsz=128, num_updates=1004, lr=0.0001, gnorm=2.708, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11282 2021-06-18 21:46:59 | INFO | train_inner | epoch 001: 1018 / 3002 loss=3.19, ppl=9.13, wps=5890, ups=0.09, wpb=64874, bsz=128, num_updates=1005, lr=0.0001, gnorm=3.055, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11293 2021-06-18 21:47:10 | INFO | train_inner | epoch 001: 1019 / 3002 loss=3.042, ppl=8.24, wps=5866.9, ups=0.09, wpb=64824, bsz=128, num_updates=1006, lr=0.0001, gnorm=3.141, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11304 2021-06-18 21:47:21 | INFO | train_inner | epoch 001: 1020 / 3002 loss=2.997, ppl=7.99, wps=5849.5, ups=0.09, wpb=64807, bsz=128, num_updates=1007, lr=9.99999e-05, gnorm=2.579, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11315 2021-06-18 21:47:32 | INFO | train_inner | epoch 001: 1021 / 3002 loss=3.199, ppl=9.18, wps=5962.3, ups=0.09, wpb=64875, bsz=128, num_updates=1008, lr=9.99999e-05, gnorm=4.003, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11326 2021-06-18 21:47:43 | INFO | train_inner | epoch 001: 1022 / 3002 loss=3.102, ppl=8.59, wps=5781.7, ups=0.09, wpb=64829, bsz=128, num_updates=1009, lr=9.99999e-05, gnorm=2.699, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11337 2021-06-18 21:47:54 | INFO | train_inner | epoch 001: 1023 / 3002 loss=2.9, ppl=7.46, wps=5982, ups=0.09, wpb=64859, bsz=128, num_updates=1010, lr=9.99999e-05, gnorm=2.563, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11348 2021-06-18 21:48:05 | INFO | train_inner | epoch 001: 1024 / 3002 loss=3.081, ppl=8.46, wps=5994.1, ups=0.09, wpb=64969, bsz=128, num_updates=1011, lr=9.99999e-05, gnorm=2.506, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11359 2021-06-18 21:48:16 | INFO | train_inner | epoch 001: 1025 / 3002 loss=2.985, ppl=7.92, wps=5788.1, ups=0.09, wpb=64880, bsz=128, num_updates=1012, lr=9.99999e-05, gnorm=2.581, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11370 2021-06-18 21:48:27 | INFO | train_inner | epoch 001: 1026 / 3002 loss=3.127, ppl=8.74, wps=5831.2, ups=0.09, wpb=64814, bsz=128, num_updates=1013, lr=9.99999e-05, gnorm=3.348, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11381 2021-06-18 21:48:38 | INFO | train_inner | epoch 001: 1027 / 3002 loss=3.103, ppl=8.59, wps=5881.8, ups=0.09, wpb=64817, bsz=128, num_updates=1014, lr=9.99999e-05, gnorm=2.737, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11392 2021-06-18 21:48:49 | INFO | train_inner | epoch 001: 1028 / 3002 loss=3.079, ppl=8.45, wps=5795, ups=0.09, wpb=64792, bsz=128, num_updates=1015, lr=9.99999e-05, gnorm=2.64, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11403 2021-06-18 21:49:00 | INFO | train_inner | epoch 001: 1029 / 3002 loss=3.281, ppl=9.72, wps=5772.5, ups=0.09, wpb=64818, bsz=128, num_updates=1016, lr=9.99999e-05, gnorm=2.743, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11415 2021-06-18 21:49:11 | INFO | train_inner | epoch 001: 1030 / 3002 loss=3.112, ppl=8.65, wps=5872.8, ups=0.09, wpb=64766, bsz=128, num_updates=1017, lr=9.99999e-05, gnorm=3.064, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11426 2021-06-18 21:49:22 | INFO | train_inner | epoch 001: 1031 / 3002 loss=2.871, ppl=7.32, wps=6081.8, ups=0.09, wpb=64908, bsz=128, num_updates=1018, lr=9.99999e-05, gnorm=2.604, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11436 2021-06-18 21:49:33 | INFO | train_inner | epoch 001: 1032 / 3002 loss=3.078, ppl=8.44, wps=5995.4, ups=0.09, wpb=64835, bsz=128, num_updates=1019, lr=9.99998e-05, gnorm=3.192, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11447 2021-06-18 21:49:44 | INFO | train_inner | epoch 001: 1033 / 3002 loss=3.012, ppl=8.06, wps=5776.6, ups=0.09, wpb=64889, bsz=128, num_updates=1020, lr=9.99998e-05, gnorm=2.676, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11458 2021-06-18 21:49:55 | INFO | train_inner | epoch 001: 1034 / 3002 loss=3.012, ppl=8.07, wps=5778.8, ups=0.09, wpb=64780, bsz=128, num_updates=1021, lr=9.99998e-05, gnorm=2.519, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11470 2021-06-18 21:50:06 | INFO | train_inner | epoch 001: 1035 / 3002 loss=3.211, ppl=9.26, wps=5831.7, ups=0.09, wpb=64710, bsz=128, num_updates=1022, lr=9.99998e-05, gnorm=2.805, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11481 2021-06-18 21:50:17 | INFO | train_inner | epoch 001: 1036 / 3002 loss=3.14, ppl=8.82, wps=5870.2, ups=0.09, wpb=64821, bsz=128, num_updates=1023, lr=9.99998e-05, gnorm=2.801, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11492 2021-06-18 21:50:28 | INFO | train_inner | epoch 001: 1037 / 3002 loss=3.148, ppl=8.87, wps=5914.2, ups=0.09, wpb=64854, bsz=128, num_updates=1024, lr=9.99998e-05, gnorm=2.62, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11503 2021-06-18 21:50:39 | INFO | train_inner | epoch 001: 1038 / 3002 loss=3.248, ppl=9.5, wps=5917.5, ups=0.09, wpb=64867, bsz=128, num_updates=1025, lr=9.99998e-05, gnorm=2.712, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11514 2021-06-18 21:50:50 | INFO | train_inner | epoch 001: 1039 / 3002 loss=3.121, ppl=8.7, wps=5952.3, ups=0.09, wpb=64904, bsz=128, num_updates=1026, lr=9.99998e-05, gnorm=2.747, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11525 2021-06-18 21:51:01 | INFO | train_inner | epoch 001: 1040 / 3002 loss=3.177, ppl=9.04, wps=5875, ups=0.09, wpb=64836, bsz=128, num_updates=1027, lr=9.99998e-05, gnorm=2.665, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11536 2021-06-18 21:51:12 | INFO | train_inner | epoch 001: 1041 / 3002 loss=3.237, ppl=9.43, wps=5841.9, ups=0.09, wpb=64869, bsz=128, num_updates=1028, lr=9.99998e-05, gnorm=2.681, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11547 2021-06-18 21:51:23 | INFO | train_inner | epoch 001: 1042 / 3002 loss=3.158, ppl=8.93, wps=6033.9, ups=0.09, wpb=64853, bsz=128, num_updates=1029, lr=9.99998e-05, gnorm=2.676, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11558 2021-06-18 21:51:34 | INFO | train_inner | epoch 001: 1043 / 3002 loss=3.1, ppl=8.57, wps=5855.8, ups=0.09, wpb=64854, bsz=128, num_updates=1030, lr=9.99998e-05, gnorm=2.627, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11569 2021-06-18 21:51:45 | INFO | train_inner | epoch 001: 1044 / 3002 loss=3.036, ppl=8.2, wps=5849.7, ups=0.09, wpb=64864, bsz=128, num_updates=1031, lr=9.99998e-05, gnorm=2.549, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11580 2021-06-18 21:51:56 | INFO | train_inner | epoch 001: 1045 / 3002 loss=3.1, ppl=8.58, wps=5877.3, ups=0.09, wpb=64748, bsz=128, num_updates=1032, lr=9.99997e-05, gnorm=2.646, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11591 2021-06-18 21:52:08 | INFO | train_inner | epoch 001: 1046 / 3002 loss=3.114, ppl=8.66, wps=5800.3, ups=0.09, wpb=64804, bsz=128, num_updates=1033, lr=9.99997e-05, gnorm=2.673, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11602 2021-06-18 21:52:19 | INFO | train_inner | epoch 001: 1047 / 3002 loss=2.978, ppl=7.88, wps=5883, ups=0.09, wpb=64811, bsz=128, num_updates=1034, lr=9.99997e-05, gnorm=2.617, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11613 2021-06-18 21:52:30 | INFO | train_inner | epoch 001: 1048 / 3002 loss=3.182, ppl=9.08, wps=5910.8, ups=0.09, wpb=64810, bsz=128, num_updates=1035, lr=9.99997e-05, gnorm=2.525, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11624 2021-06-18 21:52:41 | INFO | train_inner | epoch 001: 1049 / 3002 loss=3.277, ppl=9.69, wps=5829.2, ups=0.09, wpb=64847, bsz=128, num_updates=1036, lr=9.99997e-05, gnorm=2.702, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11635 2021-06-18 21:52:52 | INFO | train_inner | epoch 001: 1050 / 3002 loss=3.245, ppl=9.48, wps=5828.7, ups=0.09, wpb=64743, bsz=128, num_updates=1037, lr=9.99997e-05, gnorm=2.779, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11646 2021-06-18 21:53:03 | INFO | train_inner | epoch 001: 1051 / 3002 loss=3.124, ppl=8.72, wps=5818.5, ups=0.09, wpb=64878, bsz=128, num_updates=1038, lr=9.99997e-05, gnorm=3.002, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11657 2021-06-18 21:53:14 | INFO | train_inner | epoch 001: 1052 / 3002 loss=3.085, ppl=8.49, wps=6041.4, ups=0.09, wpb=64806, bsz=128, num_updates=1039, lr=9.99997e-05, gnorm=2.627, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11668 2021-06-18 21:53:25 | INFO | train_inner | epoch 001: 1053 / 3002 loss=3.267, ppl=9.63, wps=5834.9, ups=0.09, wpb=64803, bsz=128, num_updates=1040, lr=9.99997e-05, gnorm=3.23, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11679 2021-06-18 21:53:36 | INFO | train_inner | epoch 001: 1054 / 3002 loss=3.155, ppl=8.91, wps=5805.9, ups=0.09, wpb=64798, bsz=128, num_updates=1041, lr=9.99997e-05, gnorm=2.606, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11690 2021-06-18 21:53:47 | INFO | train_inner | epoch 001: 1055 / 3002 loss=3.1, ppl=8.58, wps=5767.9, ups=0.09, wpb=64787, bsz=128, num_updates=1042, lr=9.99997e-05, gnorm=2.652, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11701 2021-06-18 21:53:58 | INFO | train_inner | epoch 001: 1056 / 3002 loss=3.163, ppl=8.96, wps=6037.5, ups=0.09, wpb=64810, bsz=128, num_updates=1043, lr=9.99997e-05, gnorm=2.877, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11712 2021-06-18 21:54:09 | INFO | train_inner | epoch 001: 1057 / 3002 loss=3.294, ppl=9.81, wps=5913.4, ups=0.09, wpb=64752, bsz=128, num_updates=1044, lr=9.99996e-05, gnorm=2.65, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11723 2021-06-18 21:54:20 | INFO | train_inner | epoch 001: 1058 / 3002 loss=3.038, ppl=8.21, wps=5739.2, ups=0.09, wpb=64824, bsz=128, num_updates=1045, lr=9.99996e-05, gnorm=2.575, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11734 2021-06-18 21:54:31 | INFO | train_inner | epoch 001: 1059 / 3002 loss=3.134, ppl=8.78, wps=5833.3, ups=0.09, wpb=64869, bsz=128, num_updates=1046, lr=9.99996e-05, gnorm=2.597, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11746 2021-06-18 21:54:42 | INFO | train_inner | epoch 001: 1060 / 3002 loss=3.002, ppl=8.01, wps=5869.5, ups=0.09, wpb=64827, bsz=128, num_updates=1047, lr=9.99996e-05, gnorm=2.775, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11757 2021-06-18 21:54:54 | INFO | train_inner | epoch 001: 1061 / 3002 loss=2.931, ppl=7.63, wps=5791.4, ups=0.09, wpb=64913, bsz=128, num_updates=1048, lr=9.99996e-05, gnorm=2.491, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11768 2021-06-18 21:55:04 | INFO | train_inner | epoch 001: 1062 / 3002 loss=3.14, ppl=8.82, wps=5901.2, ups=0.09, wpb=64784, bsz=128, num_updates=1049, lr=9.99996e-05, gnorm=2.812, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11779 2021-06-18 21:55:16 | INFO | train_inner | epoch 001: 1063 / 3002 loss=2.964, ppl=7.81, wps=5858.2, ups=0.09, wpb=64796, bsz=128, num_updates=1050, lr=9.99996e-05, gnorm=2.53, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11790 2021-06-18 21:55:27 | INFO | train_inner | epoch 001: 1064 / 3002 loss=3.116, ppl=8.67, wps=5905.6, ups=0.09, wpb=64855, bsz=128, num_updates=1051, lr=9.99996e-05, gnorm=2.553, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11801 2021-06-18 21:55:38 | INFO | train_inner | epoch 001: 1065 / 3002 loss=3.171, ppl=9.01, wps=5911.9, ups=0.09, wpb=64843, bsz=128, num_updates=1052, lr=9.99996e-05, gnorm=2.594, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11812 2021-06-18 21:55:49 | INFO | train_inner | epoch 001: 1066 / 3002 loss=3.103, ppl=8.59, wps=5867.7, ups=0.09, wpb=64797, bsz=128, num_updates=1053, lr=9.99996e-05, gnorm=2.631, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11823 2021-06-18 21:55:59 | INFO | train_inner | epoch 001: 1067 / 3002 loss=3.037, ppl=8.21, wps=5972.2, ups=0.09, wpb=64865, bsz=128, num_updates=1054, lr=9.99996e-05, gnorm=2.614, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11834 2021-06-18 21:56:11 | INFO | train_inner | epoch 001: 1068 / 3002 loss=2.985, ppl=7.92, wps=5827.2, ups=0.09, wpb=64854, bsz=128, num_updates=1055, lr=9.99996e-05, gnorm=2.519, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11845 2021-06-18 21:56:22 | INFO | train_inner | epoch 001: 1069 / 3002 loss=3.183, ppl=9.08, wps=5892.9, ups=0.09, wpb=64842, bsz=128, num_updates=1056, lr=9.99996e-05, gnorm=2.633, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11856 2021-06-18 21:56:32 | INFO | train_inner | epoch 001: 1070 / 3002 loss=3.051, ppl=8.29, wps=5956.7, ups=0.09, wpb=64833, bsz=128, num_updates=1057, lr=9.99995e-05, gnorm=2.513, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11867 2021-06-18 21:56:44 | INFO | train_inner | epoch 001: 1071 / 3002 loss=3.092, ppl=8.53, wps=5816.2, ups=0.09, wpb=64840, bsz=128, num_updates=1058, lr=9.99995e-05, gnorm=2.598, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11878 2021-06-18 21:56:54 | INFO | train_inner | epoch 001: 1072 / 3002 loss=3.126, ppl=8.73, wps=6029.8, ups=0.09, wpb=64766, bsz=128, num_updates=1059, lr=9.99995e-05, gnorm=2.668, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11889 2021-06-18 21:57:06 | INFO | train_inner | epoch 001: 1073 / 3002 loss=3.007, ppl=8.04, wps=5766.1, ups=0.09, wpb=64808, bsz=128, num_updates=1060, lr=9.99995e-05, gnorm=2.796, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11900 2021-06-18 21:57:17 | INFO | train_inner | epoch 001: 1074 / 3002 loss=3.022, ppl=8.12, wps=5781, ups=0.09, wpb=64812, bsz=128, num_updates=1061, lr=9.99995e-05, gnorm=2.61, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11911 2021-06-18 21:57:28 | INFO | train_inner | epoch 001: 1075 / 3002 loss=3.178, ppl=9.05, wps=5857.1, ups=0.09, wpb=64804, bsz=128, num_updates=1062, lr=9.99995e-05, gnorm=2.629, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11922 2021-06-18 21:57:39 | INFO | train_inner | epoch 001: 1076 / 3002 loss=3.182, ppl=9.07, wps=5900.5, ups=0.09, wpb=64828, bsz=128, num_updates=1063, lr=9.99995e-05, gnorm=2.652, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11933 2021-06-18 21:57:50 | INFO | train_inner | epoch 001: 1077 / 3002 loss=3.094, ppl=8.54, wps=5893.7, ups=0.09, wpb=64864, bsz=128, num_updates=1064, lr=9.99995e-05, gnorm=2.61, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11944 2021-06-18 21:58:01 | INFO | train_inner | epoch 001: 1078 / 3002 loss=3.199, ppl=9.18, wps=5894.9, ups=0.09, wpb=64826, bsz=128, num_updates=1065, lr=9.99995e-05, gnorm=2.778, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11955 2021-06-18 21:58:12 | INFO | train_inner | epoch 001: 1079 / 3002 loss=3.047, ppl=8.26, wps=5915.2, ups=0.09, wpb=64871, bsz=128, num_updates=1066, lr=9.99995e-05, gnorm=2.644, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11966 2021-06-18 21:58:23 | INFO | train_inner | epoch 001: 1080 / 3002 loss=3.078, ppl=8.44, wps=5932.2, ups=0.09, wpb=64772, bsz=128, num_updates=1067, lr=9.99995e-05, gnorm=2.598, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11977 2021-06-18 21:58:34 | INFO | train_inner | epoch 001: 1081 / 3002 loss=3.126, ppl=8.73, wps=5854.8, ups=0.09, wpb=64820, bsz=128, num_updates=1068, lr=9.99995e-05, gnorm=2.627, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11988 2021-06-18 21:58:45 | INFO | train_inner | epoch 001: 1082 / 3002 loss=3.011, ppl=8.06, wps=5830.4, ups=0.09, wpb=64877, bsz=128, num_updates=1069, lr=9.99994e-05, gnorm=2.577, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11999 2021-06-18 21:58:56 | INFO | train_inner | epoch 001: 1083 / 3002 loss=3.11, ppl=8.64, wps=5805.8, ups=0.09, wpb=64826, bsz=128, num_updates=1070, lr=9.99994e-05, gnorm=2.546, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12010 2021-06-18 21:59:07 | INFO | train_inner | epoch 001: 1084 / 3002 loss=3.078, ppl=8.44, wps=5966.6, ups=0.09, wpb=64832, bsz=128, num_updates=1071, lr=9.99994e-05, gnorm=2.618, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12021 2021-06-18 21:59:18 | INFO | train_inner | epoch 001: 1085 / 3002 loss=3.257, ppl=9.56, wps=5835.4, ups=0.09, wpb=64790, bsz=128, num_updates=1072, lr=9.99994e-05, gnorm=2.576, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12032 2021-06-18 21:59:29 | INFO | train_inner | epoch 001: 1086 / 3002 loss=3.088, ppl=8.5, wps=5780.7, ups=0.09, wpb=64823, bsz=128, num_updates=1073, lr=9.99994e-05, gnorm=2.657, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12044 2021-06-18 21:59:40 | INFO | train_inner | epoch 001: 1087 / 3002 loss=3.078, ppl=8.45, wps=5899.7, ups=0.09, wpb=64868, bsz=128, num_updates=1074, lr=9.99994e-05, gnorm=2.585, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12055 2021-06-18 21:59:51 | INFO | train_inner | epoch 001: 1088 / 3002 loss=3.02, ppl=8.11, wps=5896.6, ups=0.09, wpb=64828, bsz=128, num_updates=1075, lr=9.99994e-05, gnorm=2.591, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12066 2021-06-18 22:00:02 | INFO | train_inner | epoch 001: 1089 / 3002 loss=3.168, ppl=8.99, wps=5942.9, ups=0.09, wpb=64785, bsz=128, num_updates=1076, lr=9.99994e-05, gnorm=2.883, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12076 2021-06-18 22:00:13 | INFO | train_inner | epoch 001: 1090 / 3002 loss=3.03, ppl=8.17, wps=5894.6, ups=0.09, wpb=64774, bsz=128, num_updates=1077, lr=9.99994e-05, gnorm=3.151, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12087 2021-06-18 22:00:24 | INFO | train_inner | epoch 001: 1091 / 3002 loss=2.999, ppl=8, wps=5814.2, ups=0.09, wpb=64796, bsz=128, num_updates=1078, lr=9.99994e-05, gnorm=2.464, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12099 2021-06-18 22:00:35 | INFO | train_inner | epoch 001: 1092 / 3002 loss=3.137, ppl=8.8, wps=5903, ups=0.09, wpb=64808, bsz=128, num_updates=1079, lr=9.99994e-05, gnorm=2.671, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12110 2021-06-18 22:00:46 | INFO | train_inner | epoch 001: 1093 / 3002 loss=3.228, ppl=9.37, wps=5955.1, ups=0.09, wpb=64836, bsz=128, num_updates=1080, lr=9.99994e-05, gnorm=2.662, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12120 2021-06-18 22:00:57 | INFO | train_inner | epoch 001: 1094 / 3002 loss=3.192, ppl=9.14, wps=5881.3, ups=0.09, wpb=64784, bsz=128, num_updates=1081, lr=9.99994e-05, gnorm=2.492, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12131 2021-06-18 22:01:08 | INFO | train_inner | epoch 001: 1095 / 3002 loss=3.132, ppl=8.76, wps=5916.7, ups=0.09, wpb=64914, bsz=128, num_updates=1082, lr=9.99993e-05, gnorm=2.771, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12142 2021-06-18 22:01:19 | INFO | train_inner | epoch 001: 1096 / 3002 loss=2.923, ppl=7.58, wps=5870.8, ups=0.09, wpb=64894, bsz=128, num_updates=1083, lr=9.99993e-05, gnorm=2.694, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12154 2021-06-18 22:01:30 | INFO | train_inner | epoch 001: 1097 / 3002 loss=3.192, ppl=9.14, wps=5789.4, ups=0.09, wpb=64837, bsz=128, num_updates=1084, lr=9.99993e-05, gnorm=3.095, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12165 2021-06-18 22:01:41 | INFO | train_inner | epoch 001: 1098 / 3002 loss=3.049, ppl=8.28, wps=5859.2, ups=0.09, wpb=64788, bsz=128, num_updates=1085, lr=9.99993e-05, gnorm=2.647, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12176 2021-06-18 22:01:53 | INFO | train_inner | epoch 001: 1099 / 3002 loss=2.98, ppl=7.89, wps=5796.5, ups=0.09, wpb=64773, bsz=128, num_updates=1086, lr=9.99993e-05, gnorm=2.597, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12187 2021-06-18 22:02:04 | INFO | train_inner | epoch 001: 1100 / 3002 loss=3.203, ppl=9.21, wps=5751.9, ups=0.09, wpb=64785, bsz=128, num_updates=1087, lr=9.99993e-05, gnorm=2.609, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12198 2021-06-18 22:02:15 | INFO | train_inner | epoch 001: 1101 / 3002 loss=3.128, ppl=8.74, wps=5930.6, ups=0.09, wpb=64811, bsz=128, num_updates=1088, lr=9.99993e-05, gnorm=2.634, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12209 2021-06-18 22:02:26 | INFO | train_inner | epoch 001: 1102 / 3002 loss=3.192, ppl=9.14, wps=5799.3, ups=0.09, wpb=64785, bsz=128, num_updates=1089, lr=9.99993e-05, gnorm=7.973, loss_scale=1, train_wall=11, gb_free=2.8, wall=12220 2021-06-18 22:02:37 | INFO | train_inner | epoch 001: 1103 / 3002 loss=3.084, ppl=8.48, wps=5965.5, ups=0.09, wpb=64890, bsz=128, num_updates=1090, lr=9.99993e-05, gnorm=2.732, loss_scale=1, train_wall=10, gb_free=2.8, wall=12231 2021-06-18 22:02:48 | INFO | train_inner | epoch 001: 1104 / 3002 loss=3.153, ppl=8.9, wps=5797.5, ups=0.09, wpb=64807, bsz=128, num_updates=1091, lr=9.99993e-05, gnorm=2.73, loss_scale=1, train_wall=11, gb_free=2.8, wall=12242 2021-06-18 22:02:59 | INFO | train_inner | epoch 001: 1105 / 3002 loss=3.157, ppl=8.92, wps=5848.7, ups=0.09, wpb=64831, bsz=128, num_updates=1092, lr=9.99993e-05, gnorm=2.692, loss_scale=1, train_wall=11, gb_free=2.8, wall=12253 2021-06-18 22:03:10 | INFO | train_inner | epoch 001: 1106 / 3002 loss=3.233, ppl=9.4, wps=5816.5, ups=0.09, wpb=64864, bsz=128, num_updates=1093, lr=9.99993e-05, gnorm=2.701, loss_scale=1, train_wall=11, gb_free=2.8, wall=12265 2021-06-18 22:03:21 | INFO | train_inner | epoch 001: 1107 / 3002 loss=3.076, ppl=8.43, wps=5860.1, ups=0.09, wpb=64858, bsz=128, num_updates=1094, lr=9.99992e-05, gnorm=2.713, loss_scale=1, train_wall=11, gb_free=2.8, wall=12276 2021-06-18 22:03:32 | INFO | train_inner | epoch 001: 1108 / 3002 loss=3.148, ppl=8.87, wps=5949.5, ups=0.09, wpb=64837, bsz=128, num_updates=1095, lr=9.99992e-05, gnorm=2.967, loss_scale=1, train_wall=10, gb_free=2.8, wall=12287 2021-06-18 22:03:43 | INFO | train_inner | epoch 001: 1109 / 3002 loss=3.152, ppl=8.89, wps=5800.5, ups=0.09, wpb=64764, bsz=128, num_updates=1096, lr=9.99992e-05, gnorm=2.723, loss_scale=1, train_wall=11, gb_free=2.8, wall=12298 2021-06-18 22:03:54 | INFO | train_inner | epoch 001: 1110 / 3002 loss=3.057, ppl=8.32, wps=5866.3, ups=0.09, wpb=64877, bsz=128, num_updates=1097, lr=9.99992e-05, gnorm=2.518, loss_scale=1, train_wall=11, gb_free=2.8, wall=12309 2021-06-18 22:04:06 | INFO | train_inner | epoch 001: 1111 / 3002 loss=3.135, ppl=8.78, wps=5810.5, ups=0.09, wpb=64776, bsz=128, num_updates=1098, lr=9.99992e-05, gnorm=2.601, loss_scale=1, train_wall=11, gb_free=2.8, wall=12320 2021-06-18 22:04:17 | INFO | train_inner | epoch 001: 1112 / 3002 loss=3.105, ppl=8.61, wps=5833.3, ups=0.09, wpb=64864, bsz=128, num_updates=1099, lr=9.99992e-05, gnorm=2.763, loss_scale=1, train_wall=11, gb_free=2.8, wall=12331 2021-06-18 22:04:28 | INFO | train_inner | epoch 001: 1113 / 3002 loss=3, ppl=8, wps=5982.8, ups=0.09, wpb=64847, bsz=128, num_updates=1100, lr=9.99992e-05, gnorm=2.603, loss_scale=1, train_wall=10, gb_free=2.8, wall=12342 2021-06-18 22:04:39 | INFO | train_inner | epoch 001: 1114 / 3002 loss=3.056, ppl=8.31, wps=5738.3, ups=0.09, wpb=64815, bsz=128, num_updates=1101, lr=9.99992e-05, gnorm=2.554, loss_scale=1, train_wall=11, gb_free=2.8, wall=12353 2021-06-18 22:04:50 | INFO | train_inner | epoch 001: 1115 / 3002 loss=2.913, ppl=7.53, wps=5858, ups=0.09, wpb=64831, bsz=128, num_updates=1102, lr=9.99992e-05, gnorm=2.593, loss_scale=1, train_wall=11, gb_free=2.8, wall=12364 2021-06-18 22:05:01 | INFO | train_inner | epoch 001: 1116 / 3002 loss=2.984, ppl=7.91, wps=5853.6, ups=0.09, wpb=64800, bsz=128, num_updates=1103, lr=9.99992e-05, gnorm=2.65, loss_scale=1, train_wall=11, gb_free=2.8, wall=12375 2021-06-18 22:05:12 | INFO | train_inner | epoch 001: 1117 / 3002 loss=3.107, ppl=8.61, wps=5990.4, ups=0.09, wpb=64891, bsz=128, num_updates=1104, lr=9.99992e-05, gnorm=2.699, loss_scale=1, train_wall=10, gb_free=2.8, wall=12386 2021-06-18 22:05:23 | INFO | train_inner | epoch 001: 1118 / 3002 loss=3.092, ppl=8.53, wps=5814.1, ups=0.09, wpb=64806, bsz=128, num_updates=1105, lr=9.99992e-05, gnorm=2.579, loss_scale=1, train_wall=11, gb_free=2.8, wall=12397 2021-06-18 22:05:34 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 2021-06-18 22:05:45 | INFO | train_inner | epoch 001: 1120 / 3002 loss=3.11, ppl=8.64, wps=2971.1, ups=0.05, wpb=64802, bsz=128, num_updates=1106, lr=9.99992e-05, gnorm=2.89, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=12419 2021-06-18 22:05:56 | INFO | train_inner | epoch 001: 1121 / 3002 loss=3.161, ppl=8.94, wps=5859.7, ups=0.09, wpb=64838, bsz=128, num_updates=1107, lr=9.99991e-05, gnorm=2.783, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12430 2021-06-18 22:06:07 | INFO | train_inner | epoch 001: 1122 / 3002 loss=2.974, ppl=7.86, wps=5865.1, ups=0.09, wpb=64900, bsz=128, num_updates=1108, lr=9.99991e-05, gnorm=2.546, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12441 2021-06-18 22:06:18 | INFO | train_inner | epoch 001: 1123 / 3002 loss=3.052, ppl=8.3, wps=5928.7, ups=0.09, wpb=64884, bsz=128, num_updates=1109, lr=9.99991e-05, gnorm=2.659, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12452 2021-06-18 22:06:29 | INFO | train_inner | epoch 001: 1124 / 3002 loss=3.011, ppl=8.06, wps=5773.1, ups=0.09, wpb=64827, bsz=128, num_updates=1110, lr=9.99991e-05, gnorm=2.566, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12463 2021-06-18 22:06:40 | INFO | train_inner | epoch 001: 1125 / 3002 loss=3.123, ppl=8.71, wps=5927.1, ups=0.09, wpb=64817, bsz=128, num_updates=1111, lr=9.99991e-05, gnorm=2.615, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12474 2021-06-18 22:06:51 | INFO | train_inner | epoch 001: 1126 / 3002 loss=3.099, ppl=8.57, wps=5901.9, ups=0.09, wpb=64885, bsz=128, num_updates=1112, lr=9.99991e-05, gnorm=2.596, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12485 2021-06-18 22:07:02 | INFO | train_inner | epoch 001: 1127 / 3002 loss=3.085, ppl=8.48, wps=5902.6, ups=0.09, wpb=64866, bsz=128, num_updates=1113, lr=9.99991e-05, gnorm=2.509, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12496 2021-06-18 22:07:13 | INFO | train_inner | epoch 001: 1128 / 3002 loss=3.253, ppl=9.54, wps=5991.7, ups=0.09, wpb=64824, bsz=128, num_updates=1114, lr=9.99991e-05, gnorm=2.69, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12507 2021-06-18 22:07:24 | INFO | train_inner | epoch 001: 1129 / 3002 loss=3.113, ppl=8.65, wps=5999.4, ups=0.09, wpb=64845, bsz=128, num_updates=1115, lr=9.99991e-05, gnorm=2.535, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12518 2021-06-18 22:07:35 | INFO | train_inner | epoch 001: 1130 / 3002 loss=2.949, ppl=7.72, wps=5899.9, ups=0.09, wpb=64944, bsz=128, num_updates=1116, lr=9.99991e-05, gnorm=2.591, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12529 2021-06-18 22:07:46 | INFO | train_inner | epoch 001: 1131 / 3002 loss=3.134, ppl=8.78, wps=5789.5, ups=0.09, wpb=64797, bsz=128, num_updates=1117, lr=9.99991e-05, gnorm=2.64, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12540 2021-06-18 22:07:57 | INFO | train_inner | epoch 001: 1132 / 3002 loss=3.186, ppl=9.1, wps=5924.2, ups=0.09, wpb=64920, bsz=128, num_updates=1118, lr=9.99991e-05, gnorm=2.609, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12551 2021-06-18 22:08:08 | INFO | train_inner | epoch 001: 1133 / 3002 loss=2.999, ppl=8, wps=5813.4, ups=0.09, wpb=64841, bsz=128, num_updates=1119, lr=9.9999e-05, gnorm=3.685, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12562 2021-06-18 22:08:19 | INFO | train_inner | epoch 001: 1134 / 3002 loss=3.025, ppl=8.14, wps=5859.4, ups=0.09, wpb=64805, bsz=128, num_updates=1120, lr=9.9999e-05, gnorm=2.426, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12573 2021-06-18 22:08:30 | INFO | train_inner | epoch 001: 1135 / 3002 loss=3.213, ppl=9.27, wps=5872.5, ups=0.09, wpb=64884, bsz=128, num_updates=1121, lr=9.9999e-05, gnorm=2.59, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12584 2021-06-18 22:08:41 | INFO | train_inner | epoch 001: 1136 / 3002 loss=3.129, ppl=8.75, wps=5920.5, ups=0.09, wpb=64830, bsz=128, num_updates=1122, lr=9.9999e-05, gnorm=2.605, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12595 2021-06-18 22:08:52 | INFO | train_inner | epoch 001: 1137 / 3002 loss=3.17, ppl=9, wps=5689.7, ups=0.09, wpb=64760, bsz=128, num_updates=1123, lr=9.9999e-05, gnorm=2.645, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12607 2021-06-18 22:09:04 | INFO | train_inner | epoch 001: 1138 / 3002 loss=3.21, ppl=9.25, wps=5754.2, ups=0.09, wpb=64738, bsz=128, num_updates=1124, lr=9.9999e-05, gnorm=3.132, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12618 2021-06-18 22:09:15 | INFO | train_inner | epoch 001: 1139 / 3002 loss=3.394, ppl=10.51, wps=5837.4, ups=0.09, wpb=64800, bsz=128, num_updates=1125, lr=9.9999e-05, gnorm=2.569, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12629 2021-06-18 22:09:26 | INFO | train_inner | epoch 001: 1140 / 3002 loss=2.882, ppl=7.37, wps=5858.5, ups=0.09, wpb=64858, bsz=128, num_updates=1126, lr=9.9999e-05, gnorm=3.107, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12640 2021-06-18 22:09:37 | INFO | train_inner | epoch 001: 1141 / 3002 loss=3.251, ppl=9.52, wps=6019.3, ups=0.09, wpb=64902, bsz=128, num_updates=1127, lr=9.9999e-05, gnorm=2.641, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12651 2021-06-18 22:09:48 | INFO | train_inner | epoch 001: 1142 / 3002 loss=3.131, ppl=8.76, wps=5873.2, ups=0.09, wpb=64770, bsz=128, num_updates=1128, lr=9.9999e-05, gnorm=2.591, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12662 2021-06-18 22:09:59 | INFO | train_inner | epoch 001: 1143 / 3002 loss=3.088, ppl=8.5, wps=5915.6, ups=0.09, wpb=64785, bsz=128, num_updates=1129, lr=9.9999e-05, gnorm=2.617, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12673 2021-06-18 22:10:10 | INFO | train_inner | epoch 001: 1144 / 3002 loss=3.223, ppl=9.33, wps=5860.3, ups=0.09, wpb=64795, bsz=128, num_updates=1130, lr=9.9999e-05, gnorm=2.644, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12684 2021-06-18 22:10:21 | INFO | train_inner | epoch 001: 1145 / 3002 loss=3.204, ppl=9.21, wps=5951.1, ups=0.09, wpb=64830, bsz=128, num_updates=1131, lr=9.9999e-05, gnorm=2.613, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12695 2021-06-18 22:10:32 | INFO | train_inner | epoch 001: 1146 / 3002 loss=3.25, ppl=9.51, wps=5876, ups=0.09, wpb=64958, bsz=128, num_updates=1132, lr=9.99989e-05, gnorm=2.617, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12706 2021-06-18 22:10:43 | INFO | train_inner | epoch 001: 1147 / 3002 loss=3.099, ppl=8.57, wps=5841.6, ups=0.09, wpb=64808, bsz=128, num_updates=1133, lr=9.99989e-05, gnorm=2.603, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12717 2021-06-18 22:10:54 | INFO | train_inner | epoch 001: 1148 / 3002 loss=3.11, ppl=8.63, wps=5831.2, ups=0.09, wpb=64871, bsz=128, num_updates=1134, lr=9.99989e-05, gnorm=2.777, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12728 2021-06-18 22:11:05 | INFO | train_inner | epoch 001: 1149 / 3002 loss=3.098, ppl=8.56, wps=5839.9, ups=0.09, wpb=64753, bsz=128, num_updates=1135, lr=9.99989e-05, gnorm=2.604, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12739 2021-06-18 22:11:16 | INFO | train_inner | epoch 001: 1150 / 3002 loss=3.04, ppl=8.23, wps=5889, ups=0.09, wpb=64760, bsz=128, num_updates=1136, lr=9.99989e-05, gnorm=2.698, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12750 2021-06-18 22:11:27 | INFO | train_inner | epoch 001: 1151 / 3002 loss=3.03, ppl=8.17, wps=5912.9, ups=0.09, wpb=64848, bsz=128, num_updates=1137, lr=9.99989e-05, gnorm=2.748, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12761 2021-06-18 22:11:38 | INFO | train_inner | epoch 001: 1152 / 3002 loss=3.11, ppl=8.63, wps=5866.5, ups=0.09, wpb=64830, bsz=128, num_updates=1138, lr=9.99989e-05, gnorm=2.976, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12772 2021-06-18 22:11:49 | INFO | train_inner | epoch 001: 1153 / 3002 loss=3.215, ppl=9.28, wps=5985.8, ups=0.09, wpb=64834, bsz=128, num_updates=1139, lr=9.99989e-05, gnorm=2.591, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12783 2021-06-18 22:12:00 | INFO | train_inner | epoch 001: 1154 / 3002 loss=2.929, ppl=7.62, wps=5861.8, ups=0.09, wpb=64862, bsz=128, num_updates=1140, lr=9.99989e-05, gnorm=6.929, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12794 2021-06-18 22:12:11 | INFO | train_inner | epoch 001: 1155 / 3002 loss=3.12, ppl=8.69, wps=5913.8, ups=0.09, wpb=64842, bsz=128, num_updates=1141, lr=9.99989e-05, gnorm=2.84, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12805 2021-06-18 22:12:22 | INFO | train_inner | epoch 001: 1156 / 3002 loss=3.121, ppl=8.7, wps=5873.5, ups=0.09, wpb=64802, bsz=128, num_updates=1142, lr=9.99989e-05, gnorm=2.568, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12816 2021-06-18 22:12:33 | INFO | train_inner | epoch 001: 1157 / 3002 loss=3.313, ppl=9.94, wps=5840.6, ups=0.09, wpb=64754, bsz=128, num_updates=1143, lr=9.99989e-05, gnorm=2.844, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12827 2021-06-18 22:12:44 | INFO | train_inner | epoch 001: 1158 / 3002 loss=3.143, ppl=8.84, wps=5881.4, ups=0.09, wpb=64838, bsz=128, num_updates=1144, lr=9.99988e-05, gnorm=2.559, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12838 2021-06-18 22:12:55 | INFO | train_inner | epoch 001: 1159 / 3002 loss=2.942, ppl=7.69, wps=5785.3, ups=0.09, wpb=64830, bsz=128, num_updates=1145, lr=9.99988e-05, gnorm=2.556, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12849 2021-06-18 22:13:06 | INFO | train_inner | epoch 001: 1160 / 3002 loss=3.104, ppl=8.6, wps=5789.8, ups=0.09, wpb=64862, bsz=128, num_updates=1146, lr=9.99988e-05, gnorm=2.659, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12861 2021-06-18 22:13:17 | INFO | train_inner | epoch 001: 1161 / 3002 loss=3.329, ppl=10.05, wps=5913, ups=0.09, wpb=64725, bsz=128, num_updates=1147, lr=9.99988e-05, gnorm=2.818, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12872 2021-06-18 22:13:28 | INFO | train_inner | epoch 001: 1162 / 3002 loss=3.225, ppl=9.35, wps=5962.2, ups=0.09, wpb=64751, bsz=128, num_updates=1148, lr=9.99988e-05, gnorm=2.641, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12882 2021-06-18 22:13:39 | INFO | train_inner | epoch 001: 1163 / 3002 loss=3.043, ppl=8.24, wps=5837.4, ups=0.09, wpb=64846, bsz=128, num_updates=1149, lr=9.99988e-05, gnorm=3.128, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12894 2021-06-18 22:13:50 | INFO | train_inner | epoch 001: 1164 / 3002 loss=3.171, ppl=9, wps=5906.4, ups=0.09, wpb=64843, bsz=128, num_updates=1150, lr=9.99988e-05, gnorm=2.785, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12905 2021-06-18 22:14:01 | INFO | train_inner | epoch 001: 1165 / 3002 loss=3.12, ppl=8.69, wps=5884, ups=0.09, wpb=64703, bsz=128, num_updates=1151, lr=9.99988e-05, gnorm=2.687, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12916 2021-06-18 22:14:12 | INFO | train_inner | epoch 001: 1166 / 3002 loss=3.082, ppl=8.47, wps=5987.2, ups=0.09, wpb=64836, bsz=128, num_updates=1152, lr=9.99988e-05, gnorm=2.889, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12926 2021-06-18 22:14:23 | INFO | train_inner | epoch 001: 1167 / 3002 loss=3.111, ppl=8.64, wps=5966.8, ups=0.09, wpb=64852, bsz=128, num_updates=1153, lr=9.99988e-05, gnorm=2.859, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12937 2021-06-18 22:14:34 | INFO | train_inner | epoch 001: 1168 / 3002 loss=3.043, ppl=8.24, wps=5835.4, ups=0.09, wpb=64755, bsz=128, num_updates=1154, lr=9.99988e-05, gnorm=2.593, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12948 2021-06-18 22:14:45 | INFO | train_inner | epoch 001: 1169 / 3002 loss=3.26, ppl=9.58, wps=5781.5, ups=0.09, wpb=64826, bsz=128, num_updates=1155, lr=9.99988e-05, gnorm=2.629, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12960 2021-06-18 22:14:56 | INFO | train_inner | epoch 001: 1170 / 3002 loss=3.154, ppl=8.9, wps=5838.4, ups=0.09, wpb=64764, bsz=128, num_updates=1156, lr=9.99988e-05, gnorm=6.202, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12971 2021-06-18 22:15:07 | INFO | train_inner | epoch 001: 1171 / 3002 loss=3.098, ppl=8.56, wps=5914.8, ups=0.09, wpb=64857, bsz=128, num_updates=1157, lr=9.99987e-05, gnorm=2.665, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12982 2021-06-18 22:15:18 | INFO | train_inner | epoch 001: 1172 / 3002 loss=2.997, ppl=7.98, wps=5912.4, ups=0.09, wpb=64875, bsz=128, num_updates=1158, lr=9.99987e-05, gnorm=2.66, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12993 2021-06-18 22:15:29 | INFO | train_inner | epoch 001: 1173 / 3002 loss=2.981, ppl=7.9, wps=5939.8, ups=0.09, wpb=64877, bsz=128, num_updates=1159, lr=9.99987e-05, gnorm=2.563, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13004 2021-06-18 22:15:40 | INFO | train_inner | epoch 001: 1174 / 3002 loss=2.985, ppl=7.92, wps=5838.4, ups=0.09, wpb=64819, bsz=128, num_updates=1160, lr=9.99987e-05, gnorm=2.483, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13015 2021-06-18 22:15:51 | INFO | train_inner | epoch 001: 1175 / 3002 loss=3.05, ppl=8.28, wps=6003, ups=0.09, wpb=64771, bsz=128, num_updates=1161, lr=9.99987e-05, gnorm=2.674, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13025 2021-06-18 22:16:02 | INFO | train_inner | epoch 001: 1176 / 3002 loss=3.048, ppl=8.27, wps=5921.4, ups=0.09, wpb=64885, bsz=128, num_updates=1162, lr=9.99987e-05, gnorm=2.658, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13036 2021-06-18 22:16:13 | INFO | train_inner | epoch 001: 1177 / 3002 loss=3.236, ppl=9.42, wps=5720.7, ups=0.09, wpb=64863, bsz=128, num_updates=1163, lr=9.99987e-05, gnorm=2.71, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13048 2021-06-18 22:16:24 | INFO | train_inner | epoch 001: 1178 / 3002 loss=3.165, ppl=8.97, wps=5851.7, ups=0.09, wpb=64861, bsz=128, num_updates=1164, lr=9.99987e-05, gnorm=2.669, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13059 2021-06-18 22:16:36 | INFO | train_inner | epoch 001: 1179 / 3002 loss=3.1, ppl=8.58, wps=5824.4, ups=0.09, wpb=64833, bsz=128, num_updates=1165, lr=9.99987e-05, gnorm=2.525, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13070 2021-06-18 22:16:46 | INFO | train_inner | epoch 001: 1180 / 3002 loss=3.067, ppl=8.38, wps=6028, ups=0.09, wpb=64938, bsz=128, num_updates=1166, lr=9.99987e-05, gnorm=2.591, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13081 2021-06-18 22:16:57 | INFO | train_inner | epoch 001: 1181 / 3002 loss=3.251, ppl=9.52, wps=5876.4, ups=0.09, wpb=64875, bsz=128, num_updates=1167, lr=9.99987e-05, gnorm=2.585, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13092 2021-06-18 22:17:08 | INFO | train_inner | epoch 001: 1182 / 3002 loss=2.977, ppl=7.87, wps=6009.7, ups=0.09, wpb=64900, bsz=128, num_updates=1168, lr=9.99987e-05, gnorm=2.514, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13103 2021-06-18 22:17:19 | INFO | train_inner | epoch 001: 1183 / 3002 loss=3.1, ppl=8.57, wps=5836.8, ups=0.09, wpb=64846, bsz=128, num_updates=1169, lr=9.99986e-05, gnorm=2.642, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13114 2021-06-18 22:17:30 | INFO | train_inner | epoch 001: 1184 / 3002 loss=3.067, ppl=8.38, wps=5871.9, ups=0.09, wpb=64790, bsz=128, num_updates=1170, lr=9.99986e-05, gnorm=2.585, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13125 2021-06-18 22:17:41 | INFO | train_inner | epoch 001: 1185 / 3002 loss=3.048, ppl=8.27, wps=5971.4, ups=0.09, wpb=64810, bsz=128, num_updates=1171, lr=9.99986e-05, gnorm=2.64, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13136 2021-06-18 22:17:52 | INFO | train_inner | epoch 001: 1186 / 3002 loss=3.059, ppl=8.34, wps=5925.1, ups=0.09, wpb=64785, bsz=128, num_updates=1172, lr=9.99986e-05, gnorm=2.586, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13146 2021-06-18 22:18:03 | INFO | train_inner | epoch 001: 1187 / 3002 loss=3.01, ppl=8.06, wps=5763.1, ups=0.09, wpb=64828, bsz=128, num_updates=1173, lr=9.99986e-05, gnorm=2.567, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13158 2021-06-18 22:18:15 | INFO | train_inner | epoch 001: 1188 / 3002 loss=3.072, ppl=8.41, wps=5763.8, ups=0.09, wpb=64814, bsz=128, num_updates=1174, lr=9.99986e-05, gnorm=2.499, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13169 2021-06-18 22:18:26 | INFO | train_inner | epoch 001: 1189 / 3002 loss=3.175, ppl=9.03, wps=5795.2, ups=0.09, wpb=64811, bsz=128, num_updates=1175, lr=9.99986e-05, gnorm=2.538, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13180 2021-06-18 22:18:37 | INFO | train_inner | epoch 001: 1190 / 3002 loss=2.988, ppl=7.93, wps=5980.4, ups=0.09, wpb=64852, bsz=128, num_updates=1176, lr=9.99986e-05, gnorm=2.563, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13191 2021-06-18 22:18:48 | INFO | train_inner | epoch 001: 1191 / 3002 loss=3.109, ppl=8.63, wps=5797.7, ups=0.09, wpb=64846, bsz=128, num_updates=1177, lr=9.99986e-05, gnorm=2.499, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13202 2021-06-18 22:18:59 | INFO | train_inner | epoch 001: 1192 / 3002 loss=2.939, ppl=7.67, wps=5759.5, ups=0.09, wpb=64756, bsz=128, num_updates=1178, lr=9.99986e-05, gnorm=2.53, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13213 2021-06-18 22:19:10 | INFO | train_inner | epoch 001: 1193 / 3002 loss=3.049, ppl=8.28, wps=5971.6, ups=0.09, wpb=64876, bsz=128, num_updates=1179, lr=9.99986e-05, gnorm=2.626, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13224 2021-06-18 22:19:21 | INFO | train_inner | epoch 001: 1194 / 3002 loss=2.946, ppl=7.71, wps=5841.5, ups=0.09, wpb=64837, bsz=128, num_updates=1180, lr=9.99986e-05, gnorm=2.566, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13235 2021-06-18 22:19:32 | INFO | train_inner | epoch 001: 1195 / 3002 loss=3.108, ppl=8.62, wps=5950.3, ups=0.09, wpb=64777, bsz=128, num_updates=1181, lr=9.99986e-05, gnorm=4.18, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13246 2021-06-18 22:19:43 | INFO | train_inner | epoch 001: 1196 / 3002 loss=3.18, ppl=9.06, wps=5891, ups=0.09, wpb=64807, bsz=128, num_updates=1182, lr=9.99985e-05, gnorm=2.596, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13257 2021-06-18 22:19:54 | INFO | train_inner | epoch 001: 1197 / 3002 loss=3.074, ppl=8.42, wps=5827.3, ups=0.09, wpb=64748, bsz=128, num_updates=1183, lr=9.99985e-05, gnorm=6.758, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13268 2021-06-18 22:20:05 | INFO | train_inner | epoch 001: 1198 / 3002 loss=3.239, ppl=9.44, wps=5854.2, ups=0.09, wpb=64812, bsz=128, num_updates=1184, lr=9.99985e-05, gnorm=2.896, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13279 2021-06-18 22:20:16 | INFO | train_inner | epoch 001: 1199 / 3002 loss=3, ppl=8, wps=5826.1, ups=0.09, wpb=64871, bsz=128, num_updates=1185, lr=9.99985e-05, gnorm=2.635, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13291 2021-06-18 22:20:27 | INFO | train_inner | epoch 001: 1200 / 3002 loss=2.875, ppl=7.34, wps=5891.1, ups=0.09, wpb=64906, bsz=128, num_updates=1186, lr=9.99985e-05, gnorm=2.524, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13302 2021-06-18 22:20:38 | INFO | train_inner | epoch 001: 1201 / 3002 loss=3.153, ppl=8.9, wps=5806.4, ups=0.09, wpb=64898, bsz=128, num_updates=1187, lr=9.99985e-05, gnorm=2.873, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13313 2021-06-18 22:20:50 | INFO | train_inner | epoch 001: 1202 / 3002 loss=3.087, ppl=8.49, wps=5862.1, ups=0.09, wpb=64859, bsz=128, num_updates=1188, lr=9.99985e-05, gnorm=2.905, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13324 2021-06-18 22:21:00 | INFO | train_inner | epoch 001: 1203 / 3002 loss=3.005, ppl=8.03, wps=5973.9, ups=0.09, wpb=64871, bsz=128, num_updates=1189, lr=9.99985e-05, gnorm=2.901, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13335 2021-06-18 22:21:11 | INFO | train_inner | epoch 001: 1204 / 3002 loss=3.124, ppl=8.72, wps=5832.4, ups=0.09, wpb=64822, bsz=128, num_updates=1190, lr=9.99985e-05, gnorm=2.85, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13346 2021-06-18 22:21:23 | INFO | train_inner | epoch 001: 1205 / 3002 loss=3.107, ppl=8.62, wps=5798, ups=0.09, wpb=64904, bsz=128, num_updates=1191, lr=9.99985e-05, gnorm=2.683, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13357 2021-06-18 22:21:34 | INFO | train_inner | epoch 001: 1206 / 3002 loss=3.059, ppl=8.33, wps=5889.3, ups=0.09, wpb=64742, bsz=128, num_updates=1192, lr=9.99985e-05, gnorm=2.609, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13368 2021-06-18 22:21:45 | INFO | train_inner | epoch 001: 1207 / 3002 loss=3.066, ppl=8.37, wps=5848.8, ups=0.09, wpb=64872, bsz=128, num_updates=1193, lr=9.99985e-05, gnorm=2.845, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13379 2021-06-18 22:21:56 | INFO | train_inner | epoch 001: 1208 / 3002 loss=3.162, ppl=8.95, wps=5903, ups=0.09, wpb=64800, bsz=128, num_updates=1194, lr=9.99984e-05, gnorm=2.533, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13390 2021-06-18 22:22:07 | INFO | train_inner | epoch 001: 1209 / 3002 loss=3.047, ppl=8.26, wps=5883.9, ups=0.09, wpb=64805, bsz=128, num_updates=1195, lr=9.99984e-05, gnorm=2.675, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13401 2021-06-18 22:22:18 | INFO | train_inner | epoch 001: 1210 / 3002 loss=2.942, ppl=7.68, wps=5852.2, ups=0.09, wpb=64860, bsz=128, num_updates=1196, lr=9.99984e-05, gnorm=2.602, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13412 2021-06-18 22:22:29 | INFO | train_inner | epoch 001: 1211 / 3002 loss=3.023, ppl=8.13, wps=5819.8, ups=0.09, wpb=64794, bsz=128, num_updates=1197, lr=9.99984e-05, gnorm=2.642, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13423 2021-06-18 22:22:40 | INFO | train_inner | epoch 001: 1212 / 3002 loss=3.116, ppl=8.67, wps=5829.5, ups=0.09, wpb=64866, bsz=128, num_updates=1198, lr=9.99984e-05, gnorm=2.685, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13434 2021-06-18 22:22:51 | INFO | train_inner | epoch 001: 1213 / 3002 loss=3.029, ppl=8.16, wps=6028.2, ups=0.09, wpb=64872, bsz=128, num_updates=1199, lr=9.99984e-05, gnorm=2.597, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13445 2021-06-18 22:23:02 | INFO | train_inner | epoch 001: 1214 / 3002 loss=3.048, ppl=8.27, wps=5888.4, ups=0.09, wpb=64809, bsz=128, num_updates=1200, lr=9.99984e-05, gnorm=2.816, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13456 2021-06-18 22:23:13 | INFO | train_inner | epoch 001: 1215 / 3002 loss=3.338, ppl=10.11, wps=5853.1, ups=0.09, wpb=64905, bsz=128, num_updates=1201, lr=9.99984e-05, gnorm=2.648, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13467 2021-06-18 22:23:24 | INFO | train_inner | epoch 001: 1216 / 3002 loss=3.118, ppl=8.68, wps=5859.9, ups=0.09, wpb=64758, bsz=128, num_updates=1202, lr=9.99984e-05, gnorm=5.829, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13478 2021-06-18 22:23:35 | INFO | train_inner | epoch 001: 1217 / 3002 loss=3.316, ppl=9.96, wps=5816.5, ups=0.09, wpb=64747, bsz=128, num_updates=1203, lr=9.99984e-05, gnorm=2.617, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13489 2021-06-18 22:23:46 | INFO | train_inner | epoch 001: 1218 / 3002 loss=3, ppl=8, wps=5917.2, ups=0.09, wpb=64807, bsz=128, num_updates=1204, lr=9.99984e-05, gnorm=2.591, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13500 2021-06-18 22:23:57 | INFO | train_inner | epoch 001: 1219 / 3002 loss=3.063, ppl=8.36, wps=5882.7, ups=0.09, wpb=64812, bsz=128, num_updates=1205, lr=9.99984e-05, gnorm=2.705, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13511 2021-06-18 22:24:08 | INFO | train_inner | epoch 001: 1220 / 3002 loss=3.11, ppl=8.63, wps=5940, ups=0.09, wpb=64895, bsz=128, num_updates=1206, lr=9.99984e-05, gnorm=2.466, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13522 2021-06-18 22:24:19 | INFO | train_inner | epoch 001: 1221 / 3002 loss=3.148, ppl=8.86, wps=5856.1, ups=0.09, wpb=64818, bsz=128, num_updates=1207, lr=9.99983e-05, gnorm=2.641, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13533 2021-06-18 22:24:30 | INFO | train_inner | epoch 001: 1222 / 3002 loss=3.037, ppl=8.21, wps=5966.3, ups=0.09, wpb=64912, bsz=128, num_updates=1208, lr=9.99983e-05, gnorm=2.778, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13544 2021-06-18 22:24:41 | INFO | train_inner | epoch 001: 1223 / 3002 loss=3.075, ppl=8.43, wps=5874.3, ups=0.09, wpb=64807, bsz=128, num_updates=1209, lr=9.99983e-05, gnorm=2.568, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13555 2021-06-18 22:24:52 | INFO | train_inner | epoch 001: 1224 / 3002 loss=3.015, ppl=8.08, wps=5890.4, ups=0.09, wpb=64904, bsz=128, num_updates=1210, lr=9.99983e-05, gnorm=4.254, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13566 2021-06-18 22:25:03 | INFO | train_inner | epoch 001: 1225 / 3002 loss=3.134, ppl=8.78, wps=5805.6, ups=0.09, wpb=64851, bsz=128, num_updates=1211, lr=9.99983e-05, gnorm=2.853, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13578 2021-06-18 22:25:14 | INFO | train_inner | epoch 001: 1226 / 3002 loss=3.186, ppl=9.1, wps=5975.3, ups=0.09, wpb=64805, bsz=128, num_updates=1212, lr=9.99983e-05, gnorm=3.364, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13588 2021-06-18 22:25:25 | INFO | train_inner | epoch 001: 1227 / 3002 loss=3.008, ppl=8.05, wps=5842.8, ups=0.09, wpb=64857, bsz=128, num_updates=1213, lr=9.99983e-05, gnorm=2.797, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13600 2021-06-18 22:25:36 | INFO | train_inner | epoch 001: 1228 / 3002 loss=3.039, ppl=8.22, wps=5748.8, ups=0.09, wpb=64797, bsz=128, num_updates=1214, lr=9.99983e-05, gnorm=2.647, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13611 2021-06-18 22:25:47 | INFO | train_inner | epoch 001: 1229 / 3002 loss=3.034, ppl=8.19, wps=5879.2, ups=0.09, wpb=64881, bsz=128, num_updates=1215, lr=9.99983e-05, gnorm=5.101, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13622 2021-06-18 22:25:59 | INFO | train_inner | epoch 001: 1230 / 3002 loss=2.985, ppl=7.92, wps=5805.9, ups=0.09, wpb=64844, bsz=128, num_updates=1216, lr=9.99983e-05, gnorm=2.706, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13633 2021-06-18 22:26:10 | INFO | train_inner | epoch 001: 1231 / 3002 loss=3.165, ppl=8.97, wps=5815.8, ups=0.09, wpb=64763, bsz=128, num_updates=1217, lr=9.99983e-05, gnorm=2.653, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13644 2021-06-18 22:26:21 | INFO | train_inner | epoch 001: 1232 / 3002 loss=3.086, ppl=8.49, wps=5802.1, ups=0.09, wpb=64858, bsz=128, num_updates=1218, lr=9.99983e-05, gnorm=3.13, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13655 2021-06-18 22:26:32 | INFO | train_inner | epoch 001: 1233 / 3002 loss=3.023, ppl=8.13, wps=5866.3, ups=0.09, wpb=64859, bsz=128, num_updates=1219, lr=9.99982e-05, gnorm=3.276, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13666 2021-06-18 22:26:43 | INFO | train_inner | epoch 001: 1234 / 3002 loss=3.092, ppl=8.53, wps=5965, ups=0.09, wpb=64831, bsz=128, num_updates=1220, lr=9.99982e-05, gnorm=2.63, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13677 2021-06-18 22:26:54 | INFO | train_inner | epoch 001: 1235 / 3002 loss=3.25, ppl=9.51, wps=5943.9, ups=0.09, wpb=64845, bsz=128, num_updates=1221, lr=9.99982e-05, gnorm=2.95, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13688 2021-06-18 22:27:05 | INFO | train_inner | epoch 001: 1236 / 3002 loss=3.137, ppl=8.8, wps=5866.6, ups=0.09, wpb=64750, bsz=128, num_updates=1222, lr=9.99982e-05, gnorm=2.773, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13699 2021-06-18 22:27:16 | INFO | train_inner | epoch 001: 1237 / 3002 loss=3.056, ppl=8.32, wps=5785.6, ups=0.09, wpb=64875, bsz=128, num_updates=1223, lr=9.99982e-05, gnorm=2.783, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13710 2021-06-18 22:27:27 | INFO | train_inner | epoch 001: 1238 / 3002 loss=3.156, ppl=8.91, wps=5942.1, ups=0.09, wpb=64833, bsz=128, num_updates=1224, lr=9.99982e-05, gnorm=2.667, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13721 2021-06-18 22:27:38 | INFO | train_inner | epoch 001: 1239 / 3002 loss=2.976, ppl=7.87, wps=6069.4, ups=0.09, wpb=64858, bsz=128, num_updates=1225, lr=9.99982e-05, gnorm=2.527, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13732 2021-06-18 22:27:48 | INFO | train_inner | epoch 001: 1240 / 3002 loss=3.04, ppl=8.23, wps=5982.8, ups=0.09, wpb=64816, bsz=128, num_updates=1226, lr=9.99982e-05, gnorm=2.693, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13743 2021-06-18 22:28:00 | INFO | train_inner | epoch 001: 1241 / 3002 loss=3.128, ppl=8.74, wps=5817.8, ups=0.09, wpb=64886, bsz=128, num_updates=1227, lr=9.99982e-05, gnorm=2.665, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13754 2021-06-18 22:28:11 | INFO | train_inner | epoch 001: 1242 / 3002 loss=3.043, ppl=8.24, wps=5887.8, ups=0.09, wpb=64875, bsz=128, num_updates=1228, lr=9.99982e-05, gnorm=2.585, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13765 2021-06-18 22:28:22 | INFO | train_inner | epoch 001: 1243 / 3002 loss=2.805, ppl=6.99, wps=5844, ups=0.09, wpb=64873, bsz=128, num_updates=1229, lr=9.99982e-05, gnorm=4.171, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13776 2021-06-18 22:28:33 | INFO | train_inner | epoch 001: 1244 / 3002 loss=2.923, ppl=7.59, wps=5807.1, ups=0.09, wpb=64809, bsz=128, num_updates=1230, lr=9.99982e-05, gnorm=5.66, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13787 2021-06-18 22:28:44 | INFO | train_inner | epoch 001: 1245 / 3002 loss=3.057, ppl=8.32, wps=5838.8, ups=0.09, wpb=64794, bsz=128, num_updates=1231, lr=9.99982e-05, gnorm=2.882, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13798 2021-06-18 22:28:55 | INFO | train_inner | epoch 001: 1246 / 3002 loss=3.066, ppl=8.37, wps=5862.1, ups=0.09, wpb=64822, bsz=128, num_updates=1232, lr=9.99981e-05, gnorm=2.865, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13809 2021-06-18 22:29:06 | INFO | train_inner | epoch 001: 1247 / 3002 loss=3.213, ppl=9.27, wps=5905.1, ups=0.09, wpb=64802, bsz=128, num_updates=1233, lr=9.99981e-05, gnorm=2.631, loss_scale=1, train_wall=10, gb_free=2.8, wall=13820 2021-06-18 22:29:17 | INFO | train_inner | epoch 001: 1248 / 3002 loss=3.084, ppl=8.48, wps=5757.9, ups=0.09, wpb=64854, bsz=128, num_updates=1234, lr=9.99981e-05, gnorm=2.679, loss_scale=1, train_wall=11, gb_free=2.8, wall=13832 2021-06-18 22:29:28 | INFO | train_inner | epoch 001: 1249 / 3002 loss=3.062, ppl=8.35, wps=5875.7, ups=0.09, wpb=64782, bsz=128, num_updates=1235, lr=9.99981e-05, gnorm=2.742, loss_scale=1, train_wall=11, gb_free=2.8, wall=13843 2021-06-18 22:29:39 | INFO | train_inner | epoch 001: 1250 / 3002 loss=3.225, ppl=9.35, wps=5860.1, ups=0.09, wpb=64876, bsz=128, num_updates=1236, lr=9.99981e-05, gnorm=3.162, loss_scale=1, train_wall=11, gb_free=2.8, wall=13854 2021-06-18 22:29:50 | INFO | train_inner | epoch 001: 1251 / 3002 loss=3.072, ppl=8.41, wps=5917.3, ups=0.09, wpb=64780, bsz=128, num_updates=1237, lr=9.99981e-05, gnorm=2.85, loss_scale=1, train_wall=11, gb_free=2.8, wall=13865 2021-06-18 22:30:01 | INFO | train_inner | epoch 001: 1252 / 3002 loss=3.209, ppl=9.25, wps=5944.4, ups=0.09, wpb=64832, bsz=128, num_updates=1238, lr=9.99981e-05, gnorm=2.713, loss_scale=1, train_wall=10, gb_free=2.8, wall=13876 2021-06-18 22:30:12 | INFO | train_inner | epoch 001: 1253 / 3002 loss=3.053, ppl=8.3, wps=5857.7, ups=0.09, wpb=64940, bsz=128, num_updates=1239, lr=9.99981e-05, gnorm=2.658, loss_scale=1, train_wall=11, gb_free=2.8, wall=13887 2021-06-18 22:30:23 | INFO | train_inner | epoch 001: 1254 / 3002 loss=3.051, ppl=8.29, wps=5840.5, ups=0.09, wpb=64811, bsz=128, num_updates=1240, lr=9.99981e-05, gnorm=2.574, loss_scale=1, train_wall=11, gb_free=2.8, wall=13898 2021-06-18 22:30:34 | INFO | train_inner | epoch 001: 1255 / 3002 loss=3.125, ppl=8.72, wps=5886.3, ups=0.09, wpb=64835, bsz=128, num_updates=1241, lr=9.99981e-05, gnorm=2.705, loss_scale=1, train_wall=11, gb_free=2.8, wall=13909 2021-06-18 22:30:45 | INFO | train_inner | epoch 001: 1256 / 3002 loss=2.967, ppl=7.82, wps=5924.8, ups=0.09, wpb=64832, bsz=128, num_updates=1242, lr=9.99981e-05, gnorm=2.558, loss_scale=1, train_wall=11, gb_free=2.8, wall=13920 2021-06-18 22:30:56 | INFO | train_inner | epoch 001: 1257 / 3002 loss=2.963, ppl=7.8, wps=5848.6, ups=0.09, wpb=64838, bsz=128, num_updates=1243, lr=9.99981e-05, gnorm=2.553, loss_scale=1, train_wall=11, gb_free=2.8, wall=13931 2021-06-18 22:31:07 | INFO | train_inner | epoch 001: 1258 / 3002 loss=3.045, ppl=8.25, wps=5974.7, ups=0.09, wpb=64853, bsz=128, num_updates=1244, lr=9.9998e-05, gnorm=2.538, loss_scale=1, train_wall=10, gb_free=2.8, wall=13942 2021-06-18 22:31:18 | INFO | train_inner | epoch 001: 1259 / 3002 loss=3.162, ppl=8.95, wps=5896.2, ups=0.09, wpb=64878, bsz=128, num_updates=1245, lr=9.9998e-05, gnorm=4.079, loss_scale=1, train_wall=11, gb_free=2.8, wall=13953 2021-06-18 22:31:30 | INFO | train_inner | epoch 001: 1260 / 3002 loss=3.177, ppl=9.04, wps=5760.2, ups=0.09, wpb=64797, bsz=128, num_updates=1246, lr=9.9998e-05, gnorm=2.672, loss_scale=1, train_wall=11, gb_free=2.8, wall=13964 2021-06-18 22:31:41 | INFO | train_inner | epoch 001: 1261 / 3002 loss=3.101, ppl=8.58, wps=5838.6, ups=0.09, wpb=64832, bsz=128, num_updates=1247, lr=9.9998e-05, gnorm=2.715, loss_scale=1, train_wall=11, gb_free=2.8, wall=13975 2021-06-18 22:31:52 | INFO | train_inner | epoch 001: 1262 / 3002 loss=2.835, ppl=7.14, wps=5747.3, ups=0.09, wpb=64854, bsz=128, num_updates=1248, lr=9.9998e-05, gnorm=2.447, loss_scale=1, train_wall=11, gb_free=2.8, wall=13986 2021-06-18 22:32:03 | INFO | train_inner | epoch 001: 1263 / 3002 loss=3.138, ppl=8.81, wps=5801.9, ups=0.09, wpb=64809, bsz=128, num_updates=1249, lr=9.9998e-05, gnorm=2.544, loss_scale=1, train_wall=11, gb_free=2.8, wall=13997 2021-06-18 22:32:14 | INFO | train_inner | epoch 001: 1264 / 3002 loss=3.011, ppl=8.06, wps=5828.6, ups=0.09, wpb=64906, bsz=128, num_updates=1250, lr=9.9998e-05, gnorm=2.583, loss_scale=1, train_wall=11, gb_free=2.8, wall=14009 2021-06-18 22:32:25 | INFO | train_inner | epoch 001: 1265 / 3002 loss=2.905, ppl=7.49, wps=5906.3, ups=0.09, wpb=64832, bsz=128, num_updates=1251, lr=9.9998e-05, gnorm=2.502, loss_scale=1, train_wall=11, gb_free=2.8, wall=14020 2021-06-18 22:32:36 | INFO | train_inner | epoch 001: 1266 / 3002 loss=3.084, ppl=8.48, wps=5827.4, ups=0.09, wpb=64797, bsz=128, num_updates=1252, lr=9.9998e-05, gnorm=3.686, loss_scale=1, train_wall=11, gb_free=2.8, wall=14031 2021-06-18 22:32:48 | INFO | train_inner | epoch 001: 1267 / 3002 loss=3.169, ppl=9, wps=5743.3, ups=0.09, wpb=64748, bsz=128, num_updates=1253, lr=9.9998e-05, gnorm=2.53, loss_scale=1, train_wall=11, gb_free=2.8, wall=14042 2021-06-18 22:32:59 | INFO | train_inner | epoch 001: 1268 / 3002 loss=3.028, ppl=8.16, wps=5885.5, ups=0.09, wpb=64831, bsz=128, num_updates=1254, lr=9.9998e-05, gnorm=2.618, loss_scale=1, train_wall=11, gb_free=2.8, wall=14053 2021-06-18 22:33:10 | INFO | train_inner | epoch 001: 1269 / 3002 loss=3.125, ppl=8.72, wps=5829.7, ups=0.09, wpb=64832, bsz=128, num_updates=1255, lr=9.9998e-05, gnorm=2.527, loss_scale=1, train_wall=11, gb_free=2.8, wall=14064 2021-06-18 22:33:21 | INFO | train_inner | epoch 001: 1270 / 3002 loss=2.99, ppl=7.95, wps=5930.3, ups=0.09, wpb=64897, bsz=128, num_updates=1256, lr=9.9998e-05, gnorm=2.597, loss_scale=1, train_wall=11, gb_free=2.8, wall=14075 2021-06-18 22:33:32 | INFO | train_inner | epoch 001: 1271 / 3002 loss=2.995, ppl=7.97, wps=5844.8, ups=0.09, wpb=64876, bsz=128, num_updates=1257, lr=9.99979e-05, gnorm=2.54, loss_scale=1, train_wall=11, gb_free=2.8, wall=14086 2021-06-18 22:33:43 | INFO | train_inner | epoch 001: 1272 / 3002 loss=3.064, ppl=8.36, wps=5779.7, ups=0.09, wpb=64871, bsz=128, num_updates=1258, lr=9.99979e-05, gnorm=2.739, loss_scale=1, train_wall=11, gb_free=2.8, wall=14097 2021-06-18 22:33:54 | INFO | train_inner | epoch 001: 1273 / 3002 loss=3.014, ppl=8.08, wps=5955.1, ups=0.09, wpb=64860, bsz=128, num_updates=1259, lr=9.99979e-05, gnorm=3.225, loss_scale=1, train_wall=10, gb_free=2.8, wall=14108 2021-06-18 22:34:05 | INFO | train_inner | epoch 001: 1274 / 3002 loss=3.101, ppl=8.58, wps=5858.3, ups=0.09, wpb=64786, bsz=128, num_updates=1260, lr=9.99979e-05, gnorm=2.479, loss_scale=1, train_wall=11, gb_free=2.8, wall=14119 2021-06-18 22:34:16 | INFO | train_inner | epoch 001: 1275 / 3002 loss=3.329, ppl=10.05, wps=5821.6, ups=0.09, wpb=64827, bsz=128, num_updates=1261, lr=9.99979e-05, gnorm=2.689, loss_scale=1, train_wall=11, gb_free=2.8, wall=14130 2021-06-18 22:34:27 | INFO | train_inner | epoch 001: 1276 / 3002 loss=3.067, ppl=8.38, wps=5859.2, ups=0.09, wpb=64823, bsz=128, num_updates=1262, lr=9.99979e-05, gnorm=2.575, loss_scale=1, train_wall=11, gb_free=2.8, wall=14142 2021-06-18 22:34:38 | INFO | train_inner | epoch 001: 1277 / 3002 loss=3.228, ppl=9.37, wps=5810, ups=0.09, wpb=64834, bsz=128, num_updates=1263, lr=9.99979e-05, gnorm=2.592, loss_scale=1, train_wall=11, gb_free=2.8, wall=14153 2021-06-18 22:34:49 | INFO | train_inner | epoch 001: 1278 / 3002 loss=3.027, ppl=8.15, wps=5837.6, ups=0.09, wpb=64875, bsz=128, num_updates=1264, lr=9.99979e-05, gnorm=2.615, loss_scale=1, train_wall=11, gb_free=2.8, wall=14164 2021-06-18 22:35:01 | INFO | train_inner | epoch 001: 1279 / 3002 loss=3.027, ppl=8.15, wps=5871.6, ups=0.09, wpb=64881, bsz=128, num_updates=1265, lr=9.99979e-05, gnorm=2.631, loss_scale=1, train_wall=11, gb_free=2.8, wall=14175 2021-06-18 22:35:12 | INFO | train_inner | epoch 001: 1280 / 3002 loss=2.975, ppl=7.86, wps=5809.9, ups=0.09, wpb=64852, bsz=128, num_updates=1266, lr=9.99979e-05, gnorm=2.433, loss_scale=1, train_wall=11, gb_free=2.8, wall=14186 2021-06-18 22:35:23 | INFO | train_inner | epoch 001: 1281 / 3002 loss=3.096, ppl=8.55, wps=5829.8, ups=0.09, wpb=64812, bsz=128, num_updates=1267, lr=9.99979e-05, gnorm=2.677, loss_scale=1, train_wall=11, gb_free=2.8, wall=14197 2021-06-18 22:35:34 | INFO | train_inner | epoch 001: 1282 / 3002 loss=3.168, ppl=8.99, wps=5923.8, ups=0.09, wpb=64815, bsz=128, num_updates=1268, lr=9.99979e-05, gnorm=2.635, loss_scale=1, train_wall=10, gb_free=2.8, wall=14208 2021-06-18 22:35:45 | INFO | train_inner | epoch 001: 1283 / 3002 loss=2.936, ppl=7.65, wps=5884, ups=0.09, wpb=64830, bsz=128, num_updates=1269, lr=9.99978e-05, gnorm=2.509, loss_scale=1, train_wall=11, gb_free=2.8, wall=14219 2021-06-18 22:35:56 | INFO | train_inner | epoch 001: 1284 / 3002 loss=3.009, ppl=8.05, wps=5806.9, ups=0.09, wpb=64782, bsz=128, num_updates=1270, lr=9.99978e-05, gnorm=2.542, loss_scale=1, train_wall=11, gb_free=2.8, wall=14230 2021-06-18 22:36:07 | INFO | train_inner | epoch 001: 1285 / 3002 loss=3.073, ppl=8.42, wps=5845.3, ups=0.09, wpb=64899, bsz=128, num_updates=1271, lr=9.99978e-05, gnorm=2.577, loss_scale=1, train_wall=11, gb_free=2.8, wall=14241 2021-06-18 22:36:18 | INFO | train_inner | epoch 001: 1286 / 3002 loss=3.077, ppl=8.44, wps=5913.1, ups=0.09, wpb=64793, bsz=128, num_updates=1272, lr=9.99978e-05, gnorm=2.578, loss_scale=1, train_wall=11, gb_free=2.8, wall=14252 2021-06-18 22:36:29 | INFO | train_inner | epoch 001: 1287 / 3002 loss=3.024, ppl=8.14, wps=5975.8, ups=0.09, wpb=64906, bsz=128, num_updates=1273, lr=9.99978e-05, gnorm=2.833, loss_scale=1, train_wall=10, gb_free=2.8, wall=14263 2021-06-18 22:36:40 | INFO | train_inner | epoch 001: 1288 / 3002 loss=2.868, ppl=7.3, wps=5927.4, ups=0.09, wpb=64891, bsz=128, num_updates=1274, lr=9.99978e-05, gnorm=2.757, loss_scale=1, train_wall=10, gb_free=2.8, wall=14274 2021-06-18 22:36:51 | INFO | train_inner | epoch 001: 1289 / 3002 loss=3.026, ppl=8.14, wps=5868.4, ups=0.09, wpb=64839, bsz=128, num_updates=1275, lr=9.99978e-05, gnorm=2.625, loss_scale=1, train_wall=11, gb_free=2.8, wall=14285 2021-06-18 22:37:02 | INFO | train_inner | epoch 001: 1290 / 3002 loss=3.119, ppl=8.69, wps=5979.5, ups=0.09, wpb=64776, bsz=128, num_updates=1276, lr=9.99978e-05, gnorm=4.228, loss_scale=1, train_wall=10, gb_free=2.8, wall=14296 2021-06-18 22:37:13 | INFO | train_inner | epoch 001: 1291 / 3002 loss=2.983, ppl=7.91, wps=5911.7, ups=0.09, wpb=64837, bsz=128, num_updates=1277, lr=9.99978e-05, gnorm=2.55, loss_scale=1, train_wall=11, gb_free=2.8, wall=14307 2021-06-18 22:37:24 | INFO | train_inner | epoch 001: 1292 / 3002 loss=3.089, ppl=8.51, wps=5783.7, ups=0.09, wpb=64825, bsz=128, num_updates=1278, lr=9.99978e-05, gnorm=2.597, loss_scale=1, train_wall=11, gb_free=2.8, wall=14318 2021-06-18 22:37:35 | INFO | train_inner | epoch 001: 1293 / 3002 loss=3.08, ppl=8.46, wps=5797, ups=0.09, wpb=64797, bsz=128, num_updates=1279, lr=9.99978e-05, gnorm=2.544, loss_scale=1, train_wall=11, gb_free=2.8, wall=14329 2021-06-18 22:37:46 | INFO | train_inner | epoch 001: 1294 / 3002 loss=3.033, ppl=8.19, wps=5963.8, ups=0.09, wpb=64883, bsz=128, num_updates=1280, lr=9.99978e-05, gnorm=2.748, loss_scale=1, train_wall=10, gb_free=2.8, wall=14340 2021-06-18 22:37:57 | INFO | train_inner | epoch 001: 1295 / 3002 loss=3.273, ppl=9.67, wps=5903.5, ups=0.09, wpb=64751, bsz=128, num_updates=1281, lr=9.99978e-05, gnorm=5.855, loss_scale=1, train_wall=11, gb_free=2.8, wall=14351 2021-06-18 22:38:08 | INFO | train_inner | epoch 001: 1296 / 3002 loss=2.982, ppl=7.9, wps=5753.5, ups=0.09, wpb=64867, bsz=128, num_updates=1282, lr=9.99977e-05, gnorm=2.443, loss_scale=1, train_wall=11, gb_free=2.8, wall=14362 2021-06-18 22:38:19 | INFO | train_inner | epoch 001: 1297 / 3002 loss=3.278, ppl=9.7, wps=5917.5, ups=0.09, wpb=64831, bsz=128, num_updates=1283, lr=9.99977e-05, gnorm=2.66, loss_scale=1, train_wall=11, gb_free=2.8, wall=14373 2021-06-18 22:38:30 | INFO | train_inner | epoch 001: 1298 / 3002 loss=3.061, ppl=8.35, wps=5886.6, ups=0.09, wpb=64872, bsz=128, num_updates=1284, lr=9.99977e-05, gnorm=2.804, loss_scale=1, train_wall=11, gb_free=2.8, wall=14384 2021-06-18 22:38:41 | INFO | train_inner | epoch 001: 1299 / 3002 loss=3.188, ppl=9.11, wps=5734, ups=0.09, wpb=64845, bsz=128, num_updates=1285, lr=9.99977e-05, gnorm=2.843, loss_scale=1, train_wall=11, gb_free=2.8, wall=14396 2021-06-18 22:38:53 | INFO | train_inner | epoch 001: 1300 / 3002 loss=3.039, ppl=8.22, wps=5865, ups=0.09, wpb=64851, bsz=128, num_updates=1286, lr=9.99977e-05, gnorm=2.453, loss_scale=1, train_wall=11, gb_free=2.8, wall=14407 2021-06-18 22:39:04 | INFO | train_inner | epoch 001: 1301 / 3002 loss=3.085, ppl=8.49, wps=5861.8, ups=0.09, wpb=64843, bsz=128, num_updates=1287, lr=9.99977e-05, gnorm=2.973, loss_scale=1, train_wall=11, gb_free=2.8, wall=14418 2021-06-18 22:39:15 | INFO | train_inner | epoch 001: 1302 / 3002 loss=3.086, ppl=8.49, wps=5818.1, ups=0.09, wpb=64803, bsz=128, num_updates=1288, lr=9.99977e-05, gnorm=2.579, loss_scale=1, train_wall=11, gb_free=2.8, wall=14429 2021-06-18 22:39:26 | INFO | train_inner | epoch 001: 1303 / 3002 loss=3.065, ppl=8.37, wps=5881.9, ups=0.09, wpb=64760, bsz=128, num_updates=1289, lr=9.99977e-05, gnorm=2.625, loss_scale=1, train_wall=11, gb_free=2.8, wall=14440 2021-06-18 22:39:37 | INFO | train_inner | epoch 001: 1304 / 3002 loss=3.007, ppl=8.04, wps=5857.7, ups=0.09, wpb=64880, bsz=128, num_updates=1290, lr=9.99977e-05, gnorm=2.7, loss_scale=1, train_wall=11, gb_free=2.8, wall=14451 2021-06-18 22:39:48 | INFO | train_inner | epoch 001: 1305 / 3002 loss=2.913, ppl=7.53, wps=5798.2, ups=0.09, wpb=64797, bsz=128, num_updates=1291, lr=9.99977e-05, gnorm=2.628, loss_scale=1, train_wall=11, gb_free=2.8, wall=14462 2021-06-18 22:39:59 | INFO | train_inner | epoch 001: 1306 / 3002 loss=2.978, ppl=7.88, wps=5848.9, ups=0.09, wpb=64843, bsz=128, num_updates=1292, lr=9.99977e-05, gnorm=2.584, loss_scale=1, train_wall=11, gb_free=2.8, wall=14473 2021-06-18 22:40:10 | INFO | train_inner | epoch 001: 1307 / 3002 loss=3.152, ppl=8.89, wps=5958.1, ups=0.09, wpb=64783, bsz=128, num_updates=1293, lr=9.99977e-05, gnorm=2.59, loss_scale=1, train_wall=10, gb_free=2.8, wall=14484 2021-06-18 22:40:21 | INFO | train_inner | epoch 001: 1308 / 3002 loss=3.051, ppl=8.29, wps=5754, ups=0.09, wpb=64824, bsz=128, num_updates=1294, lr=9.99976e-05, gnorm=2.636, loss_scale=1, train_wall=11, gb_free=2.8, wall=14496 2021-06-18 22:40:33 | INFO | train_inner | epoch 001: 1309 / 3002 loss=3.184, ppl=9.09, wps=5732, ups=0.09, wpb=64851, bsz=128, num_updates=1295, lr=9.99976e-05, gnorm=2.702, loss_scale=1, train_wall=11, gb_free=2.8, wall=14507 2021-06-18 22:40:44 | INFO | train_inner | epoch 001: 1310 / 3002 loss=3.08, ppl=8.46, wps=5830.8, ups=0.09, wpb=64804, bsz=128, num_updates=1296, lr=9.99976e-05, gnorm=2.562, loss_scale=1, train_wall=11, gb_free=2.8, wall=14518 2021-06-18 22:40:55 | INFO | train_inner | epoch 001: 1311 / 3002 loss=3.096, ppl=8.55, wps=5949.6, ups=0.09, wpb=64890, bsz=128, num_updates=1297, lr=9.99976e-05, gnorm=2.446, loss_scale=1, train_wall=10, gb_free=2.8, wall=14529 2021-06-18 22:41:05 | INFO | train_inner | epoch 001: 1312 / 3002 loss=2.957, ppl=7.76, wps=5914.9, ups=0.09, wpb=64774, bsz=128, num_updates=1298, lr=9.99976e-05, gnorm=2.627, loss_scale=1, train_wall=11, gb_free=2.8, wall=14540 2021-06-18 22:41:16 | INFO | train_inner | epoch 001: 1313 / 3002 loss=2.983, ppl=7.91, wps=5921.7, ups=0.09, wpb=64768, bsz=128, num_updates=1299, lr=9.99976e-05, gnorm=2.555, loss_scale=1, train_wall=10, gb_free=2.8, wall=14551 2021-06-18 22:41:28 | INFO | train_inner | epoch 001: 1314 / 3002 loss=3.328, ppl=10.04, wps=5837.1, ups=0.09, wpb=64793, bsz=128, num_updates=1300, lr=9.99976e-05, gnorm=2.716, loss_scale=1, train_wall=11, gb_free=2.8, wall=14562 2021-06-18 22:41:39 | INFO | train_inner | epoch 001: 1315 / 3002 loss=3.188, ppl=9.11, wps=5861.8, ups=0.09, wpb=64886, bsz=128, num_updates=1301, lr=9.99976e-05, gnorm=2.501, loss_scale=1, train_wall=11, gb_free=2.8, wall=14573 2021-06-18 22:41:49 | INFO | train_inner | epoch 001: 1316 / 3002 loss=3.005, ppl=8.03, wps=5960.7, ups=0.09, wpb=64890, bsz=128, num_updates=1302, lr=9.99976e-05, gnorm=2.484, loss_scale=1, train_wall=10, gb_free=2.8, wall=14584 2021-06-18 22:42:01 | INFO | train_inner | epoch 001: 1317 / 3002 loss=3.06, ppl=8.34, wps=5770.4, ups=0.09, wpb=64791, bsz=128, num_updates=1303, lr=9.99976e-05, gnorm=2.643, loss_scale=1, train_wall=11, gb_free=2.8, wall=14595 2021-06-18 22:42:12 | INFO | train_inner | epoch 001: 1318 / 3002 loss=3.01, ppl=8.06, wps=5933.1, ups=0.09, wpb=64793, bsz=128, num_updates=1304, lr=9.99976e-05, gnorm=2.456, loss_scale=1, train_wall=10, gb_free=2.8, wall=14606 2021-06-18 22:42:23 | INFO | train_inner | epoch 001: 1319 / 3002 loss=3.063, ppl=8.36, wps=5902.7, ups=0.09, wpb=64761, bsz=128, num_updates=1305, lr=9.99976e-05, gnorm=2.676, loss_scale=1, train_wall=11, gb_free=2.8, wall=14617 2021-06-18 22:42:34 | INFO | train_inner | epoch 001: 1320 / 3002 loss=2.955, ppl=7.75, wps=5806, ups=0.09, wpb=64863, bsz=128, num_updates=1306, lr=9.99976e-05, gnorm=2.552, loss_scale=1, train_wall=11, gb_free=2.8, wall=14628 2021-06-18 22:42:45 | INFO | train_inner | epoch 001: 1321 / 3002 loss=3.157, ppl=8.92, wps=5798.9, ups=0.09, wpb=64842, bsz=128, num_updates=1307, lr=9.99975e-05, gnorm=2.711, loss_scale=1, train_wall=11, gb_free=2.8, wall=14639 2021-06-18 22:42:56 | INFO | train_inner | epoch 001: 1322 / 3002 loss=2.905, ppl=7.49, wps=5862.2, ups=0.09, wpb=64918, bsz=128, num_updates=1308, lr=9.99975e-05, gnorm=2.54, loss_scale=1, train_wall=11, gb_free=2.8, wall=14650 2021-06-18 22:43:07 | INFO | train_inner | epoch 001: 1323 / 3002 loss=3.161, ppl=8.95, wps=5889.5, ups=0.09, wpb=64827, bsz=128, num_updates=1309, lr=9.99975e-05, gnorm=2.623, loss_scale=1, train_wall=11, gb_free=2.8, wall=14661 2021-06-18 22:43:18 | INFO | train_inner | epoch 001: 1324 / 3002 loss=2.933, ppl=7.64, wps=5905.1, ups=0.09, wpb=64808, bsz=128, num_updates=1310, lr=9.99975e-05, gnorm=2.626, loss_scale=1, train_wall=11, gb_free=2.8, wall=14672 2021-06-18 22:43:29 | INFO | train_inner | epoch 001: 1325 / 3002 loss=2.978, ppl=7.88, wps=5823.7, ups=0.09, wpb=64867, bsz=128, num_updates=1311, lr=9.99975e-05, gnorm=2.552, loss_scale=1, train_wall=11, gb_free=2.8, wall=14683 2021-06-18 22:43:40 | INFO | train_inner | epoch 001: 1326 / 3002 loss=3.083, ppl=8.48, wps=5928.6, ups=0.09, wpb=64882, bsz=128, num_updates=1312, lr=9.99975e-05, gnorm=2.643, loss_scale=1, train_wall=11, gb_free=2.8, wall=14694 2021-06-18 22:43:51 | INFO | train_inner | epoch 001: 1327 / 3002 loss=3.107, ppl=8.62, wps=5721.2, ups=0.09, wpb=64763, bsz=128, num_updates=1313, lr=9.99975e-05, gnorm=2.556, loss_scale=1, train_wall=11, gb_free=2.8, wall=14706 2021-06-18 22:44:03 | INFO | train_inner | epoch 001: 1328 / 3002 loss=2.875, ppl=7.34, wps=5811.7, ups=0.09, wpb=64777, bsz=128, num_updates=1314, lr=9.99975e-05, gnorm=2.497, loss_scale=1, train_wall=11, gb_free=2.8, wall=14717 2021-06-18 22:44:14 | INFO | train_inner | epoch 001: 1329 / 3002 loss=3.151, ppl=8.88, wps=5908.3, ups=0.09, wpb=64699, bsz=128, num_updates=1315, lr=9.99975e-05, gnorm=2.654, loss_scale=1, train_wall=10, gb_free=2.8, wall=14728 2021-06-18 22:44:25 | INFO | train_inner | epoch 001: 1330 / 3002 loss=3.057, ppl=8.32, wps=5783.4, ups=0.09, wpb=64848, bsz=128, num_updates=1316, lr=9.99975e-05, gnorm=2.54, loss_scale=1, train_wall=11, gb_free=2.8, wall=14739 2021-06-18 22:44:36 | INFO | train_inner | epoch 001: 1331 / 3002 loss=3.05, ppl=8.28, wps=5883.8, ups=0.09, wpb=64766, bsz=128, num_updates=1317, lr=9.99975e-05, gnorm=2.634, loss_scale=1, train_wall=11, gb_free=2.8, wall=14750 2021-06-18 22:44:47 | INFO | train_inner | epoch 001: 1332 / 3002 loss=3.053, ppl=8.3, wps=5758.9, ups=0.09, wpb=64931, bsz=128, num_updates=1318, lr=9.99975e-05, gnorm=2.463, loss_scale=1, train_wall=11, gb_free=2.8, wall=14761 2021-06-18 22:44:58 | INFO | train_inner | epoch 001: 1333 / 3002 loss=3.012, ppl=8.07, wps=5752.5, ups=0.09, wpb=64767, bsz=128, num_updates=1319, lr=9.99974e-05, gnorm=2.494, loss_scale=1, train_wall=11, gb_free=2.8, wall=14773 2021-06-18 22:45:10 | INFO | train_inner | epoch 001: 1334 / 3002 loss=3.173, ppl=9.02, wps=5737.4, ups=0.09, wpb=64873, bsz=128, num_updates=1320, lr=9.99974e-05, gnorm=2.646, loss_scale=1, train_wall=11, gb_free=2.8, wall=14784 2021-06-18 22:45:21 | INFO | train_inner | epoch 001: 1335 / 3002 loss=3.037, ppl=8.21, wps=5890.3, ups=0.09, wpb=64823, bsz=128, num_updates=1321, lr=9.99974e-05, gnorm=2.519, loss_scale=1, train_wall=11, gb_free=2.8, wall=14795 2021-06-18 22:45:31 | INFO | train_inner | epoch 001: 1336 / 3002 loss=3.035, ppl=8.2, wps=5970.1, ups=0.09, wpb=64816, bsz=128, num_updates=1322, lr=9.99974e-05, gnorm=2.585, loss_scale=1, train_wall=10, gb_free=2.8, wall=14806 2021-06-18 22:45:43 | INFO | train_inner | epoch 001: 1337 / 3002 loss=3.192, ppl=9.14, wps=5767.4, ups=0.09, wpb=64874, bsz=128, num_updates=1323, lr=9.99974e-05, gnorm=2.591, loss_scale=1, train_wall=11, gb_free=2.8, wall=14817 2021-06-18 22:45:54 | INFO | train_inner | epoch 001: 1338 / 3002 loss=3.05, ppl=8.28, wps=5780.7, ups=0.09, wpb=64832, bsz=128, num_updates=1324, lr=9.99974e-05, gnorm=2.401, loss_scale=1, train_wall=11, gb_free=2.8, wall=14828 2021-06-18 22:46:05 | INFO | train_inner | epoch 001: 1339 / 3002 loss=3.044, ppl=8.25, wps=5908.3, ups=0.09, wpb=64884, bsz=128, num_updates=1325, lr=9.99974e-05, gnorm=2.661, loss_scale=1, train_wall=11, gb_free=2.8, wall=14839 2021-06-18 22:46:16 | INFO | train_inner | epoch 001: 1340 / 3002 loss=3.033, ppl=8.18, wps=5959.1, ups=0.09, wpb=64850, bsz=128, num_updates=1326, lr=9.99974e-05, gnorm=2.533, loss_scale=1, train_wall=10, gb_free=2.8, wall=14850 2021-06-18 22:46:27 | INFO | train_inner | epoch 001: 1341 / 3002 loss=3.067, ppl=8.38, wps=5825.8, ups=0.09, wpb=64802, bsz=128, num_updates=1327, lr=9.99974e-05, gnorm=2.439, loss_scale=1, train_wall=11, gb_free=2.8, wall=14861 2021-06-18 22:46:38 | INFO | train_inner | epoch 001: 1342 / 3002 loss=3.08, ppl=8.45, wps=5838.4, ups=0.09, wpb=64876, bsz=128, num_updates=1328, lr=9.99974e-05, gnorm=2.531, loss_scale=1, train_wall=11, gb_free=2.8, wall=14872 2021-06-18 22:46:49 | INFO | train_inner | epoch 001: 1343 / 3002 loss=3.087, ppl=8.5, wps=5818, ups=0.09, wpb=64889, bsz=128, num_updates=1329, lr=9.99974e-05, gnorm=2.642, loss_scale=1, train_wall=11, gb_free=2.8, wall=14883 2021-06-18 22:47:00 | INFO | train_inner | epoch 001: 1344 / 3002 loss=2.983, ppl=7.91, wps=5939.9, ups=0.09, wpb=64851, bsz=128, num_updates=1330, lr=9.99974e-05, gnorm=2.561, loss_scale=1, train_wall=10, gb_free=2.8, wall=14894 2021-06-18 22:47:11 | INFO | train_inner | epoch 001: 1345 / 3002 loss=2.929, ppl=7.62, wps=5846.7, ups=0.09, wpb=64731, bsz=128, num_updates=1331, lr=9.99974e-05, gnorm=2.468, loss_scale=1, train_wall=11, gb_free=2.8, wall=14905 2021-06-18 22:47:22 | INFO | train_inner | epoch 001: 1346 / 3002 loss=2.761, ppl=6.78, wps=5784.7, ups=0.09, wpb=64797, bsz=128, num_updates=1332, lr=9.99973e-05, gnorm=2.461, loss_scale=1, train_wall=11, gb_free=2.8, wall=14917 2021-06-18 22:47:33 | INFO | train_inner | epoch 001: 1347 / 3002 loss=3.025, ppl=8.14, wps=5954.4, ups=0.09, wpb=64812, bsz=128, num_updates=1333, lr=9.99973e-05, gnorm=3.014, loss_scale=1, train_wall=10, gb_free=2.8, wall=14928 2021-06-18 22:47:44 | INFO | train_inner | epoch 001: 1348 / 3002 loss=3.181, ppl=9.07, wps=5930.3, ups=0.09, wpb=64796, bsz=128, num_updates=1334, lr=9.99973e-05, gnorm=2.634, loss_scale=1, train_wall=10, gb_free=2.8, wall=14938 2021-06-18 22:47:55 | INFO | train_inner | epoch 001: 1349 / 3002 loss=3.03, ppl=8.17, wps=5951, ups=0.09, wpb=64808, bsz=128, num_updates=1335, lr=9.99973e-05, gnorm=2.529, loss_scale=1, train_wall=10, gb_free=2.8, wall=14949 2021-06-18 22:48:06 | INFO | train_inner | epoch 001: 1350 / 3002 loss=2.956, ppl=7.76, wps=5840.3, ups=0.09, wpb=64826, bsz=128, num_updates=1336, lr=9.99973e-05, gnorm=2.427, loss_scale=1, train_wall=11, gb_free=2.8, wall=14960 2021-06-18 22:48:17 | INFO | train_inner | epoch 001: 1351 / 3002 loss=2.983, ppl=7.91, wps=5866, ups=0.09, wpb=64883, bsz=128, num_updates=1337, lr=9.99973e-05, gnorm=2.512, loss_scale=1, train_wall=11, gb_free=2.8, wall=14972 2021-06-18 22:48:28 | INFO | train_inner | epoch 001: 1352 / 3002 loss=3.003, ppl=8.02, wps=5985.6, ups=0.09, wpb=64868, bsz=128, num_updates=1338, lr=9.99973e-05, gnorm=2.537, loss_scale=1, train_wall=10, gb_free=2.8, wall=14982 2021-06-18 22:48:39 | INFO | train_inner | epoch 001: 1353 / 3002 loss=3.073, ppl=8.42, wps=5811, ups=0.09, wpb=64854, bsz=128, num_updates=1339, lr=9.99973e-05, gnorm=2.445, loss_scale=1, train_wall=11, gb_free=2.8, wall=14994 2021-06-18 22:48:50 | INFO | train_inner | epoch 001: 1354 / 3002 loss=3.021, ppl=8.12, wps=5777.2, ups=0.09, wpb=64830, bsz=128, num_updates=1340, lr=9.99973e-05, gnorm=2.625, loss_scale=1, train_wall=11, gb_free=2.8, wall=15005 2021-06-18 22:49:01 | INFO | train_inner | epoch 001: 1355 / 3002 loss=3.109, ppl=8.63, wps=5960, ups=0.09, wpb=64829, bsz=128, num_updates=1341, lr=9.99973e-05, gnorm=2.59, loss_scale=1, train_wall=10, gb_free=2.8, wall=15016 2021-06-18 22:49:12 | INFO | train_inner | epoch 001: 1356 / 3002 loss=3.063, ppl=8.36, wps=5803.4, ups=0.09, wpb=64799, bsz=128, num_updates=1342, lr=9.99973e-05, gnorm=2.799, loss_scale=1, train_wall=11, gb_free=2.8, wall=15027 2021-06-18 22:49:24 | INFO | train_inner | epoch 001: 1357 / 3002 loss=3.013, ppl=8.07, wps=5852.8, ups=0.09, wpb=64812, bsz=128, num_updates=1343, lr=9.99973e-05, gnorm=2.538, loss_scale=1, train_wall=11, gb_free=2.8, wall=15038 2021-06-18 22:49:34 | INFO | train_inner | epoch 001: 1358 / 3002 loss=3.101, ppl=8.58, wps=5968.1, ups=0.09, wpb=64836, bsz=128, num_updates=1344, lr=9.99972e-05, gnorm=2.526, loss_scale=1, train_wall=10, gb_free=2.8, wall=15049 2021-06-18 22:49:46 | INFO | train_inner | epoch 001: 1359 / 3002 loss=3.005, ppl=8.03, wps=5834, ups=0.09, wpb=64834, bsz=128, num_updates=1345, lr=9.99972e-05, gnorm=2.548, loss_scale=1, train_wall=11, gb_free=2.8, wall=15060 2021-06-18 22:49:57 | INFO | train_inner | epoch 001: 1360 / 3002 loss=3.076, ppl=8.43, wps=5855.6, ups=0.09, wpb=64761, bsz=128, num_updates=1346, lr=9.99972e-05, gnorm=2.481, loss_scale=1, train_wall=11, gb_free=2.8, wall=15071 2021-06-18 22:50:08 | INFO | train_inner | epoch 001: 1361 / 3002 loss=2.946, ppl=7.71, wps=5907.5, ups=0.09, wpb=64780, bsz=128, num_updates=1347, lr=9.99972e-05, gnorm=2.441, loss_scale=1, train_wall=11, gb_free=2.8, wall=15082 2021-06-18 22:50:19 | INFO | train_inner | epoch 001: 1362 / 3002 loss=3.092, ppl=8.53, wps=5900.6, ups=0.09, wpb=64838, bsz=128, num_updates=1348, lr=9.99972e-05, gnorm=2.59, loss_scale=1, train_wall=11, gb_free=2.8, wall=15093 2021-06-18 22:50:29 | INFO | train_inner | epoch 001: 1363 / 3002 loss=3.191, ppl=9.13, wps=6017.6, ups=0.09, wpb=64831, bsz=128, num_updates=1349, lr=9.99972e-05, gnorm=2.727, loss_scale=1, train_wall=10, gb_free=2.8, wall=15104 2021-06-18 22:50:40 | INFO | train_inner | epoch 001: 1364 / 3002 loss=2.985, ppl=7.92, wps=5970.6, ups=0.09, wpb=64874, bsz=128, num_updates=1350, lr=9.99972e-05, gnorm=5.209, loss_scale=1, train_wall=10, gb_free=2.8, wall=15115 2021-06-18 22:50:51 | INFO | train_inner | epoch 001: 1365 / 3002 loss=3.148, ppl=8.86, wps=5935.9, ups=0.09, wpb=64767, bsz=128, num_updates=1351, lr=9.99972e-05, gnorm=2.405, loss_scale=1, train_wall=10, gb_free=2.8, wall=15125 2021-06-18 22:51:02 | INFO | train_inner | epoch 001: 1366 / 3002 loss=3.121, ppl=8.7, wps=5850.8, ups=0.09, wpb=64866, bsz=128, num_updates=1352, lr=9.99972e-05, gnorm=3.326, loss_scale=1, train_wall=11, gb_free=2.8, wall=15137 2021-06-18 22:51:13 | INFO | train_inner | epoch 001: 1367 / 3002 loss=2.884, ppl=7.38, wps=5927.5, ups=0.09, wpb=64851, bsz=128, num_updates=1353, lr=9.99972e-05, gnorm=2.615, loss_scale=1, train_wall=10, gb_free=2.8, wall=15147 2021-06-18 22:51:24 | INFO | train_inner | epoch 001: 1368 / 3002 loss=3.144, ppl=8.84, wps=5755.8, ups=0.09, wpb=64841, bsz=128, num_updates=1354, lr=9.99972e-05, gnorm=2.693, loss_scale=1, train_wall=11, gb_free=2.8, wall=15159 2021-06-18 22:51:36 | INFO | train_inner | epoch 001: 1369 / 3002 loss=3.207, ppl=9.24, wps=5686.9, ups=0.09, wpb=64850, bsz=128, num_updates=1355, lr=9.99972e-05, gnorm=2.638, loss_scale=1, train_wall=11, gb_free=2.8, wall=15170 2021-06-18 22:51:47 | INFO | train_inner | epoch 001: 1370 / 3002 loss=3.02, ppl=8.11, wps=5699.7, ups=0.09, wpb=64819, bsz=128, num_updates=1356, lr=9.99972e-05, gnorm=2.657, loss_scale=1, train_wall=11, gb_free=2.8, wall=15181 2021-06-18 22:51:58 | INFO | train_inner | epoch 001: 1371 / 3002 loss=3.056, ppl=8.32, wps=5909.7, ups=0.09, wpb=64724, bsz=128, num_updates=1357, lr=9.99971e-05, gnorm=2.872, loss_scale=1, train_wall=11, gb_free=2.8, wall=15192 2021-06-18 22:52:09 | INFO | train_inner | epoch 001: 1372 / 3002 loss=3.152, ppl=8.89, wps=5874.6, ups=0.09, wpb=64780, bsz=128, num_updates=1358, lr=9.99971e-05, gnorm=2.745, loss_scale=1, train_wall=11, gb_free=2.8, wall=15203 2021-06-18 22:52:20 | INFO | train_inner | epoch 001: 1373 / 3002 loss=3.073, ppl=8.41, wps=5792.3, ups=0.09, wpb=64766, bsz=128, num_updates=1359, lr=9.99971e-05, gnorm=2.526, loss_scale=1, train_wall=11, gb_free=2.8, wall=15215 2021-06-18 22:52:31 | INFO | train_inner | epoch 001: 1374 / 3002 loss=3.085, ppl=8.49, wps=5923.9, ups=0.09, wpb=64921, bsz=128, num_updates=1360, lr=9.99971e-05, gnorm=25.934, loss_scale=1, train_wall=10, gb_free=2.8, wall=15226 2021-06-18 22:52:42 | INFO | train_inner | epoch 001: 1375 / 3002 loss=3.354, ppl=10.23, wps=5930.7, ups=0.09, wpb=64835, bsz=128, num_updates=1361, lr=9.99971e-05, gnorm=2.713, loss_scale=2, train_wall=10, gb_free=2.8, wall=15237 2021-06-18 22:52:53 | INFO | train_inner | epoch 001: 1376 / 3002 loss=3.016, ppl=8.09, wps=5777.5, ups=0.09, wpb=64800, bsz=128, num_updates=1362, lr=9.99971e-05, gnorm=3.702, loss_scale=2, train_wall=11, gb_free=2.8, wall=15248 2021-06-18 22:53:05 | INFO | train_inner | epoch 001: 1377 / 3002 loss=3.176, ppl=9.04, wps=5758.1, ups=0.09, wpb=64823, bsz=128, num_updates=1363, lr=9.99971e-05, gnorm=3.501, loss_scale=2, train_wall=11, gb_free=2.8, wall=15259 2021-06-18 22:53:16 | INFO | train_inner | epoch 001: 1378 / 3002 loss=3.275, ppl=9.68, wps=5841.7, ups=0.09, wpb=64819, bsz=128, num_updates=1364, lr=9.99971e-05, gnorm=3.395, loss_scale=2, train_wall=11, gb_free=2.8, wall=15270 2021-06-18 22:53:27 | INFO | train_inner | epoch 001: 1379 / 3002 loss=3.181, ppl=9.07, wps=5668.6, ups=0.09, wpb=64846, bsz=128, num_updates=1365, lr=9.99971e-05, gnorm=4.376, loss_scale=2, train_wall=11, gb_free=2.8, wall=15282 2021-06-18 22:53:38 | INFO | train_inner | epoch 001: 1380 / 3002 loss=3.215, ppl=9.29, wps=5791.4, ups=0.09, wpb=64699, bsz=128, num_updates=1366, lr=9.99971e-05, gnorm=3.361, loss_scale=2, train_wall=11, gb_free=2.8, wall=15293 2021-06-18 22:53:50 | INFO | train_inner | epoch 001: 1381 / 3002 loss=3.157, ppl=8.92, wps=5736.5, ups=0.09, wpb=64770, bsz=128, num_updates=1367, lr=9.99971e-05, gnorm=3.123, loss_scale=2, train_wall=11, gb_free=2.8, wall=15304 2021-06-18 22:54:01 | INFO | train_inner | epoch 001: 1382 / 3002 loss=3.236, ppl=9.42, wps=5910.7, ups=0.09, wpb=64861, bsz=128, num_updates=1368, lr=9.99971e-05, gnorm=4.146, loss_scale=2, train_wall=11, gb_free=2.8, wall=15315 2021-06-18 22:54:12 | INFO | train_inner | epoch 001: 1383 / 3002 loss=2.981, ppl=7.89, wps=5875.2, ups=0.09, wpb=64856, bsz=128, num_updates=1369, lr=9.9997e-05, gnorm=3.226, loss_scale=2, train_wall=11, gb_free=2.8, wall=15326 2021-06-18 22:54:23 | INFO | train_inner | epoch 001: 1384 / 3002 loss=3.063, ppl=8.36, wps=5851.9, ups=0.09, wpb=64818, bsz=128, num_updates=1370, lr=9.9997e-05, gnorm=3.252, loss_scale=2, train_wall=11, gb_free=2.8, wall=15337 2021-06-18 22:54:33 | INFO | train_inner | epoch 001: 1385 / 3002 loss=3.061, ppl=8.34, wps=6138.2, ups=0.09, wpb=64850, bsz=128, num_updates=1371, lr=9.9997e-05, gnorm=2.945, loss_scale=2, train_wall=10, gb_free=2.8, wall=15348 2021-06-18 22:54:44 | INFO | train_inner | epoch 001: 1386 / 3002 loss=3.119, ppl=8.69, wps=5953, ups=0.09, wpb=64822, bsz=128, num_updates=1372, lr=9.9997e-05, gnorm=2.84, loss_scale=2, train_wall=10, gb_free=2.8, wall=15359 2021-06-18 22:54:55 | INFO | train_inner | epoch 001: 1387 / 3002 loss=3.391, ppl=10.49, wps=5877.1, ups=0.09, wpb=64803, bsz=128, num_updates=1373, lr=9.9997e-05, gnorm=2.919, loss_scale=2, train_wall=11, gb_free=2.8, wall=15370 2021-06-18 22:55:06 | INFO | train_inner | epoch 001: 1388 / 3002 loss=3.014, ppl=8.08, wps=5940.8, ups=0.09, wpb=64919, bsz=128, num_updates=1374, lr=9.9997e-05, gnorm=2.784, loss_scale=2, train_wall=10, gb_free=2.8, wall=15381 2021-06-18 22:55:17 | INFO | train_inner | epoch 001: 1389 / 3002 loss=3.064, ppl=8.36, wps=5871.2, ups=0.09, wpb=64826, bsz=128, num_updates=1375, lr=9.9997e-05, gnorm=3.154, loss_scale=2, train_wall=11, gb_free=2.8, wall=15392 2021-06-18 22:55:28 | INFO | train_inner | epoch 001: 1390 / 3002 loss=2.934, ppl=7.64, wps=5796.3, ups=0.09, wpb=64833, bsz=128, num_updates=1376, lr=9.9997e-05, gnorm=2.567, loss_scale=2, train_wall=11, gb_free=2.8, wall=15403 2021-06-18 22:55:40 | INFO | train_inner | epoch 001: 1391 / 3002 loss=3.071, ppl=8.41, wps=5782.2, ups=0.09, wpb=64760, bsz=128, num_updates=1377, lr=9.9997e-05, gnorm=2.623, loss_scale=2, train_wall=11, gb_free=2.8, wall=15414 2021-06-18 22:55:50 | INFO | train_inner | epoch 001: 1392 / 3002 loss=3.157, ppl=8.92, wps=6074.1, ups=0.09, wpb=64857, bsz=128, num_updates=1378, lr=9.9997e-05, gnorm=2.68, loss_scale=2, train_wall=10, gb_free=2.8, wall=15425 2021-06-18 22:56:01 | INFO | train_inner | epoch 001: 1393 / 3002 loss=3.113, ppl=8.65, wps=5904, ups=0.09, wpb=64838, bsz=128, num_updates=1379, lr=9.9997e-05, gnorm=2.809, loss_scale=2, train_wall=11, gb_free=2.8, wall=15436 2021-06-18 22:56:12 | INFO | train_inner | epoch 001: 1394 / 3002 loss=3.091, ppl=8.52, wps=5872.2, ups=0.09, wpb=64835, bsz=128, num_updates=1380, lr=9.9997e-05, gnorm=2.594, loss_scale=2, train_wall=11, gb_free=2.8, wall=15447 2021-06-18 22:56:23 | INFO | train_inner | epoch 001: 1395 / 3002 loss=3.107, ppl=8.61, wps=5793.1, ups=0.09, wpb=64736, bsz=128, num_updates=1381, lr=9.9997e-05, gnorm=2.68, loss_scale=2, train_wall=11, gb_free=2.8, wall=15458 2021-06-18 22:56:35 | INFO | train_inner | epoch 001: 1396 / 3002 loss=2.918, ppl=7.56, wps=5842.6, ups=0.09, wpb=64814, bsz=128, num_updates=1382, lr=9.99969e-05, gnorm=2.628, loss_scale=2, train_wall=11, gb_free=2.8, wall=15469 2021-06-18 22:56:46 | INFO | train_inner | epoch 001: 1397 / 3002 loss=3.085, ppl=8.49, wps=5830.1, ups=0.09, wpb=64890, bsz=128, num_updates=1383, lr=9.99969e-05, gnorm=2.661, loss_scale=2, train_wall=11, gb_free=2.8, wall=15480 2021-06-18 22:56:57 | INFO | train_inner | epoch 001: 1398 / 3002 loss=2.912, ppl=7.53, wps=5874.6, ups=0.09, wpb=64859, bsz=128, num_updates=1384, lr=9.99969e-05, gnorm=2.849, loss_scale=2, train_wall=11, gb_free=2.8, wall=15491 2021-06-18 22:57:08 | INFO | train_inner | epoch 001: 1399 / 3002 loss=3.041, ppl=8.23, wps=5873.8, ups=0.09, wpb=64755, bsz=128, num_updates=1385, lr=9.99969e-05, gnorm=3.197, loss_scale=2, train_wall=11, gb_free=2.8, wall=15502 2021-06-18 22:57:19 | INFO | train_inner | epoch 001: 1400 / 3002 loss=2.889, ppl=7.41, wps=5850.4, ups=0.09, wpb=64829, bsz=128, num_updates=1386, lr=9.99969e-05, gnorm=2.915, loss_scale=2, train_wall=11, gb_free=2.8, wall=15513 2021-06-18 22:57:30 | INFO | train_inner | epoch 001: 1401 / 3002 loss=3.222, ppl=9.33, wps=5773.1, ups=0.09, wpb=64869, bsz=128, num_updates=1387, lr=9.99969e-05, gnorm=2.77, loss_scale=2, train_wall=11, gb_free=2.8, wall=15524 2021-06-18 22:57:41 | INFO | train_inner | epoch 001: 1402 / 3002 loss=3.091, ppl=8.52, wps=5866.9, ups=0.09, wpb=64787, bsz=128, num_updates=1388, lr=9.99969e-05, gnorm=2.618, loss_scale=2, train_wall=11, gb_free=2.8, wall=15535 2021-06-18 22:57:52 | INFO | train_inner | epoch 001: 1403 / 3002 loss=3.181, ppl=9.07, wps=5890.2, ups=0.09, wpb=64802, bsz=128, num_updates=1389, lr=9.99969e-05, gnorm=2.634, loss_scale=2, train_wall=11, gb_free=2.8, wall=15546 2021-06-18 22:58:03 | INFO | train_inner | epoch 001: 1404 / 3002 loss=2.863, ppl=7.27, wps=5891.5, ups=0.09, wpb=64842, bsz=128, num_updates=1390, lr=9.99969e-05, gnorm=2.691, loss_scale=2, train_wall=11, gb_free=2.8, wall=15557 2021-06-18 22:58:14 | INFO | train_inner | epoch 001: 1405 / 3002 loss=2.987, ppl=7.93, wps=6028.9, ups=0.09, wpb=64891, bsz=128, num_updates=1391, lr=9.99969e-05, gnorm=2.681, loss_scale=2, train_wall=10, gb_free=2.8, wall=15568 2021-06-18 22:58:25 | INFO | train_inner | epoch 001: 1406 / 3002 loss=2.988, ppl=7.93, wps=5795.5, ups=0.09, wpb=64789, bsz=128, num_updates=1392, lr=9.99969e-05, gnorm=2.749, loss_scale=2, train_wall=11, gb_free=2.8, wall=15579 2021-06-18 22:58:36 | INFO | train_inner | epoch 001: 1407 / 3002 loss=3.146, ppl=8.85, wps=5864.6, ups=0.09, wpb=64872, bsz=128, num_updates=1393, lr=9.99969e-05, gnorm=2.646, loss_scale=2, train_wall=11, gb_free=2.8, wall=15590 2021-06-18 22:58:47 | INFO | train_inner | epoch 001: 1408 / 3002 loss=3.185, ppl=9.09, wps=5889.7, ups=0.09, wpb=64813, bsz=128, num_updates=1394, lr=9.99968e-05, gnorm=2.605, loss_scale=2, train_wall=11, gb_free=2.8, wall=15601 2021-06-18 22:58:58 | INFO | train_inner | epoch 001: 1409 / 3002 loss=3.067, ppl=8.38, wps=5813.3, ups=0.09, wpb=64749, bsz=128, num_updates=1395, lr=9.99968e-05, gnorm=2.502, loss_scale=2, train_wall=11, gb_free=2.8, wall=15613 2021-06-18 22:59:10 | INFO | train_inner | epoch 001: 1410 / 3002 loss=3.087, ppl=8.5, wps=5764.2, ups=0.09, wpb=64760, bsz=128, num_updates=1396, lr=9.99968e-05, gnorm=2.484, loss_scale=2, train_wall=11, gb_free=2.8, wall=15624 2021-06-18 22:59:21 | INFO | train_inner | epoch 001: 1411 / 3002 loss=3.157, ppl=8.92, wps=5768.5, ups=0.09, wpb=64768, bsz=128, num_updates=1397, lr=9.99968e-05, gnorm=2.551, loss_scale=2, train_wall=11, gb_free=2.8, wall=15635 2021-06-18 22:59:32 | INFO | train_inner | epoch 001: 1412 / 3002 loss=3.096, ppl=8.55, wps=5850.1, ups=0.09, wpb=64751, bsz=128, num_updates=1398, lr=9.99968e-05, gnorm=2.45, loss_scale=2, train_wall=11, gb_free=2.8, wall=15646 2021-06-18 22:59:43 | INFO | train_inner | epoch 001: 1413 / 3002 loss=3.111, ppl=8.64, wps=5872.1, ups=0.09, wpb=64839, bsz=128, num_updates=1399, lr=9.99968e-05, gnorm=2.689, loss_scale=2, train_wall=11, gb_free=2.8, wall=15657 2021-06-18 22:59:54 | INFO | train_inner | epoch 001: 1414 / 3002 loss=3.028, ppl=8.16, wps=5891.1, ups=0.09, wpb=64837, bsz=128, num_updates=1400, lr=9.99968e-05, gnorm=2.467, loss_scale=2, train_wall=11, gb_free=2.8, wall=15668 2021-06-18 23:00:05 | INFO | train_inner | epoch 001: 1415 / 3002 loss=3.173, ppl=9.02, wps=5839.4, ups=0.09, wpb=64878, bsz=128, num_updates=1401, lr=9.99968e-05, gnorm=2.575, loss_scale=2, train_wall=11, gb_free=2.8, wall=15679 2021-06-18 23:00:16 | INFO | train_inner | epoch 001: 1416 / 3002 loss=3.049, ppl=8.28, wps=5907.5, ups=0.09, wpb=64752, bsz=128, num_updates=1402, lr=9.99968e-05, gnorm=2.669, loss_scale=2, train_wall=11, gb_free=2.8, wall=15690 2021-06-18 23:00:27 | INFO | train_inner | epoch 001: 1417 / 3002 loss=3.213, ppl=9.27, wps=5828.5, ups=0.09, wpb=64754, bsz=128, num_updates=1403, lr=9.99968e-05, gnorm=2.583, loss_scale=2, train_wall=11, gb_free=2.8, wall=15701 2021-06-18 23:00:38 | INFO | train_inner | epoch 001: 1418 / 3002 loss=3.112, ppl=8.65, wps=5765.3, ups=0.09, wpb=64843, bsz=128, num_updates=1404, lr=9.99968e-05, gnorm=2.51, loss_scale=2, train_wall=11, gb_free=2.8, wall=15713 2021-06-18 23:00:49 | INFO | train_inner | epoch 001: 1419 / 3002 loss=2.998, ppl=7.99, wps=5842.2, ups=0.09, wpb=64820, bsz=128, num_updates=1405, lr=9.99968e-05, gnorm=2.606, loss_scale=2, train_wall=11, gb_free=2.8, wall=15724 2021-06-18 23:01:00 | INFO | train_inner | epoch 001: 1420 / 3002 loss=3.094, ppl=8.54, wps=5947.3, ups=0.09, wpb=64857, bsz=128, num_updates=1406, lr=9.99968e-05, gnorm=4.097, loss_scale=2, train_wall=10, gb_free=2.8, wall=15735 2021-06-18 23:01:11 | INFO | train_inner | epoch 001: 1421 / 3002 loss=3.089, ppl=8.51, wps=5817.9, ups=0.09, wpb=64748, bsz=128, num_updates=1407, lr=9.99967e-05, gnorm=2.612, loss_scale=2, train_wall=11, gb_free=2.8, wall=15746 2021-06-18 23:01:23 | INFO | train_inner | epoch 001: 1422 / 3002 loss=3.142, ppl=8.83, wps=5777.5, ups=0.09, wpb=64849, bsz=128, num_updates=1408, lr=9.99967e-05, gnorm=2.589, loss_scale=2, train_wall=11, gb_free=2.8, wall=15757 2021-06-18 23:01:34 | INFO | train_inner | epoch 001: 1423 / 3002 loss=3.053, ppl=8.3, wps=5870, ups=0.09, wpb=64793, bsz=128, num_updates=1409, lr=9.99967e-05, gnorm=2.602, loss_scale=2, train_wall=11, gb_free=2.8, wall=15768 2021-06-18 23:01:45 | INFO | train_inner | epoch 001: 1424 / 3002 loss=2.926, ppl=7.6, wps=5980.4, ups=0.09, wpb=64914, bsz=128, num_updates=1410, lr=9.99967e-05, gnorm=2.542, loss_scale=2, train_wall=10, gb_free=2.8, wall=15779 2021-06-18 23:01:56 | INFO | train_inner | epoch 001: 1425 / 3002 loss=2.96, ppl=7.78, wps=5770.4, ups=0.09, wpb=64842, bsz=128, num_updates=1411, lr=9.99967e-05, gnorm=2.894, loss_scale=2, train_wall=11, gb_free=2.8, wall=15790 2021-06-18 23:02:07 | INFO | train_inner | epoch 001: 1426 / 3002 loss=3.074, ppl=8.42, wps=5924.9, ups=0.09, wpb=64880, bsz=128, num_updates=1412, lr=9.99967e-05, gnorm=2.911, loss_scale=2, train_wall=11, gb_free=2.8, wall=15801 2021-06-18 23:02:18 | INFO | train_inner | epoch 001: 1427 / 3002 loss=2.946, ppl=7.71, wps=5946.9, ups=0.09, wpb=64828, bsz=128, num_updates=1413, lr=9.99967e-05, gnorm=2.444, loss_scale=2, train_wall=10, gb_free=2.8, wall=15812 2021-06-18 23:02:29 | INFO | train_inner | epoch 001: 1428 / 3002 loss=3.087, ppl=8.5, wps=5783, ups=0.09, wpb=64795, bsz=128, num_updates=1414, lr=9.99967e-05, gnorm=2.58, loss_scale=2, train_wall=11, gb_free=2.8, wall=15823 2021-06-18 23:02:40 | INFO | train_inner | epoch 001: 1429 / 3002 loss=3.006, ppl=8.03, wps=5859.5, ups=0.09, wpb=64835, bsz=128, num_updates=1415, lr=9.99967e-05, gnorm=2.701, loss_scale=2, train_wall=11, gb_free=2.8, wall=15834 2021-06-18 23:02:51 | INFO | train_inner | epoch 001: 1430 / 3002 loss=3.105, ppl=8.6, wps=5749.6, ups=0.09, wpb=64824, bsz=128, num_updates=1416, lr=9.99967e-05, gnorm=2.646, loss_scale=2, train_wall=11, gb_free=2.8, wall=15846 2021-06-18 23:03:02 | INFO | train_inner | epoch 001: 1431 / 3002 loss=2.987, ppl=7.93, wps=5877.6, ups=0.09, wpb=64855, bsz=128, num_updates=1417, lr=9.99967e-05, gnorm=2.435, loss_scale=2, train_wall=11, gb_free=2.8, wall=15857 2021-06-18 23:03:13 | INFO | train_inner | epoch 001: 1432 / 3002 loss=3.004, ppl=8.02, wps=5901.5, ups=0.09, wpb=64888, bsz=128, num_updates=1418, lr=9.99967e-05, gnorm=2.519, loss_scale=2, train_wall=11, gb_free=2.8, wall=15868 2021-06-18 23:03:24 | INFO | train_inner | epoch 001: 1433 / 3002 loss=3.137, ppl=8.79, wps=5879.3, ups=0.09, wpb=64836, bsz=128, num_updates=1419, lr=9.99966e-05, gnorm=3.485, loss_scale=2, train_wall=11, gb_free=2.8, wall=15879 2021-06-18 23:03:35 | INFO | train_inner | epoch 001: 1434 / 3002 loss=2.874, ppl=7.33, wps=5889.6, ups=0.09, wpb=64933, bsz=128, num_updates=1420, lr=9.99966e-05, gnorm=2.549, loss_scale=2, train_wall=11, gb_free=2.8, wall=15890 2021-06-18 23:03:46 | INFO | train_inner | epoch 001: 1435 / 3002 loss=3.041, ppl=8.23, wps=5868.6, ups=0.09, wpb=64827, bsz=128, num_updates=1421, lr=9.99966e-05, gnorm=2.545, loss_scale=2, train_wall=11, gb_free=2.8, wall=15901 2021-06-18 23:03:57 | INFO | train_inner | epoch 001: 1436 / 3002 loss=3.286, ppl=9.75, wps=5981.4, ups=0.09, wpb=64820, bsz=128, num_updates=1422, lr=9.99966e-05, gnorm=2.688, loss_scale=2, train_wall=10, gb_free=2.8, wall=15911 2021-06-18 23:04:08 | INFO | train_inner | epoch 001: 1437 / 3002 loss=3.197, ppl=9.17, wps=5845.8, ups=0.09, wpb=64794, bsz=128, num_updates=1423, lr=9.99966e-05, gnorm=2.592, loss_scale=2, train_wall=11, gb_free=2.8, wall=15923 2021-06-18 23:04:19 | INFO | train_inner | epoch 001: 1438 / 3002 loss=3.128, ppl=8.74, wps=5912.8, ups=0.09, wpb=64853, bsz=128, num_updates=1424, lr=9.99966e-05, gnorm=8.01, loss_scale=2, train_wall=11, gb_free=2.8, wall=15934 2021-06-18 23:04:30 | INFO | train_inner | epoch 001: 1439 / 3002 loss=3.001, ppl=8, wps=5840.2, ups=0.09, wpb=64792, bsz=128, num_updates=1425, lr=9.99966e-05, gnorm=2.965, loss_scale=2, train_wall=11, gb_free=2.8, wall=15945 2021-06-18 23:04:41 | INFO | train_inner | epoch 001: 1440 / 3002 loss=3.018, ppl=8.1, wps=5824.3, ups=0.09, wpb=64849, bsz=128, num_updates=1426, lr=9.99966e-05, gnorm=2.466, loss_scale=2, train_wall=11, gb_free=2.8, wall=15956 2021-06-18 23:04:53 | INFO | train_inner | epoch 001: 1441 / 3002 loss=3.008, ppl=8.04, wps=5794.1, ups=0.09, wpb=64846, bsz=128, num_updates=1427, lr=9.99966e-05, gnorm=2.682, loss_scale=2, train_wall=11, gb_free=2.8, wall=15967 2021-06-18 23:05:04 | INFO | train_inner | epoch 001: 1442 / 3002 loss=3.074, ppl=8.42, wps=5823.3, ups=0.09, wpb=64807, bsz=128, num_updates=1428, lr=9.99966e-05, gnorm=2.67, loss_scale=2, train_wall=11, gb_free=2.8, wall=15978 2021-06-18 23:05:15 | INFO | train_inner | epoch 001: 1443 / 3002 loss=3.139, ppl=8.81, wps=5830.6, ups=0.09, wpb=64818, bsz=128, num_updates=1429, lr=9.99966e-05, gnorm=2.625, loss_scale=2, train_wall=11, gb_free=2.8, wall=15989 2021-06-18 23:05:26 | INFO | train_inner | epoch 001: 1444 / 3002 loss=3.05, ppl=8.28, wps=5794.2, ups=0.09, wpb=64866, bsz=128, num_updates=1430, lr=9.99966e-05, gnorm=2.581, loss_scale=2, train_wall=11, gb_free=2.8, wall=16000 2021-06-18 23:05:37 | INFO | train_inner | epoch 001: 1445 / 3002 loss=2.873, ppl=7.33, wps=5882.9, ups=0.09, wpb=64803, bsz=128, num_updates=1431, lr=9.99966e-05, gnorm=2.71, loss_scale=2, train_wall=11, gb_free=2.8, wall=16011 2021-06-18 23:05:48 | INFO | train_inner | epoch 001: 1446 / 3002 loss=2.986, ppl=7.92, wps=5782.1, ups=0.09, wpb=64821, bsz=128, num_updates=1432, lr=9.99965e-05, gnorm=2.591, loss_scale=2, train_wall=11, gb_free=2.8, wall=16023 2021-06-18 23:05:59 | INFO | train_inner | epoch 001: 1447 / 3002 loss=3.183, ppl=9.08, wps=5917.6, ups=0.09, wpb=64809, bsz=128, num_updates=1433, lr=9.99965e-05, gnorm=2.723, loss_scale=2, train_wall=11, gb_free=2.8, wall=16034 2021-06-18 23:06:10 | INFO | train_inner | epoch 001: 1448 / 3002 loss=3.009, ppl=8.05, wps=5824.7, ups=0.09, wpb=64814, bsz=128, num_updates=1434, lr=9.99965e-05, gnorm=2.659, loss_scale=2, train_wall=11, gb_free=2.8, wall=16045 2021-06-18 23:06:22 | INFO | train_inner | epoch 001: 1449 / 3002 loss=3.12, ppl=8.7, wps=5795, ups=0.09, wpb=64855, bsz=128, num_updates=1435, lr=9.99965e-05, gnorm=2.506, loss_scale=2, train_wall=11, gb_free=2.8, wall=16056 2021-06-18 23:06:33 | INFO | train_inner | epoch 001: 1450 / 3002 loss=2.89, ppl=7.41, wps=5820.1, ups=0.09, wpb=64833, bsz=128, num_updates=1436, lr=9.99965e-05, gnorm=2.539, loss_scale=2, train_wall=11, gb_free=2.8, wall=16067 2021-06-18 23:06:44 | INFO | train_inner | epoch 001: 1451 / 3002 loss=2.924, ppl=7.59, wps=5824.4, ups=0.09, wpb=64882, bsz=128, num_updates=1437, lr=9.99965e-05, gnorm=2.541, loss_scale=2, train_wall=11, gb_free=2.8, wall=16078 2021-06-18 23:06:55 | INFO | train_inner | epoch 001: 1452 / 3002 loss=2.983, ppl=7.91, wps=5829.4, ups=0.09, wpb=64852, bsz=128, num_updates=1438, lr=9.99965e-05, gnorm=2.538, loss_scale=2, train_wall=11, gb_free=2.8, wall=16089 2021-06-18 23:07:06 | INFO | train_inner | epoch 001: 1453 / 3002 loss=2.923, ppl=7.58, wps=5941.6, ups=0.09, wpb=64774, bsz=128, num_updates=1439, lr=9.99965e-05, gnorm=2.494, loss_scale=2, train_wall=10, gb_free=2.8, wall=16100 2021-06-18 23:07:17 | INFO | train_inner | epoch 001: 1454 / 3002 loss=2.967, ppl=7.82, wps=5869.7, ups=0.09, wpb=64798, bsz=128, num_updates=1440, lr=9.99965e-05, gnorm=2.576, loss_scale=2, train_wall=11, gb_free=2.8, wall=16111 2021-06-18 23:07:28 | INFO | train_inner | epoch 001: 1455 / 3002 loss=3.021, ppl=8.12, wps=5782.2, ups=0.09, wpb=64811, bsz=128, num_updates=1441, lr=9.99965e-05, gnorm=2.494, loss_scale=2, train_wall=11, gb_free=2.8, wall=16122 2021-06-18 23:07:39 | INFO | train_inner | epoch 001: 1456 / 3002 loss=2.868, ppl=7.3, wps=5833.3, ups=0.09, wpb=64817, bsz=128, num_updates=1442, lr=9.99965e-05, gnorm=2.584, loss_scale=2, train_wall=11, gb_free=2.8, wall=16134 2021-06-18 23:07:50 | INFO | train_inner | epoch 001: 1457 / 3002 loss=3.141, ppl=8.82, wps=5959.6, ups=0.09, wpb=64864, bsz=128, num_updates=1443, lr=9.99965e-05, gnorm=2.661, loss_scale=2, train_wall=10, gb_free=2.8, wall=16144 2021-06-18 23:08:01 | INFO | train_inner | epoch 001: 1458 / 3002 loss=3.174, ppl=9.03, wps=5830.5, ups=0.09, wpb=64818, bsz=128, num_updates=1444, lr=9.99964e-05, gnorm=2.553, loss_scale=2, train_wall=11, gb_free=2.8, wall=16156 2021-06-18 23:08:12 | INFO | train_inner | epoch 001: 1459 / 3002 loss=2.935, ppl=7.65, wps=5846.1, ups=0.09, wpb=64830, bsz=128, num_updates=1445, lr=9.99964e-05, gnorm=2.564, loss_scale=2, train_wall=11, gb_free=2.8, wall=16167 2021-06-18 23:08:23 | INFO | train_inner | epoch 001: 1460 / 3002 loss=3.171, ppl=9.01, wps=5825.5, ups=0.09, wpb=64813, bsz=128, num_updates=1446, lr=9.99964e-05, gnorm=3.049, loss_scale=2, train_wall=11, gb_free=2.8, wall=16178 2021-06-18 23:08:35 | INFO | train_inner | epoch 001: 1461 / 3002 loss=2.877, ppl=7.35, wps=5729.8, ups=0.09, wpb=64738, bsz=128, num_updates=1447, lr=9.99964e-05, gnorm=3.962, loss_scale=2, train_wall=11, gb_free=2.8, wall=16189 2021-06-18 23:08:46 | INFO | train_inner | epoch 001: 1462 / 3002 loss=3.123, ppl=8.71, wps=5836.5, ups=0.09, wpb=64837, bsz=128, num_updates=1448, lr=9.99964e-05, gnorm=2.433, loss_scale=2, train_wall=11, gb_free=2.8, wall=16200 2021-06-18 23:08:57 | INFO | train_inner | epoch 001: 1463 / 3002 loss=2.988, ppl=7.93, wps=5837, ups=0.09, wpb=64862, bsz=128, num_updates=1449, lr=9.99964e-05, gnorm=2.686, loss_scale=2, train_wall=11, gb_free=2.8, wall=16211 2021-06-18 23:09:08 | INFO | train_inner | epoch 001: 1464 / 3002 loss=3.084, ppl=8.48, wps=5836.5, ups=0.09, wpb=64836, bsz=128, num_updates=1450, lr=9.99964e-05, gnorm=2.542, loss_scale=2, train_wall=11, gb_free=2.8, wall=16222 2021-06-18 23:09:19 | INFO | train_inner | epoch 001: 1465 / 3002 loss=3.204, ppl=9.22, wps=5991.6, ups=0.09, wpb=64797, bsz=128, num_updates=1451, lr=9.99964e-05, gnorm=2.607, loss_scale=2, train_wall=10, gb_free=2.8, wall=16233 2021-06-18 23:09:30 | INFO | train_inner | epoch 001: 1466 / 3002 loss=3.127, ppl=8.73, wps=5826.1, ups=0.09, wpb=64852, bsz=128, num_updates=1452, lr=9.99964e-05, gnorm=2.651, loss_scale=2, train_wall=11, gb_free=2.8, wall=16244 2021-06-18 23:09:41 | INFO | train_inner | epoch 001: 1467 / 3002 loss=3.179, ppl=9.06, wps=5929.4, ups=0.09, wpb=64798, bsz=128, num_updates=1453, lr=9.99964e-05, gnorm=2.461, loss_scale=2, train_wall=10, gb_free=2.8, wall=16255 2021-06-18 23:09:52 | INFO | train_inner | epoch 001: 1468 / 3002 loss=2.942, ppl=7.69, wps=5883.1, ups=0.09, wpb=64872, bsz=128, num_updates=1454, lr=9.99964e-05, gnorm=2.572, loss_scale=2, train_wall=11, gb_free=2.8, wall=16266 2021-06-18 23:10:03 | INFO | train_inner | epoch 001: 1469 / 3002 loss=3.093, ppl=8.53, wps=5796.3, ups=0.09, wpb=64760, bsz=128, num_updates=1455, lr=9.99964e-05, gnorm=2.571, loss_scale=2, train_wall=11, gb_free=2.8, wall=16277 2021-06-18 23:10:14 | INFO | train_inner | epoch 001: 1470 / 3002 loss=2.945, ppl=7.7, wps=5801.7, ups=0.09, wpb=64805, bsz=128, num_updates=1456, lr=9.99964e-05, gnorm=2.447, loss_scale=2, train_wall=11, gb_free=2.8, wall=16289 2021-06-18 23:10:25 | INFO | train_inner | epoch 001: 1471 / 3002 loss=2.934, ppl=7.64, wps=5858.4, ups=0.09, wpb=64838, bsz=128, num_updates=1457, lr=9.99963e-05, gnorm=2.665, loss_scale=2, train_wall=11, gb_free=2.8, wall=16300 2021-06-18 23:10:37 | INFO | train_inner | epoch 001: 1472 / 3002 loss=3.174, ppl=9.03, wps=5815.5, ups=0.09, wpb=64823, bsz=128, num_updates=1458, lr=9.99963e-05, gnorm=2.742, loss_scale=2, train_wall=11, gb_free=2.8, wall=16311 2021-06-18 23:10:48 | INFO | train_inner | epoch 001: 1473 / 3002 loss=3.106, ppl=8.61, wps=5880.1, ups=0.09, wpb=64787, bsz=128, num_updates=1459, lr=9.99963e-05, gnorm=2.478, loss_scale=2, train_wall=11, gb_free=2.8, wall=16322 2021-06-18 23:10:59 | INFO | train_inner | epoch 001: 1474 / 3002 loss=3.045, ppl=8.25, wps=5892.3, ups=0.09, wpb=64823, bsz=128, num_updates=1460, lr=9.99963e-05, gnorm=2.398, loss_scale=2, train_wall=11, gb_free=2.8, wall=16333 2021-06-18 23:11:10 | INFO | train_inner | epoch 001: 1475 / 3002 loss=3.01, ppl=8.06, wps=5724, ups=0.09, wpb=64870, bsz=128, num_updates=1461, lr=9.99963e-05, gnorm=2.889, loss_scale=2, train_wall=11, gb_free=2.8, wall=16344 2021-06-18 23:11:21 | INFO | train_inner | epoch 001: 1476 / 3002 loss=3.179, ppl=9.06, wps=5803.5, ups=0.09, wpb=64813, bsz=128, num_updates=1462, lr=9.99963e-05, gnorm=2.599, loss_scale=2, train_wall=11, gb_free=2.8, wall=16355 2021-06-18 23:11:32 | INFO | train_inner | epoch 001: 1477 / 3002 loss=3.053, ppl=8.3, wps=5824.8, ups=0.09, wpb=64866, bsz=128, num_updates=1463, lr=9.99963e-05, gnorm=2.508, loss_scale=2, train_wall=11, gb_free=2.8, wall=16367 2021-06-18 23:11:44 | INFO | train_inner | epoch 001: 1478 / 3002 loss=2.855, ppl=7.24, wps=5735.7, ups=0.09, wpb=64857, bsz=128, num_updates=1464, lr=9.99963e-05, gnorm=2.601, loss_scale=2, train_wall=11, gb_free=2.8, wall=16378 2021-06-18 23:11:55 | INFO | train_inner | epoch 001: 1479 / 3002 loss=2.928, ppl=7.61, wps=5825.4, ups=0.09, wpb=64846, bsz=128, num_updates=1465, lr=9.99963e-05, gnorm=2.58, loss_scale=2, train_wall=11, gb_free=2.8, wall=16389 2021-06-18 23:12:06 | INFO | train_inner | epoch 001: 1480 / 3002 loss=3.083, ppl=8.48, wps=5907.2, ups=0.09, wpb=64760, bsz=128, num_updates=1466, lr=9.99963e-05, gnorm=3.352, loss_scale=2, train_wall=11, gb_free=2.8, wall=16400 2021-06-18 23:12:17 | INFO | train_inner | epoch 001: 1481 / 3002 loss=3.064, ppl=8.36, wps=5928.5, ups=0.09, wpb=64802, bsz=128, num_updates=1467, lr=9.99963e-05, gnorm=2.649, loss_scale=2, train_wall=10, gb_free=2.8, wall=16411 2021-06-18 23:12:27 | INFO | train_inner | epoch 001: 1482 / 3002 loss=3.067, ppl=8.38, wps=5923.4, ups=0.09, wpb=64819, bsz=128, num_updates=1468, lr=9.99963e-05, gnorm=2.699, loss_scale=2, train_wall=11, gb_free=2.8, wall=16422 2021-06-18 23:12:39 | INFO | train_inner | epoch 001: 1483 / 3002 loss=3.048, ppl=8.27, wps=5806.6, ups=0.09, wpb=64845, bsz=128, num_updates=1469, lr=9.99962e-05, gnorm=2.605, loss_scale=2, train_wall=11, gb_free=2.8, wall=16433 2021-06-18 23:12:50 | INFO | train_inner | epoch 001: 1484 / 3002 loss=3.018, ppl=8.1, wps=5810.1, ups=0.09, wpb=64819, bsz=128, num_updates=1470, lr=9.99962e-05, gnorm=2.57, loss_scale=2, train_wall=11, gb_free=2.8, wall=16444 2021-06-18 23:13:01 | INFO | train_inner | epoch 001: 1485 / 3002 loss=2.923, ppl=7.59, wps=5745.6, ups=0.09, wpb=64827, bsz=128, num_updates=1471, lr=9.99962e-05, gnorm=2.501, loss_scale=2, train_wall=11, gb_free=2.8, wall=16455 2021-06-18 23:13:12 | INFO | train_inner | epoch 001: 1486 / 3002 loss=3.111, ppl=8.64, wps=5903.8, ups=0.09, wpb=64814, bsz=128, num_updates=1472, lr=9.99962e-05, gnorm=2.604, loss_scale=2, train_wall=11, gb_free=2.8, wall=16466 2021-06-18 23:13:23 | INFO | train_inner | epoch 001: 1487 / 3002 loss=3.103, ppl=8.59, wps=5857.7, ups=0.09, wpb=64764, bsz=128, num_updates=1473, lr=9.99962e-05, gnorm=2.63, loss_scale=2, train_wall=11, gb_free=2.8, wall=16477 2021-06-18 23:13:34 | INFO | train_inner | epoch 001: 1488 / 3002 loss=3.011, ppl=8.06, wps=5886, ups=0.09, wpb=64835, bsz=128, num_updates=1474, lr=9.99962e-05, gnorm=3.558, loss_scale=2, train_wall=11, gb_free=2.8, wall=16488 2021-06-18 23:13:45 | INFO | train_inner | epoch 001: 1489 / 3002 loss=3.02, ppl=8.11, wps=5934.6, ups=0.09, wpb=64828, bsz=128, num_updates=1475, lr=9.99962e-05, gnorm=2.457, loss_scale=2, train_wall=10, gb_free=2.8, wall=16499 2021-06-18 23:13:56 | INFO | train_inner | epoch 001: 1490 / 3002 loss=3.032, ppl=8.18, wps=5900.6, ups=0.09, wpb=64811, bsz=128, num_updates=1476, lr=9.99962e-05, gnorm=2.591, loss_scale=2, train_wall=11, gb_free=2.8, wall=16510 2021-06-18 23:14:07 | INFO | train_inner | epoch 001: 1491 / 3002 loss=2.949, ppl=7.72, wps=5867.8, ups=0.09, wpb=64847, bsz=128, num_updates=1477, lr=9.99962e-05, gnorm=2.52, loss_scale=2, train_wall=11, gb_free=2.8, wall=16521 2021-06-18 23:14:18 | INFO | train_inner | epoch 001: 1492 / 3002 loss=3.144, ppl=8.84, wps=5852, ups=0.09, wpb=64734, bsz=128, num_updates=1478, lr=9.99962e-05, gnorm=2.693, loss_scale=2, train_wall=11, gb_free=2.8, wall=16532 2021-06-18 23:14:29 | INFO | train_inner | epoch 001: 1493 / 3002 loss=2.941, ppl=7.68, wps=5860.8, ups=0.09, wpb=64817, bsz=128, num_updates=1479, lr=9.99962e-05, gnorm=2.683, loss_scale=2, train_wall=11, gb_free=2.8, wall=16544 2021-06-18 23:14:40 | INFO | train_inner | epoch 001: 1494 / 3002 loss=2.932, ppl=7.63, wps=5935.9, ups=0.09, wpb=64811, bsz=128, num_updates=1480, lr=9.99962e-05, gnorm=3.019, loss_scale=2, train_wall=10, gb_free=2.8, wall=16554 2021-06-18 23:14:51 | INFO | train_inner | epoch 001: 1495 / 3002 loss=2.861, ppl=7.27, wps=5841.3, ups=0.09, wpb=64860, bsz=128, num_updates=1481, lr=9.99962e-05, gnorm=2.503, loss_scale=2, train_wall=11, gb_free=2.8, wall=16566 2021-06-18 23:15:02 | INFO | train_inner | epoch 001: 1496 / 3002 loss=3.083, ppl=8.47, wps=5823.9, ups=0.09, wpb=64814, bsz=128, num_updates=1482, lr=9.99961e-05, gnorm=2.587, loss_scale=2, train_wall=11, gb_free=2.8, wall=16577 2021-06-18 23:15:13 | INFO | train_inner | epoch 001: 1497 / 3002 loss=3.024, ppl=8.13, wps=5864.6, ups=0.09, wpb=64868, bsz=128, num_updates=1483, lr=9.99961e-05, gnorm=2.577, loss_scale=2, train_wall=11, gb_free=2.8, wall=16588 2021-06-18 23:15:24 | INFO | train_inner | epoch 001: 1498 / 3002 loss=3.003, ppl=8.02, wps=5919.7, ups=0.09, wpb=64753, bsz=128, num_updates=1484, lr=9.99961e-05, gnorm=2.492, loss_scale=2, train_wall=10, gb_free=2.8, wall=16599 2021-06-18 23:15:35 | INFO | train_inner | epoch 001: 1499 / 3002 loss=3.03, ppl=8.17, wps=5852.3, ups=0.09, wpb=64755, bsz=128, num_updates=1485, lr=9.99961e-05, gnorm=2.461, loss_scale=2, train_wall=11, gb_free=2.8, wall=16610 2021-06-18 23:15:46 | INFO | train_inner | epoch 001: 1500 / 3002 loss=2.923, ppl=7.58, wps=5930.3, ups=0.09, wpb=64795, bsz=128, num_updates=1486, lr=9.99961e-05, gnorm=2.423, loss_scale=2, train_wall=10, gb_free=2.8, wall=16621 2021-06-18 23:15:57 | INFO | train_inner | epoch 001: 1501 / 3002 loss=2.876, ppl=7.34, wps=5830.3, ups=0.09, wpb=64886, bsz=128, num_updates=1487, lr=9.99961e-05, gnorm=2.511, loss_scale=2, train_wall=11, gb_free=2.8, wall=16632 2021-06-18 23:16:09 | INFO | train_inner | epoch 001: 1502 / 3002 loss=2.904, ppl=7.48, wps=5844.8, ups=0.09, wpb=64875, bsz=128, num_updates=1488, lr=9.99961e-05, gnorm=2.449, loss_scale=2, train_wall=11, gb_free=2.8, wall=16643 2021-06-18 23:16:20 | INFO | train_inner | epoch 001: 1503 / 3002 loss=3.058, ppl=8.33, wps=5856.8, ups=0.09, wpb=64875, bsz=128, num_updates=1489, lr=9.99961e-05, gnorm=2.506, loss_scale=4, train_wall=11, gb_free=2.8, wall=16654 2021-06-18 23:16:31 | INFO | train_inner | epoch 001: 1504 / 3002 loss=2.957, ppl=7.76, wps=5799.3, ups=0.09, wpb=64790, bsz=128, num_updates=1490, lr=9.99961e-05, gnorm=2.438, loss_scale=4, train_wall=11, gb_free=2.8, wall=16665 2021-06-18 23:16:42 | INFO | train_inner | epoch 001: 1505 / 3002 loss=3.08, ppl=8.46, wps=5863.2, ups=0.09, wpb=64825, bsz=128, num_updates=1491, lr=9.99961e-05, gnorm=2.49, loss_scale=4, train_wall=11, gb_free=2.8, wall=16676 2021-06-18 23:16:53 | INFO | train_inner | epoch 001: 1506 / 3002 loss=2.939, ppl=7.67, wps=5745.4, ups=0.09, wpb=64922, bsz=128, num_updates=1492, lr=9.99961e-05, gnorm=2.553, loss_scale=4, train_wall=11, gb_free=2.8, wall=16688 2021-06-18 23:17:04 | INFO | train_inner | epoch 001: 1507 / 3002 loss=2.919, ppl=7.56, wps=5766.3, ups=0.09, wpb=64834, bsz=128, num_updates=1493, lr=9.99961e-05, gnorm=2.413, loss_scale=4, train_wall=11, gb_free=2.8, wall=16699 2021-06-18 23:17:16 | INFO | train_inner | epoch 001: 1508 / 3002 loss=2.919, ppl=7.57, wps=5816.6, ups=0.09, wpb=64849, bsz=128, num_updates=1494, lr=9.9996e-05, gnorm=3.309, loss_scale=4, train_wall=11, gb_free=2.8, wall=16710 2021-06-18 23:17:27 | INFO | train_inner | epoch 001: 1509 / 3002 loss=2.929, ppl=7.62, wps=5900.2, ups=0.09, wpb=64858, bsz=128, num_updates=1495, lr=9.9996e-05, gnorm=2.484, loss_scale=4, train_wall=11, gb_free=2.8, wall=16721 2021-06-18 23:17:38 | INFO | train_inner | epoch 001: 1510 / 3002 loss=2.913, ppl=7.53, wps=5859.3, ups=0.09, wpb=64876, bsz=128, num_updates=1496, lr=9.9996e-05, gnorm=2.516, loss_scale=4, train_wall=11, gb_free=2.8, wall=16732 2021-06-18 23:17:49 | INFO | train_inner | epoch 001: 1511 / 3002 loss=3.011, ppl=8.06, wps=5748.7, ups=0.09, wpb=64789, bsz=128, num_updates=1497, lr=9.9996e-05, gnorm=2.541, loss_scale=4, train_wall=11, gb_free=2.8, wall=16743 2021-06-18 23:18:00 | INFO | train_inner | epoch 001: 1512 / 3002 loss=2.93, ppl=7.62, wps=5832.5, ups=0.09, wpb=64808, bsz=128, num_updates=1498, lr=9.9996e-05, gnorm=2.482, loss_scale=4, train_wall=11, gb_free=2.8, wall=16754 2021-06-18 23:18:11 | INFO | train_inner | epoch 001: 1513 / 3002 loss=3.058, ppl=8.33, wps=5799, ups=0.09, wpb=64886, bsz=128, num_updates=1499, lr=9.9996e-05, gnorm=2.574, loss_scale=4, train_wall=11, gb_free=2.8, wall=16766 2021-06-18 23:18:22 | INFO | train_inner | epoch 001: 1514 / 3002 loss=3.01, ppl=8.05, wps=5791.7, ups=0.09, wpb=64764, bsz=128, num_updates=1500, lr=9.9996e-05, gnorm=2.42, loss_scale=4, train_wall=11, gb_free=2.8, wall=16777 2021-06-18 23:18:33 | INFO | train_inner | epoch 001: 1515 / 3002 loss=3.023, ppl=8.13, wps=5891.3, ups=0.09, wpb=64859, bsz=128, num_updates=1501, lr=9.9996e-05, gnorm=2.789, loss_scale=4, train_wall=11, gb_free=2.8, wall=16788 2021-06-18 23:18:45 | INFO | train_inner | epoch 001: 1516 / 3002 loss=3.081, ppl=8.46, wps=5824.9, ups=0.09, wpb=64751, bsz=128, num_updates=1502, lr=9.9996e-05, gnorm=2.41, loss_scale=4, train_wall=11, gb_free=2.8, wall=16799 2021-06-18 23:18:56 | INFO | train_inner | epoch 001: 1517 / 3002 loss=3.128, ppl=8.74, wps=5841.3, ups=0.09, wpb=64848, bsz=128, num_updates=1503, lr=9.9996e-05, gnorm=2.479, loss_scale=4, train_wall=11, gb_free=2.8, wall=16810 2021-06-18 23:19:07 | INFO | train_inner | epoch 001: 1518 / 3002 loss=2.828, ppl=7.1, wps=5803.6, ups=0.09, wpb=64813, bsz=128, num_updates=1504, lr=9.9996e-05, gnorm=2.453, loss_scale=4, train_wall=11, gb_free=2.8, wall=16821 2021-06-18 23:19:18 | INFO | train_inner | epoch 001: 1519 / 3002 loss=2.852, ppl=7.22, wps=5826.7, ups=0.09, wpb=64835, bsz=128, num_updates=1505, lr=9.9996e-05, gnorm=3.142, loss_scale=4, train_wall=11, gb_free=2.8, wall=16832 2021-06-18 23:19:29 | INFO | train_inner | epoch 001: 1520 / 3002 loss=3.114, ppl=8.66, wps=5826.2, ups=0.09, wpb=64787, bsz=128, num_updates=1506, lr=9.9996e-05, gnorm=2.616, loss_scale=4, train_wall=11, gb_free=2.8, wall=16843 2021-06-18 23:19:40 | INFO | train_inner | epoch 001: 1521 / 3002 loss=3.062, ppl=8.35, wps=5840, ups=0.09, wpb=64793, bsz=128, num_updates=1507, lr=9.99959e-05, gnorm=2.571, loss_scale=4, train_wall=11, gb_free=2.8, wall=16854 2021-06-18 23:19:51 | INFO | train_inner | epoch 001: 1522 / 3002 loss=3.118, ppl=8.68, wps=5913.8, ups=0.09, wpb=64935, bsz=128, num_updates=1508, lr=9.99959e-05, gnorm=2.486, loss_scale=4, train_wall=11, gb_free=2.8, wall=16865 2021-06-18 23:20:02 | INFO | train_inner | epoch 001: 1523 / 3002 loss=2.976, ppl=7.87, wps=5957.4, ups=0.09, wpb=64849, bsz=128, num_updates=1509, lr=9.99959e-05, gnorm=2.565, loss_scale=4, train_wall=10, gb_free=2.8, wall=16876 2021-06-18 23:20:13 | INFO | train_inner | epoch 001: 1524 / 3002 loss=3.044, ppl=8.25, wps=5835.7, ups=0.09, wpb=64887, bsz=128, num_updates=1510, lr=9.99959e-05, gnorm=3.215, loss_scale=4, train_wall=11, gb_free=2.8, wall=16887 2021-06-18 23:20:24 | INFO | train_inner | epoch 001: 1525 / 3002 loss=3.209, ppl=9.25, wps=5946.4, ups=0.09, wpb=64845, bsz=128, num_updates=1511, lr=9.99959e-05, gnorm=3.285, loss_scale=4, train_wall=10, gb_free=2.8, wall=16898 2021-06-18 23:20:35 | INFO | train_inner | epoch 001: 1526 / 3002 loss=2.986, ppl=7.92, wps=5739.9, ups=0.09, wpb=64849, bsz=128, num_updates=1512, lr=9.99959e-05, gnorm=2.664, loss_scale=4, train_wall=11, gb_free=2.8, wall=16910 2021-06-18 23:20:47 | INFO | train_inner | epoch 001: 1527 / 3002 loss=2.978, ppl=7.88, wps=5775.7, ups=0.09, wpb=64843, bsz=128, num_updates=1513, lr=9.99959e-05, gnorm=2.604, loss_scale=4, train_wall=11, gb_free=2.8, wall=16921 2021-06-18 23:20:58 | INFO | train_inner | epoch 001: 1528 / 3002 loss=3.029, ppl=8.16, wps=5819.8, ups=0.09, wpb=64811, bsz=128, num_updates=1514, lr=9.99959e-05, gnorm=2.505, loss_scale=4, train_wall=11, gb_free=2.8, wall=16932 2021-06-18 23:21:09 | INFO | train_inner | epoch 001: 1529 / 3002 loss=2.96, ppl=7.78, wps=5939.8, ups=0.09, wpb=64927, bsz=128, num_updates=1515, lr=9.99959e-05, gnorm=2.462, loss_scale=4, train_wall=10, gb_free=2.8, wall=16943 2021-06-18 23:21:20 | INFO | train_inner | epoch 001: 1530 / 3002 loss=2.882, ppl=7.37, wps=5789.5, ups=0.09, wpb=64851, bsz=128, num_updates=1516, lr=9.99959e-05, gnorm=2.488, loss_scale=4, train_wall=11, gb_free=2.8, wall=16954 2021-06-18 23:21:31 | INFO | train_inner | epoch 001: 1531 / 3002 loss=3.128, ppl=8.74, wps=5822, ups=0.09, wpb=64777, bsz=128, num_updates=1517, lr=9.99959e-05, gnorm=2.512, loss_scale=4, train_wall=11, gb_free=2.8, wall=16965 2021-06-18 23:21:42 | INFO | train_inner | epoch 001: 1532 / 3002 loss=2.852, ppl=7.22, wps=5906.8, ups=0.09, wpb=64890, bsz=128, num_updates=1518, lr=9.99959e-05, gnorm=2.936, loss_scale=4, train_wall=10, gb_free=2.8, wall=16976 2021-06-18 23:21:53 | INFO | train_inner | epoch 001: 1533 / 3002 loss=3.022, ppl=8.12, wps=5837.1, ups=0.09, wpb=64792, bsz=128, num_updates=1519, lr=9.99958e-05, gnorm=3.681, loss_scale=4, train_wall=11, gb_free=2.8, wall=16987 2021-06-18 23:22:04 | INFO | train_inner | epoch 001: 1534 / 3002 loss=3.103, ppl=8.59, wps=5776.1, ups=0.09, wpb=64865, bsz=128, num_updates=1520, lr=9.99958e-05, gnorm=2.726, loss_scale=4, train_wall=11, gb_free=2.8, wall=16999 2021-06-18 23:22:15 | INFO | train_inner | epoch 001: 1535 / 3002 loss=2.928, ppl=7.61, wps=5854.8, ups=0.09, wpb=64835, bsz=128, num_updates=1521, lr=9.99958e-05, gnorm=2.499, loss_scale=4, train_wall=11, gb_free=2.8, wall=17010 2021-06-18 23:22:26 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-18 23:22:37 | INFO | train_inner | epoch 001: 1537 / 3002 loss=2.832, ppl=7.12, wps=2958.9, ups=0.05, wpb=64934, bsz=128, num_updates=1522, lr=9.99958e-05, gnorm=2.505, loss_scale=2, train_wall=21, gb_free=2.8, wall=17032 2021-06-18 23:22:48 | INFO | train_inner | epoch 001: 1538 / 3002 loss=3.109, ppl=8.63, wps=5823.9, ups=0.09, wpb=64789, bsz=128, num_updates=1523, lr=9.99958e-05, gnorm=2.497, loss_scale=2, train_wall=11, gb_free=2.8, wall=17043 2021-06-18 23:23:00 | INFO | train_inner | epoch 001: 1539 / 3002 loss=3.029, ppl=8.16, wps=5802.7, ups=0.09, wpb=64896, bsz=128, num_updates=1524, lr=9.99958e-05, gnorm=2.527, loss_scale=2, train_wall=11, gb_free=2.8, wall=17054 2021-06-18 23:23:11 | INFO | train_inner | epoch 001: 1540 / 3002 loss=3.283, ppl=9.73, wps=5838, ups=0.09, wpb=64867, bsz=128, num_updates=1525, lr=9.99958e-05, gnorm=2.507, loss_scale=2, train_wall=11, gb_free=2.8, wall=17065 2021-06-18 23:23:22 | INFO | train_inner | epoch 001: 1541 / 3002 loss=2.85, ppl=7.21, wps=5944.2, ups=0.09, wpb=64803, bsz=128, num_updates=1526, lr=9.99958e-05, gnorm=2.415, loss_scale=2, train_wall=10, gb_free=2.8, wall=17076 2021-06-18 23:23:33 | INFO | train_inner | epoch 001: 1542 / 3002 loss=2.908, ppl=7.51, wps=5842.1, ups=0.09, wpb=64849, bsz=128, num_updates=1527, lr=9.99958e-05, gnorm=2.587, loss_scale=2, train_wall=11, gb_free=2.8, wall=17087 2021-06-18 23:23:44 | INFO | train_inner | epoch 001: 1543 / 3002 loss=3.007, ppl=8.04, wps=5808, ups=0.09, wpb=64805, bsz=128, num_updates=1528, lr=9.99958e-05, gnorm=2.541, loss_scale=2, train_wall=11, gb_free=2.8, wall=17098 2021-06-18 23:23:55 | INFO | train_inner | epoch 001: 1544 / 3002 loss=2.976, ppl=7.87, wps=5709.8, ups=0.09, wpb=64773, bsz=128, num_updates=1529, lr=9.99958e-05, gnorm=2.386, loss_scale=2, train_wall=11, gb_free=2.8, wall=17110 2021-06-18 23:24:06 | INFO | train_inner | epoch 001: 1545 / 3002 loss=2.985, ppl=7.92, wps=5803.3, ups=0.09, wpb=64867, bsz=128, num_updates=1530, lr=9.99958e-05, gnorm=2.805, loss_scale=2, train_wall=11, gb_free=2.8, wall=17121 2021-06-18 23:24:17 | INFO | train_inner | epoch 001: 1546 / 3002 loss=3.025, ppl=8.14, wps=5940.3, ups=0.09, wpb=64820, bsz=128, num_updates=1531, lr=9.99958e-05, gnorm=2.498, loss_scale=2, train_wall=10, gb_free=2.8, wall=17132 2021-06-18 23:24:28 | INFO | train_inner | epoch 001: 1547 / 3002 loss=3.07, ppl=8.4, wps=5871, ups=0.09, wpb=64833, bsz=128, num_updates=1532, lr=9.99957e-05, gnorm=3.562, loss_scale=2, train_wall=11, gb_free=2.8, wall=17143 2021-06-18 23:24:39 | INFO | train_inner | epoch 001: 1548 / 3002 loss=2.849, ppl=7.2, wps=5836.9, ups=0.09, wpb=64854, bsz=128, num_updates=1533, lr=9.99957e-05, gnorm=2.586, loss_scale=2, train_wall=11, gb_free=2.8, wall=17154 2021-06-18 23:24:51 | INFO | train_inner | epoch 001: 1549 / 3002 loss=2.983, ppl=7.91, wps=5756, ups=0.09, wpb=64825, bsz=128, num_updates=1534, lr=9.99957e-05, gnorm=2.622, loss_scale=2, train_wall=11, gb_free=2.8, wall=17165 2021-06-18 23:25:01 | INFO | train_inner | epoch 001: 1550 / 3002 loss=3.115, ppl=8.66, wps=6056.1, ups=0.09, wpb=64812, bsz=128, num_updates=1535, lr=9.99957e-05, gnorm=9.555, loss_scale=2, train_wall=10, gb_free=2.8, wall=17176 2021-06-18 23:25:12 | INFO | train_inner | epoch 001: 1551 / 3002 loss=3.066, ppl=8.38, wps=5898.5, ups=0.09, wpb=64891, bsz=128, num_updates=1536, lr=9.99957e-05, gnorm=2.832, loss_scale=2, train_wall=11, gb_free=2.8, wall=17187 2021-06-18 23:25:23 | INFO | train_inner | epoch 001: 1552 / 3002 loss=2.996, ppl=7.98, wps=5871.5, ups=0.09, wpb=64845, bsz=128, num_updates=1537, lr=9.99957e-05, gnorm=2.488, loss_scale=2, train_wall=11, gb_free=2.8, wall=17198 2021-06-18 23:25:35 | INFO | train_inner | epoch 001: 1553 / 3002 loss=3.068, ppl=8.39, wps=5707.2, ups=0.09, wpb=64800, bsz=128, num_updates=1538, lr=9.99957e-05, gnorm=63.536, loss_scale=2, train_wall=11, gb_free=2.8, wall=17209 2021-06-18 23:25:46 | INFO | train_inner | epoch 001: 1554 / 3002 loss=3.097, ppl=8.56, wps=5784.1, ups=0.09, wpb=64812, bsz=128, num_updates=1539, lr=9.99957e-05, gnorm=4.141, loss_scale=2, train_wall=11, gb_free=2.8, wall=17220 2021-06-18 23:25:57 | INFO | train_inner | epoch 001: 1555 / 3002 loss=3.136, ppl=8.79, wps=5874.5, ups=0.09, wpb=64785, bsz=128, num_updates=1540, lr=9.99957e-05, gnorm=2.686, loss_scale=2, train_wall=11, gb_free=2.8, wall=17231 2021-06-18 23:26:08 | INFO | train_inner | epoch 001: 1556 / 3002 loss=2.872, ppl=7.32, wps=5880, ups=0.09, wpb=64799, bsz=128, num_updates=1541, lr=9.99957e-05, gnorm=2.623, loss_scale=2, train_wall=11, gb_free=2.8, wall=17242 2021-06-18 23:26:19 | INFO | train_inner | epoch 001: 1557 / 3002 loss=2.903, ppl=7.48, wps=5860.5, ups=0.09, wpb=64938, bsz=128, num_updates=1542, lr=9.99957e-05, gnorm=5.994, loss_scale=2, train_wall=11, gb_free=2.8, wall=17254 2021-06-18 23:26:30 | INFO | train_inner | epoch 001: 1558 / 3002 loss=3.02, ppl=8.11, wps=5819, ups=0.09, wpb=64880, bsz=128, num_updates=1543, lr=9.99957e-05, gnorm=2.853, loss_scale=2, train_wall=11, gb_free=2.8, wall=17265 2021-06-18 23:26:41 | INFO | train_inner | epoch 001: 1559 / 3002 loss=3.03, ppl=8.17, wps=5871.2, ups=0.09, wpb=64776, bsz=128, num_updates=1544, lr=9.99956e-05, gnorm=2.836, loss_scale=2, train_wall=11, gb_free=2.8, wall=17276 2021-06-18 23:26:52 | INFO | train_inner | epoch 001: 1560 / 3002 loss=3.035, ppl=8.19, wps=5849.1, ups=0.09, wpb=64824, bsz=128, num_updates=1545, lr=9.99956e-05, gnorm=5.599, loss_scale=2, train_wall=11, gb_free=2.8, wall=17287 2021-06-18 23:27:04 | INFO | train_inner | epoch 001: 1561 / 3002 loss=2.995, ppl=7.97, wps=5815.3, ups=0.09, wpb=64851, bsz=128, num_updates=1546, lr=9.99956e-05, gnorm=3.515, loss_scale=2, train_wall=11, gb_free=2.8, wall=17298 2021-06-18 23:27:15 | INFO | train_inner | epoch 001: 1562 / 3002 loss=3.098, ppl=8.56, wps=5919.2, ups=0.09, wpb=64812, bsz=128, num_updates=1547, lr=9.99956e-05, gnorm=2.885, loss_scale=2, train_wall=10, gb_free=2.8, wall=17309 2021-06-18 23:27:26 | INFO | train_inner | epoch 001: 1563 / 3002 loss=2.969, ppl=7.83, wps=5908.5, ups=0.09, wpb=64836, bsz=128, num_updates=1548, lr=9.99956e-05, gnorm=2.922, loss_scale=2, train_wall=11, gb_free=2.8, wall=17320 2021-06-18 23:27:37 | INFO | train_inner | epoch 001: 1564 / 3002 loss=2.841, ppl=7.16, wps=5852.6, ups=0.09, wpb=64877, bsz=128, num_updates=1549, lr=9.99956e-05, gnorm=3.033, loss_scale=2, train_wall=11, gb_free=2.8, wall=17331 2021-06-18 23:27:48 | INFO | train_inner | epoch 001: 1565 / 3002 loss=3.162, ppl=8.95, wps=5873.9, ups=0.09, wpb=64750, bsz=128, num_updates=1550, lr=9.99956e-05, gnorm=2.953, loss_scale=2, train_wall=11, gb_free=2.8, wall=17342 2021-06-18 23:27:59 | INFO | train_inner | epoch 001: 1566 / 3002 loss=3.138, ppl=8.8, wps=5851.7, ups=0.09, wpb=64800, bsz=128, num_updates=1551, lr=9.99956e-05, gnorm=2.782, loss_scale=2, train_wall=11, gb_free=2.8, wall=17353 2021-06-18 23:28:10 | INFO | train_inner | epoch 001: 1567 / 3002 loss=3.13, ppl=8.76, wps=5865.1, ups=0.09, wpb=64875, bsz=128, num_updates=1552, lr=9.99956e-05, gnorm=2.772, loss_scale=2, train_wall=11, gb_free=2.8, wall=17364 2021-06-18 23:28:21 | INFO | train_inner | epoch 001: 1568 / 3002 loss=3.091, ppl=8.52, wps=5787.4, ups=0.09, wpb=64794, bsz=128, num_updates=1553, lr=9.99956e-05, gnorm=2.71, loss_scale=2, train_wall=11, gb_free=2.8, wall=17375 2021-06-18 23:28:32 | INFO | train_inner | epoch 001: 1569 / 3002 loss=3.298, ppl=9.83, wps=5946, ups=0.09, wpb=64795, bsz=128, num_updates=1554, lr=9.99956e-05, gnorm=2.498, loss_scale=2, train_wall=10, gb_free=2.8, wall=17386 2021-06-18 23:28:43 | INFO | train_inner | epoch 001: 1570 / 3002 loss=3.198, ppl=9.18, wps=5816.5, ups=0.09, wpb=64808, bsz=128, num_updates=1555, lr=9.99956e-05, gnorm=13.703, loss_scale=2, train_wall=11, gb_free=2.8, wall=17397 2021-06-18 23:28:54 | INFO | train_inner | epoch 001: 1571 / 3002 loss=2.928, ppl=7.61, wps=5817, ups=0.09, wpb=64766, bsz=128, num_updates=1556, lr=9.99956e-05, gnorm=2.63, loss_scale=2, train_wall=11, gb_free=2.8, wall=17408 2021-06-18 23:29:05 | INFO | train_inner | epoch 001: 1572 / 3002 loss=3.218, ppl=9.3, wps=5814.6, ups=0.09, wpb=64677, bsz=128, num_updates=1557, lr=9.99955e-05, gnorm=2.692, loss_scale=2, train_wall=11, gb_free=2.8, wall=17420 2021-06-18 23:29:16 | INFO | train_inner | epoch 001: 1573 / 3002 loss=3.05, ppl=8.28, wps=5826.8, ups=0.09, wpb=64812, bsz=128, num_updates=1558, lr=9.99955e-05, gnorm=9.101, loss_scale=2, train_wall=11, gb_free=2.8, wall=17431 2021-06-18 23:29:28 | INFO | train_inner | epoch 001: 1574 / 3002 loss=3.036, ppl=8.2, wps=5741.9, ups=0.09, wpb=64763, bsz=128, num_updates=1559, lr=9.99955e-05, gnorm=2.569, loss_scale=2, train_wall=11, gb_free=2.8, wall=17442 2021-06-18 23:29:38 | INFO | train_inner | epoch 001: 1575 / 3002 loss=3.206, ppl=9.23, wps=5985.7, ups=0.09, wpb=64886, bsz=128, num_updates=1560, lr=9.99955e-05, gnorm=3.495, loss_scale=2, train_wall=10, gb_free=2.8, wall=17453 2021-06-18 23:29:49 | INFO | train_inner | epoch 001: 1576 / 3002 loss=2.977, ppl=7.87, wps=5915.7, ups=0.09, wpb=64854, bsz=128, num_updates=1561, lr=9.99955e-05, gnorm=2.825, loss_scale=2, train_wall=11, gb_free=2.8, wall=17464 2021-06-18 23:30:00 | INFO | train_inner | epoch 001: 1577 / 3002 loss=2.991, ppl=7.95, wps=5880.1, ups=0.09, wpb=64856, bsz=128, num_updates=1562, lr=9.99955e-05, gnorm=2.716, loss_scale=2, train_wall=11, gb_free=2.8, wall=17475 2021-06-18 23:30:11 | INFO | train_inner | epoch 001: 1578 / 3002 loss=3.008, ppl=8.04, wps=5925.5, ups=0.09, wpb=64864, bsz=128, num_updates=1563, lr=9.99955e-05, gnorm=3.335, loss_scale=2, train_wall=10, gb_free=2.8, wall=17486 2021-06-18 23:30:23 | INFO | train_inner | epoch 001: 1579 / 3002 loss=3.049, ppl=8.28, wps=5849.7, ups=0.09, wpb=64844, bsz=128, num_updates=1564, lr=9.99955e-05, gnorm=2.854, loss_scale=2, train_wall=11, gb_free=2.8, wall=17497 2021-06-18 23:30:34 | INFO | train_inner | epoch 001: 1580 / 3002 loss=3.01, ppl=8.06, wps=5860.8, ups=0.09, wpb=64824, bsz=128, num_updates=1565, lr=9.99955e-05, gnorm=3.112, loss_scale=2, train_wall=11, gb_free=2.8, wall=17508 2021-06-18 23:30:45 | INFO | train_inner | epoch 001: 1581 / 3002 loss=2.981, ppl=7.9, wps=5866.9, ups=0.09, wpb=64756, bsz=128, num_updates=1566, lr=9.99955e-05, gnorm=2.643, loss_scale=2, train_wall=11, gb_free=2.8, wall=17519 2021-06-18 23:30:56 | INFO | train_inner | epoch 001: 1582 / 3002 loss=3.121, ppl=8.7, wps=5848.6, ups=0.09, wpb=64788, bsz=128, num_updates=1567, lr=9.99955e-05, gnorm=2.643, loss_scale=2, train_wall=11, gb_free=2.8, wall=17530 2021-06-18 23:31:07 | INFO | train_inner | epoch 001: 1583 / 3002 loss=2.983, ppl=7.9, wps=5909, ups=0.09, wpb=64693, bsz=128, num_updates=1568, lr=9.99955e-05, gnorm=2.561, loss_scale=2, train_wall=10, gb_free=2.8, wall=17541 2021-06-18 23:31:17 | INFO | train_inner | epoch 001: 1584 / 3002 loss=3.097, ppl=8.56, wps=5973.1, ups=0.09, wpb=64817, bsz=128, num_updates=1569, lr=9.99954e-05, gnorm=2.654, loss_scale=2, train_wall=10, gb_free=2.8, wall=17552 2021-06-18 23:31:29 | INFO | train_inner | epoch 001: 1585 / 3002 loss=2.993, ppl=7.96, wps=5826.8, ups=0.09, wpb=64859, bsz=128, num_updates=1570, lr=9.99954e-05, gnorm=2.581, loss_scale=2, train_wall=11, gb_free=2.8, wall=17563 2021-06-18 23:31:40 | INFO | train_inner | epoch 001: 1586 / 3002 loss=3.125, ppl=8.73, wps=5914.1, ups=0.09, wpb=64806, bsz=128, num_updates=1571, lr=9.99954e-05, gnorm=2.651, loss_scale=2, train_wall=10, gb_free=2.8, wall=17574 2021-06-18 23:31:51 | INFO | train_inner | epoch 001: 1587 / 3002 loss=3.026, ppl=8.14, wps=5772, ups=0.09, wpb=64815, bsz=128, num_updates=1572, lr=9.99954e-05, gnorm=2.628, loss_scale=2, train_wall=11, gb_free=2.8, wall=17585 2021-06-18 23:32:02 | INFO | train_inner | epoch 001: 1588 / 3002 loss=3.267, ppl=9.63, wps=5803.5, ups=0.09, wpb=64793, bsz=128, num_updates=1573, lr=9.99954e-05, gnorm=2.805, loss_scale=2, train_wall=11, gb_free=2.8, wall=17596 2021-06-18 23:32:13 | INFO | train_inner | epoch 001: 1589 / 3002 loss=3.244, ppl=9.47, wps=5896.9, ups=0.09, wpb=64815, bsz=128, num_updates=1574, lr=9.99954e-05, gnorm=2.505, loss_scale=2, train_wall=11, gb_free=2.8, wall=17607 2021-06-18 23:32:24 | INFO | train_inner | epoch 001: 1590 / 3002 loss=2.836, ppl=7.14, wps=5872.8, ups=0.09, wpb=64791, bsz=128, num_updates=1575, lr=9.99954e-05, gnorm=2.517, loss_scale=2, train_wall=11, gb_free=2.8, wall=17618 2021-06-18 23:32:35 | INFO | train_inner | epoch 001: 1591 / 3002 loss=3.066, ppl=8.37, wps=5924.2, ups=0.09, wpb=64799, bsz=128, num_updates=1576, lr=9.99954e-05, gnorm=2.697, loss_scale=2, train_wall=10, gb_free=2.8, wall=17629 2021-06-18 23:32:46 | INFO | train_inner | epoch 001: 1592 / 3002 loss=3.097, ppl=8.56, wps=5836.4, ups=0.09, wpb=64775, bsz=128, num_updates=1577, lr=9.99954e-05, gnorm=2.864, loss_scale=2, train_wall=11, gb_free=2.8, wall=17640 2021-06-18 23:32:57 | INFO | train_inner | epoch 001: 1593 / 3002 loss=3.105, ppl=8.6, wps=5828.4, ups=0.09, wpb=64776, bsz=128, num_updates=1578, lr=9.99954e-05, gnorm=2.58, loss_scale=2, train_wall=11, gb_free=2.8, wall=17651 2021-06-18 23:33:08 | INFO | train_inner | epoch 001: 1594 / 3002 loss=3.121, ppl=8.7, wps=5910.3, ups=0.09, wpb=64896, bsz=128, num_updates=1579, lr=9.99954e-05, gnorm=2.934, loss_scale=2, train_wall=11, gb_free=2.8, wall=17662 2021-06-18 23:33:19 | INFO | train_inner | epoch 001: 1595 / 3002 loss=2.948, ppl=7.72, wps=5824.1, ups=0.09, wpb=64788, bsz=128, num_updates=1580, lr=9.99954e-05, gnorm=3.248, loss_scale=2, train_wall=11, gb_free=2.8, wall=17674 2021-06-18 23:33:30 | INFO | train_inner | epoch 001: 1596 / 3002 loss=2.931, ppl=7.62, wps=5903.6, ups=0.09, wpb=64886, bsz=128, num_updates=1581, lr=9.99954e-05, gnorm=2.928, loss_scale=2, train_wall=11, gb_free=2.8, wall=17685 2021-06-18 23:33:41 | INFO | train_inner | epoch 001: 1597 / 3002 loss=3.123, ppl=8.71, wps=5798.1, ups=0.09, wpb=64739, bsz=128, num_updates=1582, lr=9.99953e-05, gnorm=2.533, loss_scale=2, train_wall=11, gb_free=2.8, wall=17696 2021-06-18 23:33:52 | INFO | train_inner | epoch 001: 1598 / 3002 loss=3.1, ppl=8.58, wps=5853.4, ups=0.09, wpb=64864, bsz=128, num_updates=1583, lr=9.99953e-05, gnorm=2.634, loss_scale=2, train_wall=11, gb_free=2.8, wall=17707 2021-06-18 23:34:04 | INFO | train_inner | epoch 001: 1599 / 3002 loss=3.007, ppl=8.04, wps=5766.4, ups=0.09, wpb=64785, bsz=128, num_updates=1584, lr=9.99953e-05, gnorm=2.631, loss_scale=2, train_wall=11, gb_free=2.8, wall=17718 2021-06-18 23:34:15 | INFO | train_inner | epoch 001: 1600 / 3002 loss=2.982, ppl=7.9, wps=5831.4, ups=0.09, wpb=64840, bsz=128, num_updates=1585, lr=9.99953e-05, gnorm=2.571, loss_scale=2, train_wall=11, gb_free=2.8, wall=17729 2021-06-18 23:34:26 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 2021-06-18 23:34:37 | INFO | train_inner | epoch 001: 1602 / 3002 loss=2.947, ppl=7.71, wps=2928.3, ups=0.05, wpb=64832, bsz=128, num_updates=1586, lr=9.99953e-05, gnorm=2.505, loss_scale=1, train_wall=21, gb_free=2.8, wall=17751 2021-06-18 23:34:48 | INFO | train_inner | epoch 001: 1603 / 3002 loss=2.917, ppl=7.55, wps=5953.6, ups=0.09, wpb=64832, bsz=128, num_updates=1587, lr=9.99953e-05, gnorm=2.646, loss_scale=1, train_wall=10, gb_free=2.8, wall=17762 2021-06-18 23:34:59 | INFO | train_inner | epoch 001: 1604 / 3002 loss=3.043, ppl=8.24, wps=5925.6, ups=0.09, wpb=64892, bsz=128, num_updates=1588, lr=9.99953e-05, gnorm=2.794, loss_scale=1, train_wall=11, gb_free=2.8, wall=17773 2021-06-18 23:35:10 | INFO | train_inner | epoch 001: 1605 / 3002 loss=2.92, ppl=7.57, wps=5988.5, ups=0.09, wpb=64954, bsz=128, num_updates=1589, lr=9.99953e-05, gnorm=2.495, loss_scale=1, train_wall=10, gb_free=2.8, wall=17784 2021-06-18 23:35:21 | INFO | train_inner | epoch 001: 1606 / 3002 loss=3.038, ppl=8.22, wps=5861.8, ups=0.09, wpb=64776, bsz=128, num_updates=1590, lr=9.99953e-05, gnorm=2.461, loss_scale=1, train_wall=11, gb_free=2.8, wall=17795 2021-06-18 23:35:32 | INFO | train_inner | epoch 001: 1607 / 3002 loss=2.975, ppl=7.86, wps=5848.4, ups=0.09, wpb=64907, bsz=128, num_updates=1591, lr=9.99953e-05, gnorm=2.59, loss_scale=1, train_wall=11, gb_free=2.8, wall=17806 2021-06-18 23:35:43 | INFO | train_inner | epoch 001: 1608 / 3002 loss=3.123, ppl=8.71, wps=5987.8, ups=0.09, wpb=64875, bsz=128, num_updates=1592, lr=9.99953e-05, gnorm=2.531, loss_scale=1, train_wall=10, gb_free=2.8, wall=17817 2021-06-18 23:35:54 | INFO | train_inner | epoch 001: 1609 / 3002 loss=3.114, ppl=8.66, wps=5775.2, ups=0.09, wpb=64739, bsz=128, num_updates=1593, lr=9.99953e-05, gnorm=9.578, loss_scale=1, train_wall=11, gb_free=2.8, wall=17828 2021-06-18 23:36:05 | INFO | train_inner | epoch 001: 1610 / 3002 loss=2.958, ppl=7.77, wps=5857.2, ups=0.09, wpb=64853, bsz=128, num_updates=1594, lr=9.99952e-05, gnorm=2.507, loss_scale=1, train_wall=11, gb_free=2.8, wall=17839 2021-06-18 23:36:16 | INFO | train_inner | epoch 001: 1611 / 3002 loss=2.963, ppl=7.8, wps=5917, ups=0.09, wpb=64843, bsz=128, num_updates=1595, lr=9.99952e-05, gnorm=2.722, loss_scale=1, train_wall=11, gb_free=2.8, wall=17850 2021-06-18 23:36:27 | INFO | train_inner | epoch 001: 1612 / 3002 loss=3.13, ppl=8.76, wps=5806.9, ups=0.09, wpb=64797, bsz=128, num_updates=1596, lr=9.99952e-05, gnorm=2.466, loss_scale=1, train_wall=11, gb_free=2.8, wall=17861 2021-06-18 23:36:38 | INFO | train_inner | epoch 001: 1613 / 3002 loss=3.272, ppl=9.66, wps=5790.6, ups=0.09, wpb=64791, bsz=128, num_updates=1597, lr=9.99952e-05, gnorm=2.613, loss_scale=1, train_wall=11, gb_free=2.8, wall=17873 2021-06-18 23:36:49 | INFO | train_inner | epoch 001: 1614 / 3002 loss=2.977, ppl=7.87, wps=5831.5, ups=0.09, wpb=64796, bsz=128, num_updates=1598, lr=9.99952e-05, gnorm=2.689, loss_scale=1, train_wall=11, gb_free=2.8, wall=17884 2021-06-18 23:37:00 | INFO | train_inner | epoch 001: 1615 / 3002 loss=3.005, ppl=8.03, wps=5838.8, ups=0.09, wpb=64858, bsz=128, num_updates=1599, lr=9.99952e-05, gnorm=3.288, loss_scale=1, train_wall=11, gb_free=2.8, wall=17895 2021-06-18 23:37:12 | INFO | train_inner | epoch 001: 1616 / 3002 loss=2.988, ppl=7.93, wps=5843.3, ups=0.09, wpb=64885, bsz=128, num_updates=1600, lr=9.99952e-05, gnorm=2.515, loss_scale=1, train_wall=11, gb_free=2.8, wall=17906 2021-06-18 23:37:23 | INFO | train_inner | epoch 001: 1617 / 3002 loss=3.044, ppl=8.25, wps=5834.7, ups=0.09, wpb=64732, bsz=128, num_updates=1601, lr=9.99952e-05, gnorm=2.598, loss_scale=1, train_wall=11, gb_free=2.8, wall=17917 2021-06-18 23:37:34 | INFO | train_inner | epoch 001: 1618 / 3002 loss=2.889, ppl=7.41, wps=5909.9, ups=0.09, wpb=64862, bsz=128, num_updates=1602, lr=9.99952e-05, gnorm=2.52, loss_scale=1, train_wall=11, gb_free=2.8, wall=17928 2021-06-18 23:37:45 | INFO | train_inner | epoch 001: 1619 / 3002 loss=3.14, ppl=8.82, wps=5849, ups=0.09, wpb=64832, bsz=128, num_updates=1603, lr=9.99952e-05, gnorm=5.379, loss_scale=1, train_wall=11, gb_free=2.8, wall=17939 2021-06-18 23:37:56 | INFO | train_inner | epoch 001: 1620 / 3002 loss=2.873, ppl=7.33, wps=5943.9, ups=0.09, wpb=64870, bsz=128, num_updates=1604, lr=9.99952e-05, gnorm=2.966, loss_scale=1, train_wall=10, gb_free=2.8, wall=17950 2021-06-18 23:38:07 | INFO | train_inner | epoch 001: 1621 / 3002 loss=3.091, ppl=8.52, wps=5907, ups=0.09, wpb=64906, bsz=128, num_updates=1605, lr=9.99952e-05, gnorm=2.656, loss_scale=1, train_wall=11, gb_free=2.8, wall=17961 2021-06-18 23:38:18 | INFO | train_inner | epoch 001: 1622 / 3002 loss=2.997, ppl=7.98, wps=5719.6, ups=0.09, wpb=64781, bsz=128, num_updates=1606, lr=9.99952e-05, gnorm=2.541, loss_scale=1, train_wall=11, gb_free=2.8, wall=17972 2021-06-18 23:38:29 | INFO | train_inner | epoch 001: 1623 / 3002 loss=3.034, ppl=8.19, wps=5860.4, ups=0.09, wpb=64793, bsz=128, num_updates=1607, lr=9.99951e-05, gnorm=2.558, loss_scale=1, train_wall=11, gb_free=2.8, wall=17983 2021-06-18 23:38:40 | INFO | train_inner | epoch 001: 1624 / 3002 loss=2.938, ppl=7.66, wps=5856.7, ups=0.09, wpb=64830, bsz=128, num_updates=1608, lr=9.99951e-05, gnorm=2.696, loss_scale=1, train_wall=11, gb_free=2.8, wall=17994 2021-06-18 23:38:51 | INFO | train_inner | epoch 001: 1625 / 3002 loss=2.958, ppl=7.77, wps=5876.3, ups=0.09, wpb=64857, bsz=128, num_updates=1609, lr=9.99951e-05, gnorm=8.196, loss_scale=1, train_wall=11, gb_free=2.8, wall=18005 2021-06-18 23:39:02 | INFO | train_inner | epoch 001: 1626 / 3002 loss=2.973, ppl=7.85, wps=5946.2, ups=0.09, wpb=64861, bsz=128, num_updates=1610, lr=9.99951e-05, gnorm=2.712, loss_scale=1, train_wall=10, gb_free=2.8, wall=18016 2021-06-18 23:39:13 | INFO | train_inner | epoch 001: 1627 / 3002 loss=3.331, ppl=10.07, wps=5892.3, ups=0.09, wpb=64926, bsz=128, num_updates=1611, lr=9.99951e-05, gnorm=2.87, loss_scale=1, train_wall=11, gb_free=2.8, wall=18027 2021-06-18 23:39:24 | INFO | train_inner | epoch 001: 1628 / 3002 loss=2.975, ppl=7.86, wps=5874.8, ups=0.09, wpb=64923, bsz=128, num_updates=1612, lr=9.99951e-05, gnorm=11.498, loss_scale=1, train_wall=11, gb_free=2.8, wall=18038 2021-06-18 23:39:35 | INFO | train_inner | epoch 001: 1629 / 3002 loss=2.99, ppl=7.95, wps=5819.3, ups=0.09, wpb=64834, bsz=128, num_updates=1613, lr=9.99951e-05, gnorm=2.663, loss_scale=1, train_wall=11, gb_free=2.8, wall=18050 2021-06-18 23:39:47 | INFO | train_inner | epoch 001: 1630 / 3002 loss=3.08, ppl=8.46, wps=5755.8, ups=0.09, wpb=64846, bsz=128, num_updates=1614, lr=9.99951e-05, gnorm=2.579, loss_scale=1, train_wall=11, gb_free=2.8, wall=18061 2021-06-18 23:39:58 | INFO | train_inner | epoch 001: 1631 / 3002 loss=3.05, ppl=8.28, wps=5827.1, ups=0.09, wpb=64842, bsz=128, num_updates=1615, lr=9.99951e-05, gnorm=2.534, loss_scale=1, train_wall=11, gb_free=2.8, wall=18072 2021-06-18 23:40:09 | INFO | train_inner | epoch 001: 1632 / 3002 loss=3.002, ppl=8.01, wps=5909.1, ups=0.09, wpb=64868, bsz=128, num_updates=1616, lr=9.99951e-05, gnorm=2.76, loss_scale=1, train_wall=11, gb_free=2.8, wall=18083 2021-06-18 23:40:20 | INFO | train_inner | epoch 001: 1633 / 3002 loss=2.986, ppl=7.92, wps=5825.6, ups=0.09, wpb=64847, bsz=128, num_updates=1617, lr=9.99951e-05, gnorm=3.307, loss_scale=1, train_wall=11, gb_free=2.8, wall=18094 2021-06-18 23:40:31 | INFO | train_inner | epoch 001: 1634 / 3002 loss=3.009, ppl=8.05, wps=5995.5, ups=0.09, wpb=64871, bsz=128, num_updates=1618, lr=9.99951e-05, gnorm=7.813, loss_scale=1, train_wall=10, gb_free=2.8, wall=18105 2021-06-18 23:40:42 | INFO | train_inner | epoch 001: 1635 / 3002 loss=3.034, ppl=8.19, wps=5836.6, ups=0.09, wpb=64721, bsz=128, num_updates=1619, lr=9.9995e-05, gnorm=2.487, loss_scale=1, train_wall=11, gb_free=2.8, wall=18116 2021-06-18 23:40:52 | INFO | train_inner | epoch 001: 1636 / 3002 loss=2.939, ppl=7.67, wps=6023.3, ups=0.09, wpb=64851, bsz=128, num_updates=1620, lr=9.9995e-05, gnorm=2.636, loss_scale=1, train_wall=10, gb_free=2.8, wall=18127 2021-06-18 23:41:04 | INFO | train_inner | epoch 001: 1637 / 3002 loss=2.944, ppl=7.7, wps=5822, ups=0.09, wpb=64716, bsz=128, num_updates=1621, lr=9.9995e-05, gnorm=2.672, loss_scale=1, train_wall=11, gb_free=2.8, wall=18138 2021-06-18 23:41:15 | INFO | train_inner | epoch 001: 1638 / 3002 loss=3.11, ppl=8.63, wps=5845.7, ups=0.09, wpb=64842, bsz=128, num_updates=1622, lr=9.9995e-05, gnorm=2.617, loss_scale=1, train_wall=11, gb_free=2.8, wall=18149 2021-06-18 23:41:26 | INFO | train_inner | epoch 001: 1639 / 3002 loss=3.062, ppl=8.35, wps=5892.3, ups=0.09, wpb=64816, bsz=128, num_updates=1623, lr=9.9995e-05, gnorm=2.669, loss_scale=1, train_wall=11, gb_free=2.8, wall=18160 2021-06-18 23:41:37 | INFO | train_inner | epoch 001: 1640 / 3002 loss=2.984, ppl=7.91, wps=5810.9, ups=0.09, wpb=64656, bsz=128, num_updates=1624, lr=9.9995e-05, gnorm=11.143, loss_scale=1, train_wall=11, gb_free=2.8, wall=18171 2021-06-18 23:41:48 | INFO | train_inner | epoch 001: 1641 / 3002 loss=3.009, ppl=8.05, wps=5854.1, ups=0.09, wpb=64862, bsz=128, num_updates=1625, lr=9.9995e-05, gnorm=2.843, loss_scale=1, train_wall=11, gb_free=2.8, wall=18182 2021-06-18 23:41:59 | INFO | train_inner | epoch 001: 1642 / 3002 loss=3, ppl=8, wps=5741.3, ups=0.09, wpb=64779, bsz=128, num_updates=1626, lr=9.9995e-05, gnorm=6.007, loss_scale=1, train_wall=11, gb_free=2.8, wall=18193 2021-06-18 23:42:10 | INFO | train_inner | epoch 001: 1643 / 3002 loss=2.84, ppl=7.16, wps=5934.6, ups=0.09, wpb=64881, bsz=128, num_updates=1627, lr=9.9995e-05, gnorm=2.57, loss_scale=1, train_wall=11, gb_free=2.8, wall=18204 2021-06-18 23:42:21 | INFO | train_inner | epoch 001: 1644 / 3002 loss=2.999, ppl=7.99, wps=5726, ups=0.09, wpb=64806, bsz=128, num_updates=1628, lr=9.9995e-05, gnorm=16.856, loss_scale=1, train_wall=11, gb_free=2.8, wall=18216 2021-06-18 23:42:33 | INFO | train_inner | epoch 001: 1645 / 3002 loss=3.106, ppl=8.61, wps=5796.2, ups=0.09, wpb=64746, bsz=128, num_updates=1629, lr=9.9995e-05, gnorm=3.088, loss_scale=1, train_wall=11, gb_free=2.8, wall=18227 2021-06-18 23:42:44 | INFO | train_inner | epoch 001: 1646 / 3002 loss=2.997, ppl=7.98, wps=5862.9, ups=0.09, wpb=64849, bsz=128, num_updates=1630, lr=9.9995e-05, gnorm=3.315, loss_scale=1, train_wall=11, gb_free=2.8, wall=18238 2021-06-18 23:42:55 | INFO | train_inner | epoch 001: 1647 / 3002 loss=3.07, ppl=8.4, wps=5840.1, ups=0.09, wpb=64767, bsz=128, num_updates=1631, lr=9.9995e-05, gnorm=3.858, loss_scale=1, train_wall=11, gb_free=2.8, wall=18249 2021-06-18 23:43:06 | INFO | train_inner | epoch 001: 1648 / 3002 loss=3.05, ppl=8.28, wps=5793.6, ups=0.09, wpb=64788, bsz=128, num_updates=1632, lr=9.99949e-05, gnorm=2.645, loss_scale=1, train_wall=11, gb_free=2.8, wall=18260 2021-06-18 23:43:17 | INFO | train_inner | epoch 001: 1649 / 3002 loss=2.983, ppl=7.9, wps=5859.6, ups=0.09, wpb=64751, bsz=128, num_updates=1633, lr=9.99949e-05, gnorm=2.652, loss_scale=1, train_wall=11, gb_free=2.8, wall=18271 2021-06-18 23:43:28 | INFO | train_inner | epoch 001: 1650 / 3002 loss=3.133, ppl=8.78, wps=5836, ups=0.09, wpb=64896, bsz=128, num_updates=1634, lr=9.99949e-05, gnorm=4.184, loss_scale=1, train_wall=11, gb_free=2.8, wall=18282 2021-06-18 23:43:39 | INFO | train_inner | epoch 001: 1651 / 3002 loss=3.144, ppl=8.84, wps=5887.4, ups=0.09, wpb=64886, bsz=128, num_updates=1635, lr=9.99949e-05, gnorm=2.628, loss_scale=1, train_wall=11, gb_free=2.8, wall=18293 2021-06-18 23:43:50 | INFO | train_inner | epoch 001: 1652 / 3002 loss=2.981, ppl=7.89, wps=5824.1, ups=0.09, wpb=64833, bsz=128, num_updates=1636, lr=9.99949e-05, gnorm=4.294, loss_scale=1, train_wall=11, gb_free=2.8, wall=18305 2021-06-18 23:44:01 | INFO | train_inner | epoch 001: 1653 / 3002 loss=3.044, ppl=8.25, wps=5905.2, ups=0.09, wpb=64821, bsz=128, num_updates=1637, lr=9.99949e-05, gnorm=2.91, loss_scale=1, train_wall=11, gb_free=2.8, wall=18316 2021-06-18 23:44:12 | INFO | train_inner | epoch 001: 1654 / 3002 loss=3.075, ppl=8.43, wps=5925.7, ups=0.09, wpb=64912, bsz=128, num_updates=1638, lr=9.99949e-05, gnorm=4.845, loss_scale=1, train_wall=10, gb_free=2.8, wall=18326 2021-06-18 23:44:23 | INFO | train_inner | epoch 001: 1655 / 3002 loss=2.987, ppl=7.93, wps=5896.3, ups=0.09, wpb=64816, bsz=128, num_updates=1639, lr=9.99949e-05, gnorm=2.97, loss_scale=1, train_wall=11, gb_free=2.8, wall=18337 2021-06-18 23:44:34 | INFO | train_inner | epoch 001: 1656 / 3002 loss=3.12, ppl=8.69, wps=5826.9, ups=0.09, wpb=64759, bsz=128, num_updates=1640, lr=9.99949e-05, gnorm=5.134, loss_scale=1, train_wall=11, gb_free=2.8, wall=18349 2021-06-18 23:44:45 | INFO | train_inner | epoch 001: 1657 / 3002 loss=3.091, ppl=8.52, wps=5825.4, ups=0.09, wpb=64746, bsz=128, num_updates=1641, lr=9.99949e-05, gnorm=3.586, loss_scale=1, train_wall=11, gb_free=2.8, wall=18360 2021-06-18 23:44:56 | INFO | train_inner | epoch 001: 1658 / 3002 loss=2.931, ppl=7.63, wps=5978.6, ups=0.09, wpb=64863, bsz=128, num_updates=1642, lr=9.99949e-05, gnorm=2.688, loss_scale=1, train_wall=10, gb_free=2.8, wall=18371 2021-06-18 23:45:07 | INFO | train_inner | epoch 001: 1659 / 3002 loss=2.987, ppl=7.93, wps=5835.5, ups=0.09, wpb=64851, bsz=128, num_updates=1643, lr=9.99949e-05, gnorm=5.319, loss_scale=1, train_wall=11, gb_free=2.8, wall=18382 2021-06-18 23:45:18 | INFO | train_inner | epoch 001: 1660 / 3002 loss=2.911, ppl=7.52, wps=5826.1, ups=0.09, wpb=64882, bsz=128, num_updates=1644, lr=9.99948e-05, gnorm=2.829, loss_scale=1, train_wall=11, gb_free=2.8, wall=18393 2021-06-18 23:45:30 | INFO | train_inner | epoch 001: 1661 / 3002 loss=3.115, ppl=8.67, wps=5846.8, ups=0.09, wpb=64809, bsz=128, num_updates=1645, lr=9.99948e-05, gnorm=3.361, loss_scale=1, train_wall=11, gb_free=2.8, wall=18404 2021-06-18 23:45:41 | INFO | train_inner | epoch 001: 1662 / 3002 loss=3.052, ppl=8.29, wps=5805.3, ups=0.09, wpb=64773, bsz=128, num_updates=1646, lr=9.99948e-05, gnorm=2.963, loss_scale=1, train_wall=11, gb_free=2.8, wall=18415 2021-06-18 23:45:52 | INFO | train_inner | epoch 001: 1663 / 3002 loss=2.971, ppl=7.84, wps=5816.4, ups=0.09, wpb=64757, bsz=128, num_updates=1647, lr=9.99948e-05, gnorm=20.616, loss_scale=1, train_wall=11, gb_free=2.8, wall=18426 2021-06-18 23:46:03 | INFO | train_inner | epoch 001: 1664 / 3002 loss=2.941, ppl=7.68, wps=5935.4, ups=0.09, wpb=64924, bsz=128, num_updates=1648, lr=9.99948e-05, gnorm=5.569, loss_scale=1, train_wall=10, gb_free=2.8, wall=18437 2021-06-18 23:46:14 | INFO | train_inner | epoch 001: 1665 / 3002 loss=3.153, ppl=8.89, wps=5828.2, ups=0.09, wpb=64916, bsz=128, num_updates=1649, lr=9.99948e-05, gnorm=2.974, loss_scale=1, train_wall=11, gb_free=2.8, wall=18448 2021-06-18 23:46:25 | INFO | train_inner | epoch 001: 1666 / 3002 loss=3.011, ppl=8.06, wps=5916.7, ups=0.09, wpb=64763, bsz=128, num_updates=1650, lr=9.99948e-05, gnorm=3.257, loss_scale=1, train_wall=10, gb_free=2.8, wall=18459 2021-06-18 23:46:36 | INFO | train_inner | epoch 001: 1667 / 3002 loss=3.12, ppl=8.69, wps=5811.7, ups=0.09, wpb=64862, bsz=128, num_updates=1651, lr=9.99948e-05, gnorm=4.284, loss_scale=1, train_wall=11, gb_free=2.8, wall=18470 2021-06-18 23:46:47 | INFO | train_inner | epoch 001: 1668 / 3002 loss=3.055, ppl=8.31, wps=5862.3, ups=0.09, wpb=64862, bsz=128, num_updates=1652, lr=9.99948e-05, gnorm=4.348, loss_scale=1, train_wall=11, gb_free=2.8, wall=18481 2021-06-18 23:46:58 | INFO | train_inner | epoch 001: 1669 / 3002 loss=3.234, ppl=9.41, wps=5818.6, ups=0.09, wpb=64826, bsz=128, num_updates=1653, lr=9.99948e-05, gnorm=3.104, loss_scale=1, train_wall=11, gb_free=2.8, wall=18493 2021-06-18 23:47:09 | INFO | train_inner | epoch 001: 1670 / 3002 loss=3.137, ppl=8.8, wps=5809.1, ups=0.09, wpb=64886, bsz=128, num_updates=1654, lr=9.99948e-05, gnorm=3.818, loss_scale=1, train_wall=11, gb_free=2.8, wall=18504 2021-06-18 23:47:21 | INFO | train_inner | epoch 001: 1671 / 3002 loss=3.073, ppl=8.41, wps=5838.5, ups=0.09, wpb=64854, bsz=128, num_updates=1655, lr=9.99948e-05, gnorm=13.437, loss_scale=1, train_wall=11, gb_free=2.8, wall=18515 2021-06-18 23:47:32 | INFO | train_inner | epoch 001: 1672 / 3002 loss=3.1, ppl=8.57, wps=5884.9, ups=0.09, wpb=64834, bsz=128, num_updates=1656, lr=9.99948e-05, gnorm=12.132, loss_scale=1, train_wall=11, gb_free=2.8, wall=18526 2021-06-18 23:47:43 | INFO | train_inner | epoch 001: 1673 / 3002 loss=3.026, ppl=8.14, wps=5895.2, ups=0.09, wpb=64812, bsz=128, num_updates=1657, lr=9.99947e-05, gnorm=4.085, loss_scale=1, train_wall=11, gb_free=2.8, wall=18537 2021-06-18 23:47:54 | INFO | train_inner | epoch 001: 1674 / 3002 loss=3.054, ppl=8.31, wps=5884.9, ups=0.09, wpb=64940, bsz=128, num_updates=1658, lr=9.99947e-05, gnorm=2.961, loss_scale=1, train_wall=11, gb_free=2.8, wall=18548 2021-06-18 23:48:05 | INFO | train_inner | epoch 001: 1675 / 3002 loss=3.039, ppl=8.22, wps=5797.6, ups=0.09, wpb=64808, bsz=128, num_updates=1659, lr=9.99947e-05, gnorm=2.963, loss_scale=1, train_wall=11, gb_free=2.8, wall=18559 2021-06-18 23:48:16 | INFO | train_inner | epoch 001: 1676 / 3002 loss=3.228, ppl=9.37, wps=5794.7, ups=0.09, wpb=64806, bsz=128, num_updates=1660, lr=9.99947e-05, gnorm=2.849, loss_scale=1, train_wall=11, gb_free=2.8, wall=18570 2021-06-18 23:48:27 | INFO | train_inner | epoch 001: 1677 / 3002 loss=3.028, ppl=8.16, wps=5756.2, ups=0.09, wpb=64778, bsz=128, num_updates=1661, lr=9.99947e-05, gnorm=3.304, loss_scale=1, train_wall=11, gb_free=2.8, wall=18582 2021-06-18 23:48:38 | INFO | train_inner | epoch 001: 1678 / 3002 loss=3.034, ppl=8.19, wps=5881, ups=0.09, wpb=64814, bsz=128, num_updates=1662, lr=9.99947e-05, gnorm=3.761, loss_scale=1, train_wall=11, gb_free=2.8, wall=18593 2021-06-18 23:48:49 | INFO | train_inner | epoch 001: 1679 / 3002 loss=3.173, ppl=9.02, wps=5914.4, ups=0.09, wpb=64851, bsz=128, num_updates=1663, lr=9.99947e-05, gnorm=3.492, loss_scale=1, train_wall=11, gb_free=2.8, wall=18603 2021-06-18 23:49:00 | INFO | train_inner | epoch 001: 1680 / 3002 loss=3.057, ppl=8.32, wps=5899.2, ups=0.09, wpb=64829, bsz=128, num_updates=1664, lr=9.99947e-05, gnorm=2.804, loss_scale=1, train_wall=11, gb_free=2.8, wall=18614 2021-06-18 23:49:11 | INFO | train_inner | epoch 001: 1681 / 3002 loss=3.116, ppl=8.67, wps=5812.3, ups=0.09, wpb=64846, bsz=128, num_updates=1665, lr=9.99947e-05, gnorm=2.75, loss_scale=1, train_wall=11, gb_free=2.8, wall=18626 2021-06-18 23:49:22 | INFO | train_inner | epoch 001: 1682 / 3002 loss=3.068, ppl=8.39, wps=5849.1, ups=0.09, wpb=64791, bsz=128, num_updates=1666, lr=9.99947e-05, gnorm=3.162, loss_scale=1, train_wall=11, gb_free=2.8, wall=18637 2021-06-18 23:49:34 | INFO | train_inner | epoch 001: 1683 / 3002 loss=3.105, ppl=8.6, wps=5794.9, ups=0.09, wpb=64879, bsz=128, num_updates=1667, lr=9.99947e-05, gnorm=2.691, loss_scale=1, train_wall=11, gb_free=2.8, wall=18648 2021-06-18 23:49:45 | INFO | train_inner | epoch 001: 1684 / 3002 loss=2.862, ppl=7.27, wps=5747.8, ups=0.09, wpb=64855, bsz=128, num_updates=1668, lr=9.99947e-05, gnorm=2.928, loss_scale=1, train_wall=11, gb_free=2.8, wall=18659 2021-06-18 23:49:56 | INFO | train_inner | epoch 001: 1685 / 3002 loss=3.168, ppl=8.99, wps=5824.5, ups=0.09, wpb=64825, bsz=128, num_updates=1669, lr=9.99946e-05, gnorm=3.221, loss_scale=1, train_wall=11, gb_free=2.8, wall=18670 2021-06-18 23:50:07 | INFO | train_inner | epoch 001: 1686 / 3002 loss=2.915, ppl=7.54, wps=5791.5, ups=0.09, wpb=64832, bsz=128, num_updates=1670, lr=9.99946e-05, gnorm=3.035, loss_scale=1, train_wall=11, gb_free=2.8, wall=18682 2021-06-18 23:50:18 | INFO | train_inner | epoch 001: 1687 / 3002 loss=2.921, ppl=7.58, wps=5808.8, ups=0.09, wpb=64874, bsz=128, num_updates=1671, lr=9.99946e-05, gnorm=2.644, loss_scale=1, train_wall=11, gb_free=2.8, wall=18693 2021-06-18 23:50:29 | INFO | train_inner | epoch 001: 1688 / 3002 loss=2.942, ppl=7.69, wps=5829.1, ups=0.09, wpb=64792, bsz=128, num_updates=1672, lr=9.99946e-05, gnorm=7.437, loss_scale=1, train_wall=11, gb_free=2.8, wall=18704 2021-06-18 23:50:41 | INFO | train_inner | epoch 001: 1689 / 3002 loss=2.837, ppl=7.15, wps=5767.4, ups=0.09, wpb=64835, bsz=128, num_updates=1673, lr=9.99946e-05, gnorm=2.465, loss_scale=1, train_wall=11, gb_free=2.8, wall=18715 2021-06-18 23:50:52 | INFO | train_inner | epoch 001: 1690 / 3002 loss=2.98, ppl=7.89, wps=5902.4, ups=0.09, wpb=64870, bsz=128, num_updates=1674, lr=9.99946e-05, gnorm=2.502, loss_scale=1, train_wall=11, gb_free=2.8, wall=18726 2021-06-18 23:51:03 | INFO | train_inner | epoch 001: 1691 / 3002 loss=3.162, ppl=8.95, wps=5863.3, ups=0.09, wpb=64870, bsz=128, num_updates=1675, lr=9.99946e-05, gnorm=3.965, loss_scale=1, train_wall=11, gb_free=2.8, wall=18737 2021-06-18 23:51:14 | INFO | train_inner | epoch 001: 1692 / 3002 loss=3.092, ppl=8.53, wps=5963.9, ups=0.09, wpb=64807, bsz=128, num_updates=1676, lr=9.99946e-05, gnorm=2.678, loss_scale=1, train_wall=10, gb_free=2.8, wall=18748 2021-06-18 23:51:25 | INFO | train_inner | epoch 001: 1693 / 3002 loss=3.042, ppl=8.24, wps=5846.1, ups=0.09, wpb=64806, bsz=128, num_updates=1677, lr=9.99946e-05, gnorm=2.599, loss_scale=1, train_wall=11, gb_free=2.8, wall=18759 2021-06-18 23:51:36 | INFO | train_inner | epoch 001: 1694 / 3002 loss=3.108, ppl=8.62, wps=5772.1, ups=0.09, wpb=64775, bsz=128, num_updates=1678, lr=9.99946e-05, gnorm=2.721, loss_scale=1, train_wall=11, gb_free=2.8, wall=18770 2021-06-18 23:51:47 | INFO | train_inner | epoch 001: 1695 / 3002 loss=3.131, ppl=8.76, wps=5924, ups=0.09, wpb=64758, bsz=128, num_updates=1679, lr=9.99946e-05, gnorm=2.688, loss_scale=1, train_wall=10, gb_free=2.8, wall=18781 2021-06-18 23:51:58 | INFO | train_inner | epoch 001: 1696 / 3002 loss=2.915, ppl=7.54, wps=5789.5, ups=0.09, wpb=64698, bsz=128, num_updates=1680, lr=9.99946e-05, gnorm=2.714, loss_scale=1, train_wall=11, gb_free=2.8, wall=18792 2021-06-18 23:52:09 | INFO | train_inner | epoch 001: 1697 / 3002 loss=3.01, ppl=8.06, wps=5880.6, ups=0.09, wpb=64905, bsz=128, num_updates=1681, lr=9.99946e-05, gnorm=6.083, loss_scale=1, train_wall=11, gb_free=2.8, wall=18803 2021-06-18 23:52:20 | INFO | train_inner | epoch 001: 1698 / 3002 loss=3.076, ppl=8.43, wps=5820.5, ups=0.09, wpb=64869, bsz=128, num_updates=1682, lr=9.99945e-05, gnorm=2.641, loss_scale=1, train_wall=11, gb_free=2.8, wall=18815 2021-06-18 23:52:31 | INFO | train_inner | epoch 001: 1699 / 3002 loss=3.104, ppl=8.6, wps=5805.9, ups=0.09, wpb=64745, bsz=128, num_updates=1683, lr=9.99945e-05, gnorm=2.617, loss_scale=1, train_wall=11, gb_free=2.8, wall=18826 2021-06-18 23:52:42 | INFO | train_inner | epoch 001: 1700 / 3002 loss=3.042, ppl=8.24, wps=5870.2, ups=0.09, wpb=64781, bsz=128, num_updates=1684, lr=9.99945e-05, gnorm=2.468, loss_scale=1, train_wall=11, gb_free=2.8, wall=18837 2021-06-18 23:52:53 | INFO | train_inner | epoch 001: 1701 / 3002 loss=2.931, ppl=7.63, wps=5945.4, ups=0.09, wpb=64927, bsz=128, num_updates=1685, lr=9.99945e-05, gnorm=2.513, loss_scale=1, train_wall=10, gb_free=2.8, wall=18848 2021-06-18 23:53:04 | INFO | train_inner | epoch 001: 1702 / 3002 loss=2.978, ppl=7.88, wps=5911.8, ups=0.09, wpb=64832, bsz=128, num_updates=1686, lr=9.99945e-05, gnorm=2.724, loss_scale=1, train_wall=11, gb_free=2.8, wall=18859 2021-06-18 23:53:16 | INFO | train_inner | epoch 001: 1703 / 3002 loss=3.084, ppl=8.48, wps=5761.8, ups=0.09, wpb=64793, bsz=128, num_updates=1687, lr=9.99945e-05, gnorm=64.165, loss_scale=1, train_wall=11, gb_free=2.8, wall=18870 2021-06-18 23:53:27 | INFO | train_inner | epoch 001: 1704 / 3002 loss=2.933, ppl=7.64, wps=5881.1, ups=0.09, wpb=64854, bsz=128, num_updates=1688, lr=9.99945e-05, gnorm=2.794, loss_scale=1, train_wall=11, gb_free=2.8, wall=18881 2021-06-18 23:53:37 | INFO | train_inner | epoch 001: 1705 / 3002 loss=2.982, ppl=7.9, wps=5965.7, ups=0.09, wpb=64797, bsz=128, num_updates=1689, lr=9.99945e-05, gnorm=2.75, loss_scale=1, train_wall=10, gb_free=2.8, wall=18892 2021-06-18 23:53:48 | INFO | train_inner | epoch 001: 1706 / 3002 loss=2.961, ppl=7.79, wps=5921.9, ups=0.09, wpb=64793, bsz=128, num_updates=1690, lr=9.99945e-05, gnorm=2.787, loss_scale=1, train_wall=10, gb_free=2.8, wall=18903 2021-06-18 23:54:00 | INFO | train_inner | epoch 001: 1707 / 3002 loss=3.134, ppl=8.78, wps=5828, ups=0.09, wpb=64864, bsz=128, num_updates=1691, lr=9.99945e-05, gnorm=3.538, loss_scale=1, train_wall=11, gb_free=2.8, wall=18914 2021-06-18 23:54:11 | INFO | train_inner | epoch 001: 1708 / 3002 loss=3.175, ppl=9.03, wps=5756.5, ups=0.09, wpb=64818, bsz=128, num_updates=1692, lr=9.99945e-05, gnorm=2.91, loss_scale=1, train_wall=11, gb_free=2.8, wall=18925 2021-06-18 23:54:22 | INFO | train_inner | epoch 001: 1709 / 3002 loss=3.096, ppl=8.55, wps=5789.9, ups=0.09, wpb=64767, bsz=128, num_updates=1693, lr=9.99945e-05, gnorm=3.989, loss_scale=1, train_wall=11, gb_free=2.8, wall=18936 2021-06-18 23:54:33 | INFO | train_inner | epoch 001: 1710 / 3002 loss=2.906, ppl=7.5, wps=5920.9, ups=0.09, wpb=64824, bsz=128, num_updates=1694, lr=9.99944e-05, gnorm=5.125, loss_scale=1, train_wall=11, gb_free=2.8, wall=18947 2021-06-18 23:54:44 | INFO | train_inner | epoch 001: 1711 / 3002 loss=3.212, ppl=9.27, wps=5838.9, ups=0.09, wpb=64796, bsz=128, num_updates=1695, lr=9.99944e-05, gnorm=4.767, loss_scale=1, train_wall=11, gb_free=2.8, wall=18958 2021-06-18 23:54:55 | INFO | train_inner | epoch 001: 1712 / 3002 loss=3.172, ppl=9.01, wps=5775.1, ups=0.09, wpb=64784, bsz=128, num_updates=1696, lr=9.99944e-05, gnorm=3.241, loss_scale=1, train_wall=11, gb_free=2.8, wall=18970 2021-06-18 23:55:06 | INFO | train_inner | epoch 001: 1713 / 3002 loss=3.097, ppl=8.56, wps=5837.5, ups=0.09, wpb=64810, bsz=128, num_updates=1697, lr=9.99944e-05, gnorm=3.108, loss_scale=1, train_wall=11, gb_free=2.8, wall=18981 2021-06-18 23:55:17 | INFO | train_inner | epoch 001: 1714 / 3002 loss=3.057, ppl=8.32, wps=5906, ups=0.09, wpb=64903, bsz=128, num_updates=1698, lr=9.99944e-05, gnorm=2.999, loss_scale=1, train_wall=11, gb_free=2.8, wall=18992 2021-06-18 23:55:28 | INFO | train_inner | epoch 001: 1715 / 3002 loss=3.107, ppl=8.62, wps=5862.2, ups=0.09, wpb=64805, bsz=128, num_updates=1699, lr=9.99944e-05, gnorm=4.313, loss_scale=1, train_wall=11, gb_free=2.8, wall=19003 2021-06-18 23:55:40 | INFO | train_inner | epoch 001: 1716 / 3002 loss=3.109, ppl=8.63, wps=5820.1, ups=0.09, wpb=64881, bsz=128, num_updates=1700, lr=9.99944e-05, gnorm=26.696, loss_scale=1, train_wall=11, gb_free=2.8, wall=19014 2021-06-18 23:55:51 | INFO | train_inner | epoch 001: 1717 / 3002 loss=3.18, ppl=9.06, wps=5839.1, ups=0.09, wpb=64818, bsz=128, num_updates=1701, lr=9.99944e-05, gnorm=2.94, loss_scale=1, train_wall=11, gb_free=2.8, wall=19025 2021-06-18 23:56:02 | INFO | train_inner | epoch 001: 1718 / 3002 loss=3.432, ppl=10.8, wps=5852.7, ups=0.09, wpb=64716, bsz=128, num_updates=1702, lr=9.99944e-05, gnorm=3.374, loss_scale=1, train_wall=11, gb_free=2.8, wall=19036 2021-06-18 23:56:13 | INFO | train_inner | epoch 001: 1719 / 3002 loss=3.067, ppl=8.38, wps=5843, ups=0.09, wpb=64866, bsz=128, num_updates=1703, lr=9.99944e-05, gnorm=3.578, loss_scale=1, train_wall=11, gb_free=2.8, wall=19047 2021-06-18 23:56:24 | INFO | train_inner | epoch 001: 1720 / 3002 loss=3.183, ppl=9.08, wps=5775.7, ups=0.09, wpb=64794, bsz=128, num_updates=1704, lr=9.99944e-05, gnorm=8.423, loss_scale=1, train_wall=11, gb_free=2.8, wall=19058 2021-06-18 23:56:35 | INFO | train_inner | epoch 001: 1721 / 3002 loss=3.381, ppl=10.41, wps=5896.3, ups=0.09, wpb=64786, bsz=128, num_updates=1705, lr=9.99944e-05, gnorm=3.839, loss_scale=1, train_wall=11, gb_free=2.8, wall=19069 2021-06-18 23:56:46 | INFO | train_inner | epoch 001: 1722 / 3002 loss=3.31, ppl=9.92, wps=5888.1, ups=0.09, wpb=64777, bsz=128, num_updates=1706, lr=9.99944e-05, gnorm=10.966, loss_scale=1, train_wall=11, gb_free=2.8, wall=19080 2021-06-18 23:56:57 | INFO | train_inner | epoch 001: 1723 / 3002 loss=3.52, ppl=11.47, wps=5882.7, ups=0.09, wpb=64821, bsz=128, num_updates=1707, lr=9.99943e-05, gnorm=4.222, loss_scale=1, train_wall=11, gb_free=2.8, wall=19091 2021-06-18 23:57:08 | INFO | train_inner | epoch 001: 1724 / 3002 loss=3.735, ppl=13.32, wps=5923.7, ups=0.09, wpb=64842, bsz=128, num_updates=1708, lr=9.99943e-05, gnorm=9.778, loss_scale=1, train_wall=10, gb_free=2.8, wall=19102 2021-06-18 23:57:19 | INFO | train_inner | epoch 001: 1725 / 3002 loss=3.844, ppl=14.36, wps=5792.6, ups=0.09, wpb=64824, bsz=128, num_updates=1709, lr=9.99943e-05, gnorm=16.061, loss_scale=1, train_wall=11, gb_free=2.8, wall=19113 2021-06-18 23:57:30 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 2021-06-18 23:57:41 | INFO | train_inner | epoch 001: 1727 / 3002 loss=4.135, ppl=17.57, wps=2929.5, ups=0.05, wpb=64719, bsz=128, num_updates=1710, lr=9.99943e-05, gnorm=10.088, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=19136 2021-06-18 23:57:52 | INFO | train_inner | epoch 001: 1728 / 3002 loss=4.113, ppl=17.31, wps=5938, ups=0.09, wpb=64940, bsz=128, num_updates=1711, lr=9.99943e-05, gnorm=45.337, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=19147 2021-06-18 23:58:03 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 2021-06-18 23:58:14 | INFO | train_inner | epoch 001: 1730 / 3002 loss=3.665, ppl=12.69, wps=2936.3, ups=0.05, wpb=64953, bsz=128, num_updates=1712, lr=9.99943e-05, gnorm=7.258, loss_scale=0.25, train_wall=21, gb_free=2.8, wall=19169 2021-06-18 23:58:25 | INFO | train_inner | epoch 001: 1731 / 3002 loss=3.702, ppl=13.01, wps=5935.6, ups=0.09, wpb=64871, bsz=128, num_updates=1713, lr=9.99943e-05, gnorm=9.71, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19180 2021-06-18 23:58:36 | INFO | train_inner | epoch 001: 1732 / 3002 loss=3.404, ppl=10.58, wps=5752, ups=0.09, wpb=64858, bsz=128, num_updates=1714, lr=9.99943e-05, gnorm=6.149, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19191 2021-06-18 23:58:47 | INFO | train_inner | epoch 001: 1733 / 3002 loss=3.257, ppl=9.56, wps=5954.8, ups=0.09, wpb=64852, bsz=128, num_updates=1715, lr=9.99943e-05, gnorm=3.413, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19202 2021-06-18 23:58:59 | INFO | train_inner | epoch 001: 1734 / 3002 loss=3.295, ppl=9.82, wps=5782.1, ups=0.09, wpb=64857, bsz=128, num_updates=1716, lr=9.99943e-05, gnorm=3.314, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19213 2021-06-18 23:59:10 | INFO | train_inner | epoch 001: 1735 / 3002 loss=3.201, ppl=9.2, wps=5782.1, ups=0.09, wpb=64789, bsz=128, num_updates=1717, lr=9.99943e-05, gnorm=4.598, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19224 2021-06-18 23:59:21 | INFO | train_inner | epoch 001: 1736 / 3002 loss=3.194, ppl=9.15, wps=5917, ups=0.09, wpb=64852, bsz=128, num_updates=1718, lr=9.99943e-05, gnorm=35.067, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19235 2021-06-18 23:59:32 | INFO | train_inner | epoch 001: 1737 / 3002 loss=3.171, ppl=9.01, wps=5843.2, ups=0.09, wpb=64874, bsz=128, num_updates=1719, lr=9.99942e-05, gnorm=2.931, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19246 2021-06-18 23:59:43 | INFO | train_inner | epoch 001: 1738 / 3002 loss=3.152, ppl=8.89, wps=5892.1, ups=0.09, wpb=64907, bsz=128, num_updates=1720, lr=9.99942e-05, gnorm=3.115, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19257 2021-06-18 23:59:54 | INFO | train_inner | epoch 001: 1739 / 3002 loss=3.033, ppl=8.19, wps=5848.6, ups=0.09, wpb=64912, bsz=128, num_updates=1721, lr=9.99942e-05, gnorm=3.471, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19268 2021-06-19 00:00:05 | INFO | train_inner | epoch 001: 1740 / 3002 loss=3.237, ppl=9.43, wps=5976.3, ups=0.09, wpb=64817, bsz=128, num_updates=1722, lr=9.99942e-05, gnorm=4.523, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19279 2021-06-19 00:00:16 | INFO | train_inner | epoch 001: 1741 / 3002 loss=3.224, ppl=9.34, wps=5956.6, ups=0.09, wpb=64859, bsz=128, num_updates=1723, lr=9.99942e-05, gnorm=3.378, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19290 2021-06-19 00:00:27 | INFO | train_inner | epoch 001: 1742 / 3002 loss=3.151, ppl=8.88, wps=5810.4, ups=0.09, wpb=64905, bsz=128, num_updates=1724, lr=9.99942e-05, gnorm=3.436, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19301 2021-06-19 00:00:38 | INFO | train_inner | epoch 001: 1743 / 3002 loss=3.118, ppl=8.68, wps=5902.1, ups=0.09, wpb=64854, bsz=128, num_updates=1725, lr=9.99942e-05, gnorm=2.942, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19312 2021-06-19 00:00:49 | INFO | train_inner | epoch 001: 1744 / 3002 loss=3.193, ppl=9.15, wps=5813.5, ups=0.09, wpb=64898, bsz=128, num_updates=1726, lr=9.99942e-05, gnorm=3.021, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19323 2021-06-19 00:01:00 | INFO | train_inner | epoch 001: 1745 / 3002 loss=3.161, ppl=8.95, wps=5835.7, ups=0.09, wpb=64852, bsz=128, num_updates=1727, lr=9.99942e-05, gnorm=3.31, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19334 2021-06-19 00:01:11 | INFO | train_inner | epoch 001: 1746 / 3002 loss=2.996, ppl=7.98, wps=5867.4, ups=0.09, wpb=64839, bsz=128, num_updates=1728, lr=9.99942e-05, gnorm=2.993, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19346 2021-06-19 00:01:22 | INFO | train_inner | epoch 001: 1747 / 3002 loss=3.186, ppl=9.1, wps=5892.9, ups=0.09, wpb=64756, bsz=128, num_updates=1729, lr=9.99942e-05, gnorm=2.918, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19357 2021-06-19 00:01:34 | INFO | train_inner | epoch 001: 1748 / 3002 loss=3.056, ppl=8.31, wps=5689.9, ups=0.09, wpb=64788, bsz=128, num_updates=1730, lr=9.99942e-05, gnorm=3.052, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19368 2021-06-19 00:01:45 | INFO | train_inner | epoch 001: 1749 / 3002 loss=3.041, ppl=8.23, wps=5875.2, ups=0.09, wpb=64886, bsz=128, num_updates=1731, lr=9.99942e-05, gnorm=3.623, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19379 2021-06-19 00:01:56 | INFO | train_inner | epoch 001: 1750 / 3002 loss=3.282, ppl=9.73, wps=5902, ups=0.09, wpb=64848, bsz=128, num_updates=1732, lr=9.99941e-05, gnorm=3.898, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19390 2021-06-19 00:02:06 | INFO | train_inner | epoch 001: 1751 / 3002 loss=3.043, ppl=8.24, wps=5997, ups=0.09, wpb=64853, bsz=128, num_updates=1733, lr=9.99941e-05, gnorm=2.746, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19401 2021-06-19 00:02:17 | INFO | train_inner | epoch 001: 1752 / 3002 loss=3.084, ppl=8.48, wps=5918.5, ups=0.09, wpb=64860, bsz=128, num_updates=1734, lr=9.99941e-05, gnorm=3.193, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19412 2021-06-19 00:02:28 | INFO | train_inner | epoch 001: 1753 / 3002 loss=3.2, ppl=9.19, wps=5969.2, ups=0.09, wpb=64782, bsz=128, num_updates=1735, lr=9.99941e-05, gnorm=2.82, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19423 2021-06-19 00:02:39 | INFO | train_inner | epoch 001: 1754 / 3002 loss=3.351, ppl=10.2, wps=5854.7, ups=0.09, wpb=64757, bsz=128, num_updates=1736, lr=9.99941e-05, gnorm=2.8, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19434 2021-06-19 00:02:50 | INFO | train_inner | epoch 001: 1755 / 3002 loss=3.295, ppl=9.82, wps=5884.9, ups=0.09, wpb=64749, bsz=128, num_updates=1737, lr=9.99941e-05, gnorm=2.881, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19445 2021-06-19 00:03:01 | INFO | train_inner | epoch 001: 1756 / 3002 loss=3.044, ppl=8.25, wps=5863.5, ups=0.09, wpb=64793, bsz=128, num_updates=1738, lr=9.99941e-05, gnorm=2.78, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19456 2021-06-19 00:03:12 | INFO | train_inner | epoch 001: 1757 / 3002 loss=2.934, ppl=7.64, wps=5993.3, ups=0.09, wpb=64916, bsz=128, num_updates=1739, lr=9.99941e-05, gnorm=3.69, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19467 2021-06-19 00:03:23 | INFO | train_inner | epoch 001: 1758 / 3002 loss=2.981, ppl=7.89, wps=5794.3, ups=0.09, wpb=64853, bsz=128, num_updates=1740, lr=9.99941e-05, gnorm=2.846, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19478 2021-06-19 00:03:35 | INFO | train_inner | epoch 001: 1759 / 3002 loss=2.977, ppl=7.87, wps=5815.9, ups=0.09, wpb=64812, bsz=128, num_updates=1741, lr=9.99941e-05, gnorm=2.578, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19489 2021-06-19 00:03:46 | INFO | train_inner | epoch 001: 1760 / 3002 loss=3.139, ppl=8.81, wps=5841.2, ups=0.09, wpb=64792, bsz=128, num_updates=1742, lr=9.99941e-05, gnorm=3.855, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19500 2021-06-19 00:03:57 | INFO | train_inner | epoch 001: 1761 / 3002 loss=2.986, ppl=7.92, wps=5942.4, ups=0.09, wpb=64829, bsz=128, num_updates=1743, lr=9.99941e-05, gnorm=2.547, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19511 2021-06-19 00:04:08 | INFO | train_inner | epoch 001: 1762 / 3002 loss=3.107, ppl=8.61, wps=5894.2, ups=0.09, wpb=64758, bsz=128, num_updates=1744, lr=9.9994e-05, gnorm=3.229, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19522 2021-06-19 00:04:19 | INFO | train_inner | epoch 001: 1763 / 3002 loss=3.056, ppl=8.31, wps=5749.4, ups=0.09, wpb=64785, bsz=128, num_updates=1745, lr=9.9994e-05, gnorm=2.599, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19533 2021-06-19 00:04:30 | INFO | train_inner | epoch 001: 1764 / 3002 loss=3.028, ppl=8.16, wps=5858.1, ups=0.09, wpb=64962, bsz=128, num_updates=1746, lr=9.9994e-05, gnorm=3.204, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19544 2021-06-19 00:04:41 | INFO | train_inner | epoch 001: 1765 / 3002 loss=3.057, ppl=8.32, wps=5767, ups=0.09, wpb=64723, bsz=128, num_updates=1747, lr=9.9994e-05, gnorm=2.601, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19555 2021-06-19 00:04:52 | INFO | train_inner | epoch 001: 1766 / 3002 loss=3.051, ppl=8.29, wps=5840, ups=0.09, wpb=64882, bsz=128, num_updates=1748, lr=9.9994e-05, gnorm=2.824, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19567 2021-06-19 00:05:04 | INFO | train_inner | epoch 001: 1767 / 3002 loss=2.97, ppl=7.84, wps=5707, ups=0.09, wpb=64786, bsz=128, num_updates=1749, lr=9.9994e-05, gnorm=2.62, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19578 2021-06-19 00:05:15 | INFO | train_inner | epoch 001: 1768 / 3002 loss=3.088, ppl=8.5, wps=5694.9, ups=0.09, wpb=64791, bsz=128, num_updates=1750, lr=9.9994e-05, gnorm=2.521, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19589 2021-06-19 00:05:26 | INFO | train_inner | epoch 001: 1769 / 3002 loss=3.04, ppl=8.23, wps=5872.7, ups=0.09, wpb=64804, bsz=128, num_updates=1751, lr=9.9994e-05, gnorm=3.4, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19600 2021-06-19 00:05:37 | INFO | train_inner | epoch 001: 1770 / 3002 loss=3.122, ppl=8.7, wps=5875.3, ups=0.09, wpb=64823, bsz=128, num_updates=1752, lr=9.9994e-05, gnorm=2.83, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19611 2021-06-19 00:05:48 | INFO | train_inner | epoch 001: 1771 / 3002 loss=3.049, ppl=8.27, wps=5813.8, ups=0.09, wpb=64779, bsz=128, num_updates=1753, lr=9.9994e-05, gnorm=3.766, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19622 2021-06-19 00:05:59 | INFO | train_inner | epoch 001: 1772 / 3002 loss=2.91, ppl=7.52, wps=6013.3, ups=0.09, wpb=64935, bsz=128, num_updates=1754, lr=9.9994e-05, gnorm=2.731, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19633 2021-06-19 00:06:10 | INFO | train_inner | epoch 001: 1773 / 3002 loss=3.032, ppl=8.18, wps=5822.2, ups=0.09, wpb=64770, bsz=128, num_updates=1755, lr=9.9994e-05, gnorm=2.727, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19644 2021-06-19 00:06:21 | INFO | train_inner | epoch 001: 1774 / 3002 loss=2.963, ppl=7.8, wps=5833.8, ups=0.09, wpb=64861, bsz=128, num_updates=1756, lr=9.9994e-05, gnorm=2.51, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19656 2021-06-19 00:06:32 | INFO | train_inner | epoch 001: 1775 / 3002 loss=3.022, ppl=8.12, wps=5993.4, ups=0.09, wpb=64869, bsz=128, num_updates=1757, lr=9.99939e-05, gnorm=2.577, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19666 2021-06-19 00:06:43 | INFO | train_inner | epoch 001: 1776 / 3002 loss=2.952, ppl=7.74, wps=6001.6, ups=0.09, wpb=64899, bsz=128, num_updates=1758, lr=9.99939e-05, gnorm=2.91, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19677 2021-06-19 00:06:54 | INFO | train_inner | epoch 001: 1777 / 3002 loss=3.021, ppl=8.12, wps=5812.8, ups=0.09, wpb=64770, bsz=128, num_updates=1759, lr=9.99939e-05, gnorm=3.977, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19688 2021-06-19 00:07:05 | INFO | train_inner | epoch 001: 1778 / 3002 loss=3.064, ppl=8.36, wps=5864.1, ups=0.09, wpb=64879, bsz=128, num_updates=1760, lr=9.99939e-05, gnorm=4.137, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19699 2021-06-19 00:07:16 | INFO | train_inner | epoch 001: 1779 / 3002 loss=3.222, ppl=9.33, wps=5809.5, ups=0.09, wpb=64784, bsz=128, num_updates=1761, lr=9.99939e-05, gnorm=2.546, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19711 2021-06-19 00:07:27 | INFO | train_inner | epoch 001: 1780 / 3002 loss=3.182, ppl=9.07, wps=5797.1, ups=0.09, wpb=64733, bsz=128, num_updates=1762, lr=9.99939e-05, gnorm=2.743, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19722 2021-06-19 00:07:38 | INFO | train_inner | epoch 001: 1781 / 3002 loss=3.15, ppl=8.88, wps=5910.9, ups=0.09, wpb=64780, bsz=128, num_updates=1763, lr=9.99939e-05, gnorm=2.599, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19733 2021-06-19 00:07:49 | INFO | train_inner | epoch 001: 1782 / 3002 loss=2.971, ppl=7.84, wps=5820.9, ups=0.09, wpb=64757, bsz=128, num_updates=1764, lr=9.99939e-05, gnorm=19.151, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19744 2021-06-19 00:08:01 | INFO | train_inner | epoch 001: 1783 / 3002 loss=2.979, ppl=7.88, wps=5773.6, ups=0.09, wpb=64868, bsz=128, num_updates=1765, lr=9.99939e-05, gnorm=2.421, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19755 2021-06-19 00:08:12 | INFO | train_inner | epoch 001: 1784 / 3002 loss=2.987, ppl=7.93, wps=5693.4, ups=0.09, wpb=64813, bsz=128, num_updates=1766, lr=9.99939e-05, gnorm=2.422, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19766 2021-06-19 00:08:23 | INFO | train_inner | epoch 001: 1785 / 3002 loss=2.949, ppl=7.72, wps=5762.2, ups=0.09, wpb=64828, bsz=128, num_updates=1767, lr=9.99939e-05, gnorm=2.435, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19778 2021-06-19 00:08:34 | INFO | train_inner | epoch 001: 1786 / 3002 loss=2.983, ppl=7.91, wps=5797.6, ups=0.09, wpb=64735, bsz=128, num_updates=1768, lr=9.99939e-05, gnorm=2.615, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19789 2021-06-19 00:08:46 | INFO | train_inner | epoch 001: 1787 / 3002 loss=3.072, ppl=8.41, wps=5822.7, ups=0.09, wpb=64821, bsz=128, num_updates=1769, lr=9.99938e-05, gnorm=2.728, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19800 2021-06-19 00:08:57 | INFO | train_inner | epoch 001: 1788 / 3002 loss=3.047, ppl=8.26, wps=5824, ups=0.09, wpb=64963, bsz=128, num_updates=1770, lr=9.99938e-05, gnorm=2.696, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19811 2021-06-19 00:09:08 | INFO | train_inner | epoch 001: 1789 / 3002 loss=2.773, ppl=6.84, wps=5906.8, ups=0.09, wpb=64808, bsz=128, num_updates=1771, lr=9.99938e-05, gnorm=2.391, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19822 2021-06-19 00:09:19 | INFO | train_inner | epoch 001: 1790 / 3002 loss=3.061, ppl=8.35, wps=5841.2, ups=0.09, wpb=64763, bsz=128, num_updates=1772, lr=9.99938e-05, gnorm=3.35, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19833 2021-06-19 00:09:30 | INFO | train_inner | epoch 001: 1791 / 3002 loss=2.825, ppl=7.09, wps=5850.6, ups=0.09, wpb=64914, bsz=128, num_updates=1773, lr=9.99938e-05, gnorm=2.527, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19844 2021-06-19 00:09:41 | INFO | train_inner | epoch 001: 1792 / 3002 loss=3.111, ppl=8.64, wps=5966.4, ups=0.09, wpb=64910, bsz=128, num_updates=1774, lr=9.99938e-05, gnorm=2.428, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19855 2021-06-19 00:09:52 | INFO | train_inner | epoch 001: 1793 / 3002 loss=2.93, ppl=7.62, wps=5836.9, ups=0.09, wpb=64798, bsz=128, num_updates=1775, lr=9.99938e-05, gnorm=2.588, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19866 2021-06-19 00:10:03 | INFO | train_inner | epoch 001: 1794 / 3002 loss=3.016, ppl=8.09, wps=5993.4, ups=0.09, wpb=64872, bsz=128, num_updates=1776, lr=9.99938e-05, gnorm=2.634, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19877 2021-06-19 00:10:14 | INFO | train_inner | epoch 001: 1795 / 3002 loss=3.118, ppl=8.68, wps=5795.4, ups=0.09, wpb=64778, bsz=128, num_updates=1777, lr=9.99938e-05, gnorm=2.422, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19888 2021-06-19 00:10:25 | INFO | train_inner | epoch 001: 1796 / 3002 loss=2.998, ppl=7.99, wps=5872.8, ups=0.09, wpb=64829, bsz=128, num_updates=1778, lr=9.99938e-05, gnorm=2.496, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19899 2021-06-19 00:10:36 | INFO | train_inner | epoch 001: 1797 / 3002 loss=3.029, ppl=8.16, wps=5801.9, ups=0.09, wpb=64901, bsz=128, num_updates=1779, lr=9.99938e-05, gnorm=94.949, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19910 2021-06-19 00:10:47 | INFO | train_inner | epoch 001: 1798 / 3002 loss=3.008, ppl=8.04, wps=5796.6, ups=0.09, wpb=64801, bsz=128, num_updates=1780, lr=9.99938e-05, gnorm=4.068, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19922 2021-06-19 00:10:59 | INFO | train_inner | epoch 001: 1799 / 3002 loss=2.938, ppl=7.66, wps=5769, ups=0.09, wpb=64805, bsz=128, num_updates=1781, lr=9.99938e-05, gnorm=2.661, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19933 2021-06-19 00:11:10 | INFO | train_inner | epoch 001: 1800 / 3002 loss=3.035, ppl=8.2, wps=5805, ups=0.09, wpb=64789, bsz=128, num_updates=1782, lr=9.99937e-05, gnorm=2.823, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19944 2021-06-19 00:11:21 | INFO | train_inner | epoch 001: 1801 / 3002 loss=2.892, ppl=7.42, wps=5903.6, ups=0.09, wpb=64753, bsz=128, num_updates=1783, lr=9.99937e-05, gnorm=2.57, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19955 2021-06-19 00:11:32 | INFO | train_inner | epoch 001: 1802 / 3002 loss=3.006, ppl=8.03, wps=5835.2, ups=0.09, wpb=64805, bsz=128, num_updates=1784, lr=9.99937e-05, gnorm=2.668, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19966 2021-06-19 00:11:43 | INFO | train_inner | epoch 001: 1803 / 3002 loss=2.956, ppl=7.76, wps=5824.8, ups=0.09, wpb=64778, bsz=128, num_updates=1785, lr=9.99937e-05, gnorm=9.771, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19977 2021-06-19 00:11:54 | INFO | train_inner | epoch 001: 1804 / 3002 loss=3.022, ppl=8.13, wps=5835.5, ups=0.09, wpb=64825, bsz=128, num_updates=1786, lr=9.99937e-05, gnorm=2.574, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19988 2021-06-19 00:12:05 | INFO | train_inner | epoch 001: 1805 / 3002 loss=2.96, ppl=7.78, wps=5861.1, ups=0.09, wpb=64828, bsz=128, num_updates=1787, lr=9.99937e-05, gnorm=2.627, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19999 2021-06-19 00:12:16 | INFO | train_inner | epoch 001: 1806 / 3002 loss=2.991, ppl=7.95, wps=5946.4, ups=0.09, wpb=64778, bsz=128, num_updates=1788, lr=9.99937e-05, gnorm=3.034, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=20010 2021-06-19 00:12:27 | INFO | train_inner | epoch 001: 1807 / 3002 loss=3.208, ppl=9.24, wps=5847, ups=0.09, wpb=64829, bsz=128, num_updates=1789, lr=9.99937e-05, gnorm=17.241, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=20021 2021-06-19 00:12:38 | INFO | train_inner | epoch 001: 1808 / 3002 loss=2.835, ppl=7.13, wps=5891.4, ups=0.09, wpb=64852, bsz=128, num_updates=1790, lr=9.99937e-05, gnorm=3.667, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=20032 2021-06-19 00:12:49 | INFO | train_inner | epoch 001: 1809 / 3002 loss=3.263, ppl=9.6, wps=5821.3, ups=0.09, wpb=64854, bsz=128, num_updates=1791, lr=9.99937e-05, gnorm=3.927, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=20044 2021-06-19 00:13:00 | INFO | train_inner | epoch 001: 1810 / 3002 loss=3.11, ppl=8.64, wps=5833.2, ups=0.09, wpb=64750, bsz=128, num_updates=1792, lr=9.99937e-05, gnorm=3.059, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=20055 2021-06-19 00:13:11 | INFO | train_inner | epoch 001: 1811 / 3002 loss=3.283, ppl=9.73, wps=5926.1, ups=0.09, wpb=64847, bsz=128, num_updates=1793, lr=9.99937e-05, gnorm=3.78, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=20066 2021-06-19 00:13:22 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.125 2021-06-19 00:13:33 | INFO | train_inner | epoch 001: 1813 / 3002 loss=3.151, ppl=8.88, wps=2921.8, ups=0.05, wpb=64827, bsz=128, num_updates=1794, lr=9.99936e-05, gnorm=7.908, loss_scale=0.125, train_wall=21, gb_free=2.8, wall=20088 2021-06-19 00:13:44 | INFO | train_inner | epoch 001: 1814 / 3002 loss=3.224, ppl=9.34, wps=5852.5, ups=0.09, wpb=64731, bsz=128, num_updates=1795, lr=9.99936e-05, gnorm=4.006, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20099 2021-06-19 00:13:55 | INFO | train_inner | epoch 001: 1815 / 3002 loss=3.123, ppl=8.71, wps=5901, ups=0.09, wpb=64777, bsz=128, num_updates=1796, lr=9.99936e-05, gnorm=3.518, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20110 2021-06-19 00:14:07 | INFO | train_inner | epoch 001: 1816 / 3002 loss=2.886, ppl=7.39, wps=5770, ups=0.09, wpb=64922, bsz=128, num_updates=1797, lr=9.99936e-05, gnorm=2.673, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20121 2021-06-19 00:14:18 | INFO | train_inner | epoch 001: 1817 / 3002 loss=3.286, ppl=9.75, wps=5903.6, ups=0.09, wpb=64779, bsz=128, num_updates=1798, lr=9.99936e-05, gnorm=3.401, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20132 2021-06-19 00:14:29 | INFO | train_inner | epoch 001: 1818 / 3002 loss=2.889, ppl=7.41, wps=5857.9, ups=0.09, wpb=64864, bsz=128, num_updates=1799, lr=9.99936e-05, gnorm=5.695, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20143 2021-06-19 00:14:40 | INFO | train_inner | epoch 001: 1819 / 3002 loss=3.005, ppl=8.03, wps=5877.7, ups=0.09, wpb=64848, bsz=128, num_updates=1800, lr=9.99936e-05, gnorm=11.377, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20154 2021-06-19 00:14:51 | INFO | train_inner | epoch 001: 1820 / 3002 loss=2.904, ppl=7.49, wps=5774.1, ups=0.09, wpb=64857, bsz=128, num_updates=1801, lr=9.99936e-05, gnorm=29.521, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20165 2021-06-19 00:15:02 | INFO | train_inner | epoch 001: 1821 / 3002 loss=3.015, ppl=8.08, wps=5883, ups=0.09, wpb=64774, bsz=128, num_updates=1802, lr=9.99936e-05, gnorm=2.678, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20176 2021-06-19 00:15:13 | INFO | train_inner | epoch 001: 1822 / 3002 loss=3.171, ppl=9, wps=5929.5, ups=0.09, wpb=64799, bsz=128, num_updates=1803, lr=9.99936e-05, gnorm=5.156, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20187 2021-06-19 00:15:24 | INFO | train_inner | epoch 001: 1823 / 3002 loss=2.935, ppl=7.65, wps=5869, ups=0.09, wpb=64894, bsz=128, num_updates=1804, lr=9.99936e-05, gnorm=3.071, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20198 2021-06-19 00:15:35 | INFO | train_inner | epoch 001: 1824 / 3002 loss=3.01, ppl=8.05, wps=5918.1, ups=0.09, wpb=64831, bsz=128, num_updates=1805, lr=9.99936e-05, gnorm=2.587, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20209 2021-06-19 00:15:46 | INFO | train_inner | epoch 001: 1825 / 3002 loss=2.939, ppl=7.67, wps=5927.5, ups=0.09, wpb=64849, bsz=128, num_updates=1806, lr=9.99936e-05, gnorm=2.663, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20220 2021-06-19 00:15:57 | INFO | train_inner | epoch 001: 1826 / 3002 loss=2.96, ppl=7.78, wps=5931.3, ups=0.09, wpb=64817, bsz=128, num_updates=1807, lr=9.99935e-05, gnorm=2.505, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20231 2021-06-19 00:16:08 | INFO | train_inner | epoch 001: 1827 / 3002 loss=2.967, ppl=7.82, wps=5760.8, ups=0.09, wpb=64834, bsz=128, num_updates=1808, lr=9.99935e-05, gnorm=2.701, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20242 2021-06-19 00:16:19 | INFO | train_inner | epoch 001: 1828 / 3002 loss=3.084, ppl=8.48, wps=5956.2, ups=0.09, wpb=64838, bsz=128, num_updates=1809, lr=9.99935e-05, gnorm=2.669, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20253 2021-06-19 00:16:30 | INFO | train_inner | epoch 001: 1829 / 3002 loss=3.021, ppl=8.12, wps=5969, ups=0.09, wpb=64887, bsz=128, num_updates=1810, lr=9.99935e-05, gnorm=2.715, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20264 2021-06-19 00:16:41 | INFO | train_inner | epoch 001: 1830 / 3002 loss=2.942, ppl=7.69, wps=5730.5, ups=0.09, wpb=64809, bsz=128, num_updates=1811, lr=9.99935e-05, gnorm=2.632, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20276 2021-06-19 00:16:52 | INFO | train_inner | epoch 001: 1831 / 3002 loss=3.095, ppl=8.55, wps=5883.1, ups=0.09, wpb=64801, bsz=128, num_updates=1812, lr=9.99935e-05, gnorm=2.908, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20287 2021-06-19 00:17:03 | INFO | train_inner | epoch 001: 1832 / 3002 loss=3.042, ppl=8.23, wps=5794.3, ups=0.09, wpb=64805, bsz=128, num_updates=1813, lr=9.99935e-05, gnorm=2.822, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20298 2021-06-19 00:17:14 | INFO | train_inner | epoch 001: 1833 / 3002 loss=3.146, ppl=8.85, wps=5852.8, ups=0.09, wpb=64880, bsz=128, num_updates=1814, lr=9.99935e-05, gnorm=2.673, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20309 2021-06-19 00:17:26 | INFO | train_inner | epoch 001: 1834 / 3002 loss=3.111, ppl=8.64, wps=5835.7, ups=0.09, wpb=64800, bsz=128, num_updates=1815, lr=9.99935e-05, gnorm=2.546, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20320 2021-06-19 00:17:37 | INFO | train_inner | epoch 001: 1835 / 3002 loss=2.858, ppl=7.25, wps=5839.8, ups=0.09, wpb=64789, bsz=128, num_updates=1816, lr=9.99935e-05, gnorm=2.46, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20331 2021-06-19 00:17:48 | INFO | train_inner | epoch 001: 1836 / 3002 loss=3.003, ppl=8.02, wps=5905.8, ups=0.09, wpb=64812, bsz=128, num_updates=1817, lr=9.99935e-05, gnorm=2.529, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20342 2021-06-19 00:17:59 | INFO | train_inner | epoch 001: 1837 / 3002 loss=3.038, ppl=8.21, wps=5721.9, ups=0.09, wpb=64855, bsz=128, num_updates=1818, lr=9.99935e-05, gnorm=2.579, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20353 2021-06-19 00:18:10 | INFO | train_inner | epoch 001: 1838 / 3002 loss=3.137, ppl=8.79, wps=5919.3, ups=0.09, wpb=64899, bsz=128, num_updates=1819, lr=9.99934e-05, gnorm=2.654, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20364 2021-06-19 00:18:21 | INFO | train_inner | epoch 001: 1839 / 3002 loss=3.007, ppl=8.04, wps=5854.9, ups=0.09, wpb=64899, bsz=128, num_updates=1820, lr=9.99934e-05, gnorm=2.895, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20375 2021-06-19 00:18:32 | INFO | train_inner | epoch 001: 1840 / 3002 loss=3.022, ppl=8.12, wps=5802.2, ups=0.09, wpb=64842, bsz=128, num_updates=1821, lr=9.99934e-05, gnorm=2.566, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20387 2021-06-19 00:18:43 | INFO | train_inner | epoch 001: 1841 / 3002 loss=2.979, ppl=7.88, wps=5926.7, ups=0.09, wpb=64842, bsz=128, num_updates=1822, lr=9.99934e-05, gnorm=2.517, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20397 2021-06-19 00:18:54 | INFO | train_inner | epoch 001: 1842 / 3002 loss=2.971, ppl=7.84, wps=5822.3, ups=0.09, wpb=64870, bsz=128, num_updates=1823, lr=9.99934e-05, gnorm=2.538, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20409 2021-06-19 00:19:06 | INFO | train_inner | epoch 001: 1843 / 3002 loss=3.07, ppl=8.4, wps=5753.5, ups=0.09, wpb=64828, bsz=128, num_updates=1824, lr=9.99934e-05, gnorm=2.682, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20420 2021-06-19 00:19:17 | INFO | train_inner | epoch 001: 1844 / 3002 loss=2.921, ppl=7.58, wps=5747.6, ups=0.09, wpb=64816, bsz=128, num_updates=1825, lr=9.99934e-05, gnorm=2.559, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20431 2021-06-19 00:19:28 | INFO | train_inner | epoch 001: 1845 / 3002 loss=2.992, ppl=7.95, wps=5839.3, ups=0.09, wpb=64781, bsz=128, num_updates=1826, lr=9.99934e-05, gnorm=2.526, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20442 2021-06-19 00:19:39 | INFO | train_inner | epoch 001: 1846 / 3002 loss=2.873, ppl=7.33, wps=5886.6, ups=0.09, wpb=64815, bsz=128, num_updates=1827, lr=9.99934e-05, gnorm=2.554, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20453 2021-06-19 00:19:50 | INFO | train_inner | epoch 001: 1847 / 3002 loss=3.013, ppl=8.07, wps=5837, ups=0.09, wpb=64828, bsz=128, num_updates=1828, lr=9.99934e-05, gnorm=2.505, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20464 2021-06-19 00:20:01 | INFO | train_inner | epoch 001: 1848 / 3002 loss=2.914, ppl=7.54, wps=5810.3, ups=0.09, wpb=64805, bsz=128, num_updates=1829, lr=9.99934e-05, gnorm=2.53, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20476 2021-06-19 00:20:12 | INFO | train_inner | epoch 001: 1849 / 3002 loss=3.057, ppl=8.32, wps=5733.8, ups=0.09, wpb=64721, bsz=128, num_updates=1830, lr=9.99934e-05, gnorm=3.218, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20487 2021-06-19 00:20:24 | INFO | train_inner | epoch 001: 1850 / 3002 loss=3.017, ppl=8.1, wps=5861.6, ups=0.09, wpb=64801, bsz=128, num_updates=1831, lr=9.99934e-05, gnorm=2.489, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20498 2021-06-19 00:20:35 | INFO | train_inner | epoch 001: 1851 / 3002 loss=2.843, ppl=7.18, wps=5889.5, ups=0.09, wpb=64866, bsz=128, num_updates=1832, lr=9.99933e-05, gnorm=2.475, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20509 2021-06-19 00:20:46 | INFO | train_inner | epoch 001: 1852 / 3002 loss=2.895, ppl=7.44, wps=5844.4, ups=0.09, wpb=64789, bsz=128, num_updates=1833, lr=9.99933e-05, gnorm=2.492, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20520 2021-06-19 00:20:57 | INFO | train_inner | epoch 001: 1853 / 3002 loss=3.104, ppl=8.6, wps=5888.7, ups=0.09, wpb=64826, bsz=128, num_updates=1834, lr=9.99933e-05, gnorm=22.599, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20531 2021-06-19 00:21:08 | INFO | train_inner | epoch 001: 1854 / 3002 loss=2.952, ppl=7.74, wps=5923.2, ups=0.09, wpb=64834, bsz=128, num_updates=1835, lr=9.99933e-05, gnorm=3.309, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20542 2021-06-19 00:21:18 | INFO | train_inner | epoch 001: 1855 / 3002 loss=3.145, ppl=8.85, wps=5941.8, ups=0.09, wpb=64767, bsz=128, num_updates=1836, lr=9.99933e-05, gnorm=5.689, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20553 2021-06-19 00:21:30 | INFO | train_inner | epoch 001: 1856 / 3002 loss=3.062, ppl=8.35, wps=5868.2, ups=0.09, wpb=64770, bsz=128, num_updates=1837, lr=9.99933e-05, gnorm=2.53, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20564 2021-06-19 00:21:40 | INFO | train_inner | epoch 001: 1857 / 3002 loss=3.032, ppl=8.18, wps=5934.4, ups=0.09, wpb=64898, bsz=128, num_updates=1838, lr=9.99933e-05, gnorm=2.603, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20575 2021-06-19 00:21:52 | INFO | train_inner | epoch 001: 1858 / 3002 loss=2.972, ppl=7.85, wps=5821.2, ups=0.09, wpb=64890, bsz=128, num_updates=1839, lr=9.99933e-05, gnorm=2.593, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20586 2021-06-19 00:22:03 | INFO | train_inner | epoch 001: 1859 / 3002 loss=3.015, ppl=8.08, wps=5898.9, ups=0.09, wpb=64868, bsz=128, num_updates=1840, lr=9.99933e-05, gnorm=2.667, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20597 2021-06-19 00:22:14 | INFO | train_inner | epoch 001: 1860 / 3002 loss=3.077, ppl=8.44, wps=5780, ups=0.09, wpb=64793, bsz=128, num_updates=1841, lr=9.99933e-05, gnorm=2.558, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20608 2021-06-19 00:22:25 | INFO | train_inner | epoch 001: 1861 / 3002 loss=2.864, ppl=7.28, wps=6006.3, ups=0.09, wpb=64837, bsz=128, num_updates=1842, lr=9.99933e-05, gnorm=2.501, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20619 2021-06-19 00:22:36 | INFO | train_inner | epoch 001: 1862 / 3002 loss=3.013, ppl=8.07, wps=5758.1, ups=0.09, wpb=64791, bsz=128, num_updates=1843, lr=9.99933e-05, gnorm=2.781, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20630 2021-06-19 00:22:47 | INFO | train_inner | epoch 001: 1863 / 3002 loss=2.97, ppl=7.83, wps=5901.5, ups=0.09, wpb=64802, bsz=128, num_updates=1844, lr=9.99932e-05, gnorm=2.558, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20641 2021-06-19 00:22:58 | INFO | train_inner | epoch 001: 1864 / 3002 loss=3.062, ppl=8.35, wps=5819.4, ups=0.09, wpb=64884, bsz=128, num_updates=1845, lr=9.99932e-05, gnorm=2.567, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20652 2021-06-19 00:23:09 | INFO | train_inner | epoch 001: 1865 / 3002 loss=2.902, ppl=7.48, wps=5857, ups=0.09, wpb=64854, bsz=128, num_updates=1846, lr=9.99932e-05, gnorm=2.439, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20663 2021-06-19 00:23:20 | INFO | train_inner | epoch 001: 1866 / 3002 loss=2.935, ppl=7.65, wps=5895.5, ups=0.09, wpb=64821, bsz=128, num_updates=1847, lr=9.99932e-05, gnorm=2.575, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20674 2021-06-19 00:23:31 | INFO | train_inner | epoch 001: 1867 / 3002 loss=3.006, ppl=8.04, wps=5958.4, ups=0.09, wpb=64771, bsz=128, num_updates=1848, lr=9.99932e-05, gnorm=2.529, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20685 2021-06-19 00:23:42 | INFO | train_inner | epoch 001: 1868 / 3002 loss=3.046, ppl=8.26, wps=5856.7, ups=0.09, wpb=64820, bsz=128, num_updates=1849, lr=9.99932e-05, gnorm=6.084, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20696 2021-06-19 00:23:53 | INFO | train_inner | epoch 001: 1869 / 3002 loss=3.053, ppl=8.3, wps=5802.3, ups=0.09, wpb=64830, bsz=128, num_updates=1850, lr=9.99932e-05, gnorm=2.682, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20708 2021-06-19 00:24:04 | INFO | train_inner | epoch 001: 1870 / 3002 loss=2.966, ppl=7.81, wps=5836.5, ups=0.09, wpb=64758, bsz=128, num_updates=1851, lr=9.99932e-05, gnorm=2.529, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20719 2021-06-19 00:24:15 | INFO | train_inner | epoch 001: 1871 / 3002 loss=3.034, ppl=8.19, wps=5929.8, ups=0.09, wpb=64844, bsz=128, num_updates=1852, lr=9.99932e-05, gnorm=3.745, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20730 2021-06-19 00:24:26 | INFO | train_inner | epoch 001: 1872 / 3002 loss=2.92, ppl=7.57, wps=5850.1, ups=0.09, wpb=64818, bsz=128, num_updates=1853, lr=9.99932e-05, gnorm=5.764, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20741 2021-06-19 00:24:37 | INFO | train_inner | epoch 001: 1873 / 3002 loss=3.039, ppl=8.22, wps=5925.1, ups=0.09, wpb=64820, bsz=128, num_updates=1854, lr=9.99932e-05, gnorm=2.78, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20752 2021-06-19 00:24:48 | INFO | train_inner | epoch 001: 1874 / 3002 loss=3.023, ppl=8.13, wps=5851.9, ups=0.09, wpb=64802, bsz=128, num_updates=1855, lr=9.99932e-05, gnorm=2.628, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20763 2021-06-19 00:25:00 | INFO | train_inner | epoch 001: 1875 / 3002 loss=3.161, ppl=8.94, wps=5729.3, ups=0.09, wpb=64862, bsz=128, num_updates=1856, lr=9.99932e-05, gnorm=3.533, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20774 2021-06-19 00:25:11 | INFO | train_inner | epoch 001: 1876 / 3002 loss=3.03, ppl=8.17, wps=5905.3, ups=0.09, wpb=64822, bsz=128, num_updates=1857, lr=9.99931e-05, gnorm=2.537, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20785 2021-06-19 00:25:22 | INFO | train_inner | epoch 001: 1877 / 3002 loss=2.895, ppl=7.44, wps=5891.5, ups=0.09, wpb=64885, bsz=128, num_updates=1858, lr=9.99931e-05, gnorm=2.482, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20796 2021-06-19 00:25:33 | INFO | train_inner | epoch 001: 1878 / 3002 loss=2.976, ppl=7.87, wps=5856.6, ups=0.09, wpb=64850, bsz=128, num_updates=1859, lr=9.99931e-05, gnorm=2.531, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20807 2021-06-19 00:25:44 | INFO | train_inner | epoch 001: 1879 / 3002 loss=2.899, ppl=7.46, wps=5840.7, ups=0.09, wpb=64797, bsz=128, num_updates=1860, lr=9.99931e-05, gnorm=2.515, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20818 2021-06-19 00:25:55 | INFO | train_inner | epoch 001: 1880 / 3002 loss=2.978, ppl=7.88, wps=5898.3, ups=0.09, wpb=64907, bsz=128, num_updates=1861, lr=9.99931e-05, gnorm=2.548, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20829 2021-06-19 00:26:06 | INFO | train_inner | epoch 001: 1881 / 3002 loss=2.832, ppl=7.12, wps=5910.2, ups=0.09, wpb=64766, bsz=128, num_updates=1862, lr=9.99931e-05, gnorm=2.728, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20840 2021-06-19 00:26:17 | INFO | train_inner | epoch 001: 1882 / 3002 loss=2.84, ppl=7.16, wps=5829.9, ups=0.09, wpb=64798, bsz=128, num_updates=1863, lr=9.99931e-05, gnorm=3.077, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20851 2021-06-19 00:26:28 | INFO | train_inner | epoch 001: 1883 / 3002 loss=3.024, ppl=8.13, wps=5755.3, ups=0.09, wpb=64817, bsz=128, num_updates=1864, lr=9.99931e-05, gnorm=2.444, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20862 2021-06-19 00:26:39 | INFO | train_inner | epoch 001: 1884 / 3002 loss=3.021, ppl=8.12, wps=5873.2, ups=0.09, wpb=64848, bsz=128, num_updates=1865, lr=9.99931e-05, gnorm=2.471, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20874 2021-06-19 00:26:50 | INFO | train_inner | epoch 001: 1885 / 3002 loss=2.885, ppl=7.39, wps=5799.9, ups=0.09, wpb=64821, bsz=128, num_updates=1866, lr=9.99931e-05, gnorm=2.449, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20885 2021-06-19 00:27:01 | INFO | train_inner | epoch 001: 1886 / 3002 loss=2.855, ppl=7.23, wps=5913.6, ups=0.09, wpb=64833, bsz=128, num_updates=1867, lr=9.99931e-05, gnorm=2.399, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20896 2021-06-19 00:27:12 | INFO | train_inner | epoch 001: 1887 / 3002 loss=2.92, ppl=7.57, wps=5808.8, ups=0.09, wpb=64825, bsz=128, num_updates=1868, lr=9.99931e-05, gnorm=2.558, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20907 2021-06-19 00:27:24 | INFO | train_inner | epoch 001: 1888 / 3002 loss=2.967, ppl=7.82, wps=5856.9, ups=0.09, wpb=64885, bsz=128, num_updates=1869, lr=9.9993e-05, gnorm=2.519, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20918 2021-06-19 00:27:35 | INFO | train_inner | epoch 001: 1889 / 3002 loss=2.933, ppl=7.64, wps=5872.8, ups=0.09, wpb=64872, bsz=128, num_updates=1870, lr=9.9993e-05, gnorm=2.483, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20929 2021-06-19 00:27:46 | INFO | train_inner | epoch 001: 1890 / 3002 loss=2.945, ppl=7.7, wps=5803.3, ups=0.09, wpb=64818, bsz=128, num_updates=1871, lr=9.9993e-05, gnorm=2.527, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20940 2021-06-19 00:27:57 | INFO | train_inner | epoch 001: 1891 / 3002 loss=2.965, ppl=7.81, wps=5801.4, ups=0.09, wpb=64855, bsz=128, num_updates=1872, lr=9.9993e-05, gnorm=5.326, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20951 2021-06-19 00:28:08 | INFO | train_inner | epoch 001: 1892 / 3002 loss=3.049, ppl=8.28, wps=5789.5, ups=0.09, wpb=64883, bsz=128, num_updates=1873, lr=9.9993e-05, gnorm=2.426, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20962 2021-06-19 00:28:19 | INFO | train_inner | epoch 001: 1893 / 3002 loss=3.065, ppl=8.37, wps=5820.9, ups=0.09, wpb=64845, bsz=128, num_updates=1874, lr=9.9993e-05, gnorm=2.549, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20974 2021-06-19 00:28:30 | INFO | train_inner | epoch 001: 1894 / 3002 loss=3.13, ppl=8.76, wps=5943.1, ups=0.09, wpb=64947, bsz=128, num_updates=1875, lr=9.9993e-05, gnorm=2.684, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20985 2021-06-19 00:28:41 | INFO | train_inner | epoch 001: 1895 / 3002 loss=3.153, ppl=8.89, wps=5919.5, ups=0.09, wpb=64796, bsz=128, num_updates=1876, lr=9.9993e-05, gnorm=3.244, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20995 2021-06-19 00:28:52 | INFO | train_inner | epoch 001: 1896 / 3002 loss=3.117, ppl=8.67, wps=5994.1, ups=0.09, wpb=64806, bsz=128, num_updates=1877, lr=9.9993e-05, gnorm=2.523, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21006 2021-06-19 00:29:03 | INFO | train_inner | epoch 001: 1897 / 3002 loss=3.06, ppl=8.34, wps=5826.9, ups=0.09, wpb=64856, bsz=128, num_updates=1878, lr=9.9993e-05, gnorm=2.556, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21017 2021-06-19 00:29:14 | INFO | train_inner | epoch 001: 1898 / 3002 loss=2.787, ppl=6.9, wps=5722.3, ups=0.09, wpb=64827, bsz=128, num_updates=1879, lr=9.9993e-05, gnorm=4.946, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21029 2021-06-19 00:29:26 | INFO | train_inner | epoch 001: 1899 / 3002 loss=3.147, ppl=8.86, wps=5814.9, ups=0.09, wpb=64827, bsz=128, num_updates=1880, lr=9.9993e-05, gnorm=2.513, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21040 2021-06-19 00:29:37 | INFO | train_inner | epoch 001: 1900 / 3002 loss=2.836, ppl=7.14, wps=5823.8, ups=0.09, wpb=64839, bsz=128, num_updates=1881, lr=9.9993e-05, gnorm=2.45, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21051 2021-06-19 00:29:48 | INFO | train_inner | epoch 001: 1901 / 3002 loss=2.994, ppl=7.97, wps=5963.1, ups=0.09, wpb=64827, bsz=128, num_updates=1882, lr=9.99929e-05, gnorm=2.63, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21062 2021-06-19 00:29:58 | INFO | train_inner | epoch 001: 1902 / 3002 loss=2.892, ppl=7.43, wps=6005.6, ups=0.09, wpb=64841, bsz=128, num_updates=1883, lr=9.99929e-05, gnorm=2.37, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21073 2021-06-19 00:30:10 | INFO | train_inner | epoch 001: 1903 / 3002 loss=3.093, ppl=8.53, wps=5811.1, ups=0.09, wpb=64779, bsz=128, num_updates=1884, lr=9.99929e-05, gnorm=2.556, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21084 2021-06-19 00:30:21 | INFO | train_inner | epoch 001: 1904 / 3002 loss=2.951, ppl=7.73, wps=5802.4, ups=0.09, wpb=64815, bsz=128, num_updates=1885, lr=9.99929e-05, gnorm=2.512, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21095 2021-06-19 00:30:32 | INFO | train_inner | epoch 001: 1905 / 3002 loss=2.912, ppl=7.53, wps=5875.7, ups=0.09, wpb=64821, bsz=128, num_updates=1886, lr=9.99929e-05, gnorm=57.194, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21106 2021-06-19 00:30:43 | INFO | train_inner | epoch 001: 1906 / 3002 loss=3.07, ppl=8.4, wps=5859.8, ups=0.09, wpb=64766, bsz=128, num_updates=1887, lr=9.99929e-05, gnorm=2.36, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21117 2021-06-19 00:30:54 | INFO | train_inner | epoch 001: 1907 / 3002 loss=3.104, ppl=8.6, wps=5872.9, ups=0.09, wpb=64875, bsz=128, num_updates=1888, lr=9.99929e-05, gnorm=2.473, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21128 2021-06-19 00:31:05 | INFO | train_inner | epoch 001: 1908 / 3002 loss=2.918, ppl=7.56, wps=5902.9, ups=0.09, wpb=64864, bsz=128, num_updates=1889, lr=9.99929e-05, gnorm=2.459, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21139 2021-06-19 00:31:16 | INFO | train_inner | epoch 001: 1909 / 3002 loss=2.969, ppl=7.83, wps=5888, ups=0.09, wpb=64860, bsz=128, num_updates=1890, lr=9.99929e-05, gnorm=2.732, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21150 2021-06-19 00:31:27 | INFO | train_inner | epoch 001: 1910 / 3002 loss=3.274, ppl=9.67, wps=5933.4, ups=0.09, wpb=64821, bsz=128, num_updates=1891, lr=9.99929e-05, gnorm=2.542, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21161 2021-06-19 00:31:38 | INFO | train_inner | epoch 001: 1911 / 3002 loss=3.015, ppl=8.08, wps=5831.9, ups=0.09, wpb=64869, bsz=128, num_updates=1892, lr=9.99929e-05, gnorm=2.511, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21172 2021-06-19 00:31:49 | INFO | train_inner | epoch 001: 1912 / 3002 loss=3.021, ppl=8.12, wps=5840.8, ups=0.09, wpb=64788, bsz=128, num_updates=1893, lr=9.99929e-05, gnorm=2.522, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21183 2021-06-19 00:32:00 | INFO | train_inner | epoch 001: 1913 / 3002 loss=2.983, ppl=7.9, wps=5815.8, ups=0.09, wpb=64781, bsz=128, num_updates=1894, lr=9.99928e-05, gnorm=2.551, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21194 2021-06-19 00:32:11 | INFO | train_inner | epoch 001: 1914 / 3002 loss=3.112, ppl=8.65, wps=5771.1, ups=0.09, wpb=64847, bsz=128, num_updates=1895, lr=9.99928e-05, gnorm=2.686, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21206 2021-06-19 00:32:23 | INFO | train_inner | epoch 001: 1915 / 3002 loss=2.948, ppl=7.72, wps=5821.9, ups=0.09, wpb=64880, bsz=128, num_updates=1896, lr=9.99928e-05, gnorm=2.634, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21217 2021-06-19 00:32:33 | INFO | train_inner | epoch 001: 1916 / 3002 loss=2.907, ppl=7.5, wps=5928.5, ups=0.09, wpb=64905, bsz=128, num_updates=1897, lr=9.99928e-05, gnorm=2.69, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21228 2021-06-19 00:32:45 | INFO | train_inner | epoch 001: 1917 / 3002 loss=2.961, ppl=7.79, wps=5733, ups=0.09, wpb=64821, bsz=128, num_updates=1898, lr=9.99928e-05, gnorm=2.654, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21239 2021-06-19 00:32:56 | INFO | train_inner | epoch 001: 1918 / 3002 loss=2.988, ppl=7.93, wps=5941.7, ups=0.09, wpb=64824, bsz=128, num_updates=1899, lr=9.99928e-05, gnorm=2.561, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21250 2021-06-19 00:33:07 | INFO | train_inner | epoch 001: 1919 / 3002 loss=3.03, ppl=8.17, wps=5958.5, ups=0.09, wpb=64835, bsz=128, num_updates=1900, lr=9.99928e-05, gnorm=2.553, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21261 2021-06-19 00:33:18 | INFO | train_inner | epoch 001: 1920 / 3002 loss=2.944, ppl=7.7, wps=5844.7, ups=0.09, wpb=64828, bsz=128, num_updates=1901, lr=9.99928e-05, gnorm=2.597, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21272 2021-06-19 00:33:28 | INFO | train_inner | epoch 001: 1921 / 3002 loss=2.993, ppl=7.96, wps=5976.4, ups=0.09, wpb=64883, bsz=128, num_updates=1902, lr=9.99928e-05, gnorm=2.435, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21283 2021-06-19 00:33:40 | INFO | train_inner | epoch 001: 1922 / 3002 loss=2.77, ppl=6.82, wps=5750.3, ups=0.09, wpb=64862, bsz=128, num_updates=1903, lr=9.99928e-05, gnorm=11.058, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21294 2021-06-19 00:33:51 | INFO | train_inner | epoch 001: 1923 / 3002 loss=3.096, ppl=8.55, wps=5805.1, ups=0.09, wpb=64782, bsz=128, num_updates=1904, lr=9.99928e-05, gnorm=2.444, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21305 2021-06-19 00:34:02 | INFO | train_inner | epoch 001: 1924 / 3002 loss=2.992, ppl=7.95, wps=5845.2, ups=0.09, wpb=64866, bsz=128, num_updates=1905, lr=9.99928e-05, gnorm=11.591, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21316 2021-06-19 00:34:13 | INFO | train_inner | epoch 001: 1925 / 3002 loss=3.083, ppl=8.48, wps=5816.8, ups=0.09, wpb=64766, bsz=128, num_updates=1906, lr=9.99928e-05, gnorm=3.462, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21328 2021-06-19 00:34:24 | INFO | train_inner | epoch 001: 1926 / 3002 loss=2.92, ppl=7.57, wps=5870.8, ups=0.09, wpb=64814, bsz=128, num_updates=1907, lr=9.99927e-05, gnorm=2.473, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21339 2021-06-19 00:34:35 | INFO | train_inner | epoch 001: 1927 / 3002 loss=2.937, ppl=7.66, wps=5784.3, ups=0.09, wpb=64816, bsz=128, num_updates=1908, lr=9.99927e-05, gnorm=2.553, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21350 2021-06-19 00:34:47 | INFO | train_inner | epoch 001: 1928 / 3002 loss=3.027, ppl=8.15, wps=5830.7, ups=0.09, wpb=64817, bsz=128, num_updates=1909, lr=9.99927e-05, gnorm=4.282, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21361 2021-06-19 00:34:58 | INFO | train_inner | epoch 001: 1929 / 3002 loss=3.16, ppl=8.94, wps=5823.3, ups=0.09, wpb=64735, bsz=128, num_updates=1910, lr=9.99927e-05, gnorm=2.515, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21372 2021-06-19 00:35:09 | INFO | train_inner | epoch 001: 1930 / 3002 loss=2.829, ppl=7.1, wps=5850.4, ups=0.09, wpb=64777, bsz=128, num_updates=1911, lr=9.99927e-05, gnorm=2.47, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21383 2021-06-19 00:35:20 | INFO | train_inner | epoch 001: 1931 / 3002 loss=2.877, ppl=7.34, wps=5776.9, ups=0.09, wpb=64755, bsz=128, num_updates=1912, lr=9.99927e-05, gnorm=2.642, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21394 2021-06-19 00:35:31 | INFO | train_inner | epoch 001: 1932 / 3002 loss=3.046, ppl=8.26, wps=5949.1, ups=0.09, wpb=64846, bsz=128, num_updates=1913, lr=9.99927e-05, gnorm=2.511, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21405 2021-06-19 00:35:42 | INFO | train_inner | epoch 001: 1933 / 3002 loss=3.018, ppl=8.1, wps=5725.1, ups=0.09, wpb=64874, bsz=128, num_updates=1914, lr=9.99927e-05, gnorm=2.447, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21416 2021-06-19 00:35:53 | INFO | train_inner | epoch 001: 1934 / 3002 loss=2.897, ppl=7.45, wps=5978.3, ups=0.09, wpb=64884, bsz=128, num_updates=1915, lr=9.99927e-05, gnorm=2.491, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21427 2021-06-19 00:36:04 | INFO | train_inner | epoch 001: 1935 / 3002 loss=2.944, ppl=7.69, wps=5874.6, ups=0.09, wpb=64816, bsz=128, num_updates=1916, lr=9.99927e-05, gnorm=2.532, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21438 2021-06-19 00:36:15 | INFO | train_inner | epoch 001: 1936 / 3002 loss=3.038, ppl=8.21, wps=5988.9, ups=0.09, wpb=64810, bsz=128, num_updates=1917, lr=9.99927e-05, gnorm=2.667, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21449 2021-06-19 00:36:26 | INFO | train_inner | epoch 001: 1937 / 3002 loss=3.082, ppl=8.47, wps=5879.2, ups=0.09, wpb=64803, bsz=128, num_updates=1918, lr=9.99927e-05, gnorm=2.56, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21460 2021-06-19 00:36:37 | INFO | train_inner | epoch 001: 1938 / 3002 loss=3.008, ppl=8.05, wps=5795.4, ups=0.09, wpb=64800, bsz=128, num_updates=1919, lr=9.99926e-05, gnorm=4.185, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21471 2021-06-19 00:36:48 | INFO | train_inner | epoch 001: 1939 / 3002 loss=3.007, ppl=8.04, wps=5915.3, ups=0.09, wpb=64821, bsz=128, num_updates=1920, lr=9.99926e-05, gnorm=2.893, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21482 2021-06-19 00:36:59 | INFO | train_inner | epoch 001: 1940 / 3002 loss=2.823, ppl=7.08, wps=5898.5, ups=0.09, wpb=64869, bsz=128, num_updates=1921, lr=9.99926e-05, gnorm=2.458, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21493 2021-06-19 00:37:10 | INFO | train_inner | epoch 001: 1941 / 3002 loss=2.963, ppl=7.8, wps=5802.4, ups=0.09, wpb=64888, bsz=128, num_updates=1922, lr=9.99926e-05, gnorm=2.416, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21505 2021-06-19 00:37:21 | INFO | train_inner | epoch 001: 1942 / 3002 loss=3.043, ppl=8.24, wps=5902.8, ups=0.09, wpb=64812, bsz=128, num_updates=1923, lr=9.99926e-05, gnorm=2.337, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21516 2021-06-19 00:37:32 | INFO | train_inner | epoch 001: 1943 / 3002 loss=2.961, ppl=7.78, wps=6029.6, ups=0.09, wpb=64855, bsz=128, num_updates=1924, lr=9.99926e-05, gnorm=2.431, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21526 2021-06-19 00:37:43 | INFO | train_inner | epoch 001: 1944 / 3002 loss=2.927, ppl=7.6, wps=5888.4, ups=0.09, wpb=64880, bsz=128, num_updates=1925, lr=9.99926e-05, gnorm=2.468, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21537 2021-06-19 00:37:54 | INFO | train_inner | epoch 001: 1945 / 3002 loss=2.868, ppl=7.3, wps=5810.8, ups=0.09, wpb=64837, bsz=128, num_updates=1926, lr=9.99926e-05, gnorm=2.565, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21548 2021-06-19 00:38:05 | INFO | train_inner | epoch 001: 1946 / 3002 loss=2.889, ppl=7.41, wps=5870, ups=0.09, wpb=64898, bsz=128, num_updates=1927, lr=9.99926e-05, gnorm=2.425, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21560 2021-06-19 00:38:16 | INFO | train_inner | epoch 001: 1947 / 3002 loss=2.867, ppl=7.29, wps=5784.7, ups=0.09, wpb=64825, bsz=128, num_updates=1928, lr=9.99926e-05, gnorm=2.474, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21571 2021-06-19 00:38:27 | INFO | train_inner | epoch 001: 1948 / 3002 loss=2.964, ppl=7.81, wps=5965.3, ups=0.09, wpb=64893, bsz=128, num_updates=1929, lr=9.99926e-05, gnorm=2.535, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21582 2021-06-19 00:38:38 | INFO | train_inner | epoch 001: 1949 / 3002 loss=3.022, ppl=8.13, wps=5915.8, ups=0.09, wpb=64781, bsz=128, num_updates=1930, lr=9.99926e-05, gnorm=3.234, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21593 2021-06-19 00:38:49 | INFO | train_inner | epoch 001: 1950 / 3002 loss=3.006, ppl=8.04, wps=5836.8, ups=0.09, wpb=64796, bsz=128, num_updates=1931, lr=9.99926e-05, gnorm=2.448, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21604 2021-06-19 00:39:00 | INFO | train_inner | epoch 001: 1951 / 3002 loss=2.919, ppl=7.56, wps=5894.5, ups=0.09, wpb=64844, bsz=128, num_updates=1932, lr=9.99925e-05, gnorm=2.38, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21615 2021-06-19 00:39:11 | INFO | train_inner | epoch 001: 1952 / 3002 loss=3.129, ppl=8.75, wps=5940.3, ups=0.09, wpb=64809, bsz=128, num_updates=1933, lr=9.99925e-05, gnorm=2.455, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21626 2021-06-19 00:39:22 | INFO | train_inner | epoch 001: 1953 / 3002 loss=2.998, ppl=7.99, wps=5902.7, ups=0.09, wpb=64819, bsz=128, num_updates=1934, lr=9.99925e-05, gnorm=2.452, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21637 2021-06-19 00:39:33 | INFO | train_inner | epoch 001: 1954 / 3002 loss=3.025, ppl=8.14, wps=5919.4, ups=0.09, wpb=64740, bsz=128, num_updates=1935, lr=9.99925e-05, gnorm=2.402, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21647 2021-06-19 00:39:44 | INFO | train_inner | epoch 001: 1955 / 3002 loss=2.923, ppl=7.59, wps=5963.2, ups=0.09, wpb=64718, bsz=128, num_updates=1936, lr=9.99925e-05, gnorm=2.631, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21658 2021-06-19 00:39:55 | INFO | train_inner | epoch 001: 1956 / 3002 loss=3.124, ppl=8.72, wps=5823.9, ups=0.09, wpb=64880, bsz=128, num_updates=1937, lr=9.99925e-05, gnorm=2.543, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21669 2021-06-19 00:40:06 | INFO | train_inner | epoch 001: 1957 / 3002 loss=3, ppl=8, wps=5989.9, ups=0.09, wpb=64816, bsz=128, num_updates=1938, lr=9.99925e-05, gnorm=2.477, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21680 2021-06-19 00:40:17 | INFO | train_inner | epoch 001: 1958 / 3002 loss=2.962, ppl=7.79, wps=5824.6, ups=0.09, wpb=64900, bsz=128, num_updates=1939, lr=9.99925e-05, gnorm=2.745, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21691 2021-06-19 00:40:28 | INFO | train_inner | epoch 001: 1959 / 3002 loss=2.964, ppl=7.8, wps=5924.3, ups=0.09, wpb=64846, bsz=128, num_updates=1940, lr=9.99925e-05, gnorm=2.454, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21702 2021-06-19 00:40:39 | INFO | train_inner | epoch 001: 1960 / 3002 loss=2.912, ppl=7.53, wps=5928.2, ups=0.09, wpb=64873, bsz=128, num_updates=1941, lr=9.99925e-05, gnorm=2.544, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21713 2021-06-19 00:40:50 | INFO | train_inner | epoch 001: 1961 / 3002 loss=2.973, ppl=7.85, wps=5844.8, ups=0.09, wpb=64791, bsz=128, num_updates=1942, lr=9.99925e-05, gnorm=2.394, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21724 2021-06-19 00:41:01 | INFO | train_inner | epoch 001: 1962 / 3002 loss=2.971, ppl=7.84, wps=5875.8, ups=0.09, wpb=64798, bsz=128, num_updates=1943, lr=9.99925e-05, gnorm=2.504, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21735 2021-06-19 00:41:12 | INFO | train_inner | epoch 001: 1963 / 3002 loss=2.985, ppl=7.92, wps=5776.2, ups=0.09, wpb=64885, bsz=128, num_updates=1944, lr=9.99924e-05, gnorm=3.041, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21747 2021-06-19 00:41:24 | INFO | train_inner | epoch 001: 1964 / 3002 loss=2.829, ppl=7.1, wps=5808.7, ups=0.09, wpb=64820, bsz=128, num_updates=1945, lr=9.99924e-05, gnorm=2.353, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21758 2021-06-19 00:41:35 | INFO | train_inner | epoch 001: 1965 / 3002 loss=3.085, ppl=8.48, wps=5782.4, ups=0.09, wpb=64822, bsz=128, num_updates=1946, lr=9.99924e-05, gnorm=2.503, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21769 2021-06-19 00:41:46 | INFO | train_inner | epoch 001: 1966 / 3002 loss=2.937, ppl=7.66, wps=5820.8, ups=0.09, wpb=64851, bsz=128, num_updates=1947, lr=9.99924e-05, gnorm=2.426, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21780 2021-06-19 00:41:56 | INFO | train_inner | epoch 001: 1967 / 3002 loss=3.036, ppl=8.2, wps=6139.8, ups=0.09, wpb=64876, bsz=128, num_updates=1948, lr=9.99924e-05, gnorm=2.466, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21791 2021-06-19 00:42:08 | INFO | train_inner | epoch 001: 1968 / 3002 loss=2.865, ppl=7.28, wps=5741.7, ups=0.09, wpb=64802, bsz=128, num_updates=1949, lr=9.99924e-05, gnorm=2.654, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21802 2021-06-19 00:42:19 | INFO | train_inner | epoch 001: 1969 / 3002 loss=3.01, ppl=8.06, wps=5946.7, ups=0.09, wpb=64826, bsz=128, num_updates=1950, lr=9.99924e-05, gnorm=2.4, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21813 2021-06-19 00:42:30 | INFO | train_inner | epoch 001: 1970 / 3002 loss=3.101, ppl=8.58, wps=5796.7, ups=0.09, wpb=64776, bsz=128, num_updates=1951, lr=9.99924e-05, gnorm=2.48, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21824 2021-06-19 00:42:41 | INFO | train_inner | epoch 001: 1971 / 3002 loss=2.943, ppl=7.69, wps=5800.9, ups=0.09, wpb=64837, bsz=128, num_updates=1952, lr=9.99924e-05, gnorm=3.076, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21835 2021-06-19 00:42:52 | INFO | train_inner | epoch 001: 1972 / 3002 loss=3.077, ppl=8.44, wps=5934.7, ups=0.09, wpb=64826, bsz=128, num_updates=1953, lr=9.99924e-05, gnorm=2.469, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21846 2021-06-19 00:43:03 | INFO | train_inner | epoch 001: 1973 / 3002 loss=3.085, ppl=8.49, wps=5861, ups=0.09, wpb=64860, bsz=128, num_updates=1954, lr=9.99924e-05, gnorm=2.424, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21857 2021-06-19 00:43:14 | INFO | train_inner | epoch 001: 1974 / 3002 loss=3.08, ppl=8.46, wps=5767.3, ups=0.09, wpb=64906, bsz=128, num_updates=1955, lr=9.99924e-05, gnorm=2.527, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21869 2021-06-19 00:43:25 | INFO | train_inner | epoch 001: 1975 / 3002 loss=3.099, ppl=8.57, wps=5741.5, ups=0.09, wpb=64737, bsz=128, num_updates=1956, lr=9.99924e-05, gnorm=2.462, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21880 2021-06-19 00:43:37 | INFO | train_inner | epoch 001: 1976 / 3002 loss=2.971, ppl=7.84, wps=5803.1, ups=0.09, wpb=64836, bsz=128, num_updates=1957, lr=9.99923e-05, gnorm=2.616, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21891 2021-06-19 00:43:48 | INFO | train_inner | epoch 001: 1977 / 3002 loss=2.963, ppl=7.8, wps=5826.1, ups=0.09, wpb=64856, bsz=128, num_updates=1958, lr=9.99923e-05, gnorm=2.42, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21902 2021-06-19 00:43:59 | INFO | train_inner | epoch 001: 1978 / 3002 loss=2.716, ppl=6.57, wps=5950.7, ups=0.09, wpb=64876, bsz=128, num_updates=1959, lr=9.99923e-05, gnorm=2.405, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21913 2021-06-19 00:44:10 | INFO | train_inner | epoch 001: 1979 / 3002 loss=3.052, ppl=8.3, wps=5925.3, ups=0.09, wpb=64833, bsz=128, num_updates=1960, lr=9.99923e-05, gnorm=2.515, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21924 2021-06-19 00:44:21 | INFO | train_inner | epoch 001: 1980 / 3002 loss=2.98, ppl=7.89, wps=5944.2, ups=0.09, wpb=64804, bsz=128, num_updates=1961, lr=9.99923e-05, gnorm=8.12, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21935 2021-06-19 00:44:31 | INFO | train_inner | epoch 001: 1981 / 3002 loss=2.849, ppl=7.2, wps=5972.2, ups=0.09, wpb=64886, bsz=128, num_updates=1962, lr=9.99923e-05, gnorm=2.407, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21946 2021-06-19 00:44:42 | INFO | train_inner | epoch 001: 1982 / 3002 loss=2.913, ppl=7.53, wps=5911.8, ups=0.09, wpb=64904, bsz=128, num_updates=1963, lr=9.99923e-05, gnorm=2.407, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21957 2021-06-19 00:44:54 | INFO | train_inner | epoch 001: 1983 / 3002 loss=2.96, ppl=7.78, wps=5750, ups=0.09, wpb=64802, bsz=128, num_updates=1964, lr=9.99923e-05, gnorm=2.405, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21968 2021-06-19 00:45:05 | INFO | train_inner | epoch 001: 1984 / 3002 loss=3.053, ppl=8.3, wps=5813.1, ups=0.09, wpb=64880, bsz=128, num_updates=1965, lr=9.99923e-05, gnorm=2.495, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21979 2021-06-19 00:45:16 | INFO | train_inner | epoch 001: 1985 / 3002 loss=2.979, ppl=7.88, wps=5729.6, ups=0.09, wpb=64733, bsz=128, num_updates=1966, lr=9.99923e-05, gnorm=2.317, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21990 2021-06-19 00:45:27 | INFO | train_inner | epoch 001: 1986 / 3002 loss=2.789, ppl=6.91, wps=5973.9, ups=0.09, wpb=64865, bsz=128, num_updates=1967, lr=9.99923e-05, gnorm=2.372, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22001 2021-06-19 00:45:38 | INFO | train_inner | epoch 001: 1987 / 3002 loss=3.035, ppl=8.19, wps=5858.5, ups=0.09, wpb=64733, bsz=128, num_updates=1968, lr=9.99923e-05, gnorm=2.484, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22012 2021-06-19 00:45:49 | INFO | train_inner | epoch 001: 1988 / 3002 loss=2.972, ppl=7.85, wps=5939, ups=0.09, wpb=64862, bsz=128, num_updates=1969, lr=9.99922e-05, gnorm=2.495, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22023 2021-06-19 00:46:00 | INFO | train_inner | epoch 001: 1989 / 3002 loss=2.977, ppl=7.88, wps=5839.4, ups=0.09, wpb=64802, bsz=128, num_updates=1970, lr=9.99922e-05, gnorm=2.609, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22034 2021-06-19 00:46:11 | INFO | train_inner | epoch 001: 1990 / 3002 loss=2.839, ppl=7.15, wps=5774.7, ups=0.09, wpb=64878, bsz=128, num_updates=1971, lr=9.99922e-05, gnorm=2.446, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22046 2021-06-19 00:46:23 | INFO | train_inner | epoch 001: 1991 / 3002 loss=2.979, ppl=7.88, wps=5761.7, ups=0.09, wpb=64776, bsz=128, num_updates=1972, lr=9.99922e-05, gnorm=2.461, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22057 2021-06-19 00:46:34 | INFO | train_inner | epoch 001: 1992 / 3002 loss=2.991, ppl=7.95, wps=5805.8, ups=0.09, wpb=64851, bsz=128, num_updates=1973, lr=9.99922e-05, gnorm=2.305, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22068 2021-06-19 00:46:45 | INFO | train_inner | epoch 001: 1993 / 3002 loss=3.077, ppl=8.44, wps=5926.3, ups=0.09, wpb=64758, bsz=128, num_updates=1974, lr=9.99922e-05, gnorm=3.856, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22079 2021-06-19 00:46:56 | INFO | train_inner | epoch 001: 1994 / 3002 loss=3.055, ppl=8.31, wps=5859.3, ups=0.09, wpb=64832, bsz=128, num_updates=1975, lr=9.99922e-05, gnorm=2.508, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22090 2021-06-19 00:47:07 | INFO | train_inner | epoch 001: 1995 / 3002 loss=2.921, ppl=7.58, wps=5884.9, ups=0.09, wpb=64802, bsz=128, num_updates=1976, lr=9.99922e-05, gnorm=2.569, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22101 2021-06-19 00:47:18 | INFO | train_inner | epoch 001: 1996 / 3002 loss=2.929, ppl=7.62, wps=5765.5, ups=0.09, wpb=64845, bsz=128, num_updates=1977, lr=9.99922e-05, gnorm=2.449, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22112 2021-06-19 00:47:29 | INFO | train_inner | epoch 001: 1997 / 3002 loss=2.905, ppl=7.49, wps=5981, ups=0.09, wpb=64893, bsz=128, num_updates=1978, lr=9.99922e-05, gnorm=2.474, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22123 2021-06-19 00:47:40 | INFO | train_inner | epoch 001: 1998 / 3002 loss=3.117, ppl=8.67, wps=5814.5, ups=0.09, wpb=64846, bsz=128, num_updates=1979, lr=9.99922e-05, gnorm=2.759, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22134 2021-06-19 00:47:51 | INFO | train_inner | epoch 001: 1999 / 3002 loss=2.961, ppl=7.79, wps=5892.1, ups=0.09, wpb=64837, bsz=128, num_updates=1980, lr=9.99922e-05, gnorm=2.87, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22145 2021-06-19 00:48:02 | INFO | train_inner | epoch 001: 2000 / 3002 loss=2.818, ppl=7.05, wps=5824.7, ups=0.09, wpb=64807, bsz=128, num_updates=1981, lr=9.99922e-05, gnorm=2.394, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22156 2021-06-19 00:48:13 | INFO | train_inner | epoch 001: 2001 / 3002 loss=3.102, ppl=8.59, wps=6036.7, ups=0.09, wpb=64923, bsz=128, num_updates=1982, lr=9.99921e-05, gnorm=2.414, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22167 2021-06-19 00:48:24 | INFO | train_inner | epoch 001: 2002 / 3002 loss=2.958, ppl=7.77, wps=5893.5, ups=0.09, wpb=64787, bsz=128, num_updates=1983, lr=9.99921e-05, gnorm=2.389, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22178 2021-06-19 00:48:35 | INFO | train_inner | epoch 001: 2003 / 3002 loss=3.054, ppl=8.3, wps=5922, ups=0.09, wpb=64841, bsz=128, num_updates=1984, lr=9.99921e-05, gnorm=2.528, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22189 2021-06-19 00:48:46 | INFO | train_inner | epoch 001: 2004 / 3002 loss=2.891, ppl=7.42, wps=5852.1, ups=0.09, wpb=64863, bsz=128, num_updates=1985, lr=9.99921e-05, gnorm=2.364, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22200 2021-06-19 00:48:57 | INFO | train_inner | epoch 001: 2005 / 3002 loss=2.881, ppl=7.37, wps=5939.3, ups=0.09, wpb=64901, bsz=128, num_updates=1986, lr=9.99921e-05, gnorm=2.401, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22211 2021-06-19 00:49:08 | INFO | train_inner | epoch 001: 2006 / 3002 loss=2.885, ppl=7.39, wps=6043.8, ups=0.09, wpb=64906, bsz=128, num_updates=1987, lr=9.99921e-05, gnorm=2.395, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22222 2021-06-19 00:49:19 | INFO | train_inner | epoch 001: 2007 / 3002 loss=2.996, ppl=7.98, wps=5803.1, ups=0.09, wpb=64850, bsz=128, num_updates=1988, lr=9.99921e-05, gnorm=2.462, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22233 2021-06-19 00:49:30 | INFO | train_inner | epoch 001: 2008 / 3002 loss=2.929, ppl=7.62, wps=5908, ups=0.09, wpb=64776, bsz=128, num_updates=1989, lr=9.99921e-05, gnorm=2.466, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22244 2021-06-19 00:49:41 | INFO | train_inner | epoch 001: 2009 / 3002 loss=3.007, ppl=8.04, wps=5759.1, ups=0.09, wpb=64851, bsz=128, num_updates=1990, lr=9.99921e-05, gnorm=3.37, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22255 2021-06-19 00:49:52 | INFO | train_inner | epoch 001: 2010 / 3002 loss=2.874, ppl=7.33, wps=5790.5, ups=0.09, wpb=64780, bsz=128, num_updates=1991, lr=9.99921e-05, gnorm=2.469, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22266 2021-06-19 00:50:03 | INFO | train_inner | epoch 001: 2011 / 3002 loss=2.946, ppl=7.7, wps=5809, ups=0.09, wpb=64699, bsz=128, num_updates=1992, lr=9.99921e-05, gnorm=2.539, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22278 2021-06-19 00:50:14 | INFO | train_inner | epoch 001: 2012 / 3002 loss=3.156, ppl=8.92, wps=5872.7, ups=0.09, wpb=64773, bsz=128, num_updates=1993, lr=9.99921e-05, gnorm=2.467, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22289 2021-06-19 00:50:25 | INFO | train_inner | epoch 001: 2013 / 3002 loss=2.961, ppl=7.79, wps=6015.6, ups=0.09, wpb=64725, bsz=128, num_updates=1994, lr=9.9992e-05, gnorm=2.334, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22299 2021-06-19 00:50:36 | INFO | train_inner | epoch 001: 2014 / 3002 loss=2.977, ppl=7.87, wps=5939.9, ups=0.09, wpb=64786, bsz=128, num_updates=1995, lr=9.9992e-05, gnorm=2.446, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22310 2021-06-19 00:50:47 | INFO | train_inner | epoch 001: 2015 / 3002 loss=2.909, ppl=7.51, wps=5799.9, ups=0.09, wpb=64820, bsz=128, num_updates=1996, lr=9.9992e-05, gnorm=2.329, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22321 2021-06-19 00:50:58 | INFO | train_inner | epoch 001: 2016 / 3002 loss=2.954, ppl=7.75, wps=5845.7, ups=0.09, wpb=64731, bsz=128, num_updates=1997, lr=9.9992e-05, gnorm=2.316, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22333 2021-06-19 00:51:09 | INFO | train_inner | epoch 001: 2017 / 3002 loss=2.865, ppl=7.28, wps=5774, ups=0.09, wpb=64825, bsz=128, num_updates=1998, lr=9.9992e-05, gnorm=2.479, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22344 2021-06-19 00:51:20 | INFO | train_inner | epoch 001: 2018 / 3002 loss=3.19, ppl=9.13, wps=5865.7, ups=0.09, wpb=64731, bsz=128, num_updates=1999, lr=9.9992e-05, gnorm=2.542, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22355 2021-06-19 00:51:32 | INFO | train_inner | epoch 001: 2019 / 3002 loss=3.006, ppl=8.04, wps=5796.8, ups=0.09, wpb=64804, bsz=128, num_updates=2000, lr=9.9992e-05, gnorm=2.461, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22366 2021-06-19 00:51:43 | INFO | train_inner | epoch 001: 2020 / 3002 loss=2.982, ppl=7.9, wps=5886.8, ups=0.09, wpb=64875, bsz=128, num_updates=2001, lr=9.9992e-05, gnorm=2.544, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22377 2021-06-19 00:51:54 | INFO | train_inner | epoch 001: 2021 / 3002 loss=2.964, ppl=7.8, wps=5809.6, ups=0.09, wpb=64752, bsz=128, num_updates=2002, lr=9.9992e-05, gnorm=3.551, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22388 2021-06-19 00:52:05 | INFO | train_inner | epoch 001: 2022 / 3002 loss=2.919, ppl=7.56, wps=5841.2, ups=0.09, wpb=64846, bsz=128, num_updates=2003, lr=9.9992e-05, gnorm=2.459, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22399 2021-06-19 00:52:16 | INFO | train_inner | epoch 001: 2023 / 3002 loss=3.082, ppl=8.47, wps=5858.2, ups=0.09, wpb=64780, bsz=128, num_updates=2004, lr=9.9992e-05, gnorm=2.577, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22410 2021-06-19 00:52:27 | INFO | train_inner | epoch 001: 2024 / 3002 loss=2.943, ppl=7.69, wps=5879.6, ups=0.09, wpb=64881, bsz=128, num_updates=2005, lr=9.9992e-05, gnorm=2.494, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22421 2021-06-19 00:52:38 | INFO | train_inner | epoch 001: 2025 / 3002 loss=2.907, ppl=7.5, wps=5918.7, ups=0.09, wpb=64868, bsz=128, num_updates=2006, lr=9.9992e-05, gnorm=2.386, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22432 2021-06-19 00:52:49 | INFO | train_inner | epoch 001: 2026 / 3002 loss=2.851, ppl=7.21, wps=5931.2, ups=0.09, wpb=64884, bsz=128, num_updates=2007, lr=9.99919e-05, gnorm=2.61, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22443 2021-06-19 00:53:00 | INFO | train_inner | epoch 001: 2027 / 3002 loss=3.01, ppl=8.06, wps=5838.1, ups=0.09, wpb=64850, bsz=128, num_updates=2008, lr=9.99919e-05, gnorm=2.554, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22454 2021-06-19 00:53:11 | INFO | train_inner | epoch 001: 2028 / 3002 loss=2.845, ppl=7.18, wps=5831.8, ups=0.09, wpb=64806, bsz=128, num_updates=2009, lr=9.99919e-05, gnorm=2.492, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22465 2021-06-19 00:53:22 | INFO | train_inner | epoch 001: 2029 / 3002 loss=2.869, ppl=7.31, wps=5874.1, ups=0.09, wpb=64883, bsz=128, num_updates=2010, lr=9.99919e-05, gnorm=2.351, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22477 2021-06-19 00:53:33 | INFO | train_inner | epoch 001: 2030 / 3002 loss=2.889, ppl=7.41, wps=5887.7, ups=0.09, wpb=64782, bsz=128, num_updates=2011, lr=9.99919e-05, gnorm=2.504, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22488 2021-06-19 00:53:44 | INFO | train_inner | epoch 001: 2031 / 3002 loss=2.979, ppl=7.88, wps=5846.1, ups=0.09, wpb=64799, bsz=128, num_updates=2012, lr=9.99919e-05, gnorm=2.377, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22499 2021-06-19 00:53:56 | INFO | train_inner | epoch 001: 2032 / 3002 loss=2.815, ppl=7.04, wps=5752.2, ups=0.09, wpb=64790, bsz=128, num_updates=2013, lr=9.99919e-05, gnorm=2.422, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22510 2021-06-19 00:54:07 | INFO | train_inner | epoch 001: 2033 / 3002 loss=2.907, ppl=7.5, wps=5746.2, ups=0.09, wpb=64865, bsz=128, num_updates=2014, lr=9.99919e-05, gnorm=2.485, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22521 2021-06-19 00:54:18 | INFO | train_inner | epoch 001: 2034 / 3002 loss=2.859, ppl=7.26, wps=5901.4, ups=0.09, wpb=64819, bsz=128, num_updates=2015, lr=9.99919e-05, gnorm=3.47, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22532 2021-06-19 00:54:29 | INFO | train_inner | epoch 001: 2035 / 3002 loss=2.858, ppl=7.25, wps=5988.4, ups=0.09, wpb=64861, bsz=128, num_updates=2016, lr=9.99919e-05, gnorm=2.375, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22543 2021-06-19 00:54:40 | INFO | train_inner | epoch 001: 2036 / 3002 loss=2.902, ppl=7.47, wps=5808.9, ups=0.09, wpb=64763, bsz=128, num_updates=2017, lr=9.99919e-05, gnorm=2.474, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22554 2021-06-19 00:54:51 | INFO | train_inner | epoch 001: 2037 / 3002 loss=2.798, ppl=6.96, wps=5915.1, ups=0.09, wpb=64880, bsz=128, num_updates=2018, lr=9.99919e-05, gnorm=2.37, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22565 2021-06-19 00:55:02 | INFO | train_inner | epoch 001: 2038 / 3002 loss=2.956, ppl=7.76, wps=5813.1, ups=0.09, wpb=64705, bsz=128, num_updates=2019, lr=9.99918e-05, gnorm=2.376, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22576 2021-06-19 00:55:13 | INFO | train_inner | epoch 001: 2039 / 3002 loss=2.862, ppl=7.27, wps=5874.3, ups=0.09, wpb=64807, bsz=128, num_updates=2020, lr=9.99918e-05, gnorm=2.928, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22587 2021-06-19 00:55:24 | INFO | train_inner | epoch 001: 2040 / 3002 loss=2.946, ppl=7.71, wps=5901.4, ups=0.09, wpb=64843, bsz=128, num_updates=2021, lr=9.99918e-05, gnorm=2.405, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22598 2021-06-19 00:55:35 | INFO | train_inner | epoch 001: 2041 / 3002 loss=2.969, ppl=7.83, wps=5975.4, ups=0.09, wpb=64793, bsz=128, num_updates=2022, lr=9.99918e-05, gnorm=2.688, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22609 2021-06-19 00:55:46 | INFO | train_inner | epoch 001: 2042 / 3002 loss=2.933, ppl=7.63, wps=5817, ups=0.09, wpb=64804, bsz=128, num_updates=2023, lr=9.99918e-05, gnorm=2.466, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22620 2021-06-19 00:55:57 | INFO | train_inner | epoch 001: 2043 / 3002 loss=2.911, ppl=7.52, wps=5840, ups=0.09, wpb=64885, bsz=128, num_updates=2024, lr=9.99918e-05, gnorm=2.554, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22631 2021-06-19 00:56:08 | INFO | train_inner | epoch 001: 2044 / 3002 loss=2.856, ppl=7.24, wps=5876.8, ups=0.09, wpb=64839, bsz=128, num_updates=2025, lr=9.99918e-05, gnorm=2.408, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22642 2021-06-19 00:56:19 | INFO | train_inner | epoch 001: 2045 / 3002 loss=2.87, ppl=7.31, wps=5892.7, ups=0.09, wpb=64867, bsz=128, num_updates=2026, lr=9.99918e-05, gnorm=2.394, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22653 2021-06-19 00:56:30 | INFO | train_inner | epoch 001: 2046 / 3002 loss=3.015, ppl=8.08, wps=5943.3, ups=0.09, wpb=64913, bsz=128, num_updates=2027, lr=9.99918e-05, gnorm=2.603, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22664 2021-06-19 00:56:41 | INFO | train_inner | epoch 001: 2047 / 3002 loss=2.954, ppl=7.75, wps=5820.6, ups=0.09, wpb=64866, bsz=128, num_updates=2028, lr=9.99918e-05, gnorm=2.359, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22675 2021-06-19 00:56:52 | INFO | train_inner | epoch 001: 2048 / 3002 loss=2.92, ppl=7.57, wps=5985, ups=0.09, wpb=64849, bsz=128, num_updates=2029, lr=9.99918e-05, gnorm=2.555, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22686 2021-06-19 00:57:03 | INFO | train_inner | epoch 001: 2049 / 3002 loss=3.071, ppl=8.4, wps=5704.3, ups=0.09, wpb=64839, bsz=128, num_updates=2030, lr=9.99918e-05, gnorm=2.47, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22698 2021-06-19 00:57:14 | INFO | train_inner | epoch 001: 2050 / 3002 loss=2.894, ppl=7.43, wps=5818.3, ups=0.09, wpb=64864, bsz=128, num_updates=2031, lr=9.99918e-05, gnorm=2.423, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22709 2021-06-19 00:57:26 | INFO | train_inner | epoch 001: 2051 / 3002 loss=3.001, ppl=8, wps=5804.9, ups=0.09, wpb=64858, bsz=128, num_updates=2032, lr=9.99917e-05, gnorm=2.423, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22720 2021-06-19 00:57:36 | INFO | train_inner | epoch 001: 2052 / 3002 loss=2.945, ppl=7.7, wps=6015.7, ups=0.09, wpb=64889, bsz=128, num_updates=2033, lr=9.99917e-05, gnorm=2.509, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22731 2021-06-19 00:57:47 | INFO | train_inner | epoch 001: 2053 / 3002 loss=2.891, ppl=7.42, wps=5902.3, ups=0.09, wpb=64829, bsz=128, num_updates=2034, lr=9.99917e-05, gnorm=6.112, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22742 2021-06-19 00:57:59 | INFO | train_inner | epoch 001: 2054 / 3002 loss=2.991, ppl=7.95, wps=5775.5, ups=0.09, wpb=64757, bsz=128, num_updates=2035, lr=9.99917e-05, gnorm=2.383, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22753 2021-06-19 00:58:10 | INFO | train_inner | epoch 001: 2055 / 3002 loss=2.806, ppl=6.99, wps=5941.3, ups=0.09, wpb=64804, bsz=128, num_updates=2036, lr=9.99917e-05, gnorm=2.337, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22764 2021-06-19 00:58:20 | INFO | train_inner | epoch 001: 2056 / 3002 loss=3.061, ppl=8.35, wps=5955.7, ups=0.09, wpb=64833, bsz=128, num_updates=2037, lr=9.99917e-05, gnorm=2.444, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22775 2021-06-19 00:58:32 | INFO | train_inner | epoch 001: 2057 / 3002 loss=3.032, ppl=8.18, wps=5805.4, ups=0.09, wpb=64830, bsz=128, num_updates=2038, lr=9.99917e-05, gnorm=2.547, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22786 2021-06-19 00:58:43 | INFO | train_inner | epoch 001: 2058 / 3002 loss=2.773, ppl=6.84, wps=5924.2, ups=0.09, wpb=64806, bsz=128, num_updates=2039, lr=9.99917e-05, gnorm=2.394, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22797 2021-06-19 00:58:54 | INFO | train_inner | epoch 001: 2059 / 3002 loss=2.974, ppl=7.86, wps=5831.4, ups=0.09, wpb=64871, bsz=128, num_updates=2040, lr=9.99917e-05, gnorm=2.425, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22808 2021-06-19 00:59:05 | INFO | train_inner | epoch 001: 2060 / 3002 loss=2.851, ppl=7.22, wps=5880.4, ups=0.09, wpb=64851, bsz=128, num_updates=2041, lr=9.99917e-05, gnorm=2.465, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22819 2021-06-19 00:59:16 | INFO | train_inner | epoch 001: 2061 / 3002 loss=3.044, ppl=8.25, wps=5879, ups=0.09, wpb=64814, bsz=128, num_updates=2042, lr=9.99917e-05, gnorm=2.418, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22830 2021-06-19 00:59:27 | INFO | train_inner | epoch 001: 2062 / 3002 loss=3.028, ppl=8.16, wps=5859.8, ups=0.09, wpb=64789, bsz=128, num_updates=2043, lr=9.99917e-05, gnorm=2.457, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22841 2021-06-19 00:59:38 | INFO | train_inner | epoch 001: 2063 / 3002 loss=2.781, ppl=6.87, wps=5898.8, ups=0.09, wpb=64827, bsz=128, num_updates=2044, lr=9.99916e-05, gnorm=2.369, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22852 2021-06-19 00:59:49 | INFO | train_inner | epoch 001: 2064 / 3002 loss=2.93, ppl=7.62, wps=5896.5, ups=0.09, wpb=64782, bsz=128, num_updates=2045, lr=9.99916e-05, gnorm=2.453, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22863 2021-06-19 01:00:00 | INFO | train_inner | epoch 001: 2065 / 3002 loss=2.928, ppl=7.61, wps=5872.2, ups=0.09, wpb=64853, bsz=128, num_updates=2046, lr=9.99916e-05, gnorm=2.448, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22874 2021-06-19 01:00:11 | INFO | train_inner | epoch 001: 2066 / 3002 loss=2.97, ppl=7.83, wps=5789.7, ups=0.09, wpb=64842, bsz=128, num_updates=2047, lr=9.99916e-05, gnorm=2.715, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22885 2021-06-19 01:00:22 | INFO | train_inner | epoch 001: 2067 / 3002 loss=2.791, ppl=6.92, wps=5896, ups=0.09, wpb=64847, bsz=128, num_updates=2048, lr=9.99916e-05, gnorm=2.359, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22896 2021-06-19 01:00:33 | INFO | train_inner | epoch 001: 2068 / 3002 loss=2.899, ppl=7.46, wps=5942.4, ups=0.09, wpb=64901, bsz=128, num_updates=2049, lr=9.99916e-05, gnorm=2.369, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=22907 2021-06-19 01:00:44 | INFO | train_inner | epoch 001: 2069 / 3002 loss=2.996, ppl=7.98, wps=5790.9, ups=0.09, wpb=64774, bsz=128, num_updates=2050, lr=9.99916e-05, gnorm=2.434, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=22918 2021-06-19 01:00:55 | INFO | train_inner | epoch 001: 2070 / 3002 loss=3.017, ppl=8.09, wps=6008.6, ups=0.09, wpb=64884, bsz=128, num_updates=2051, lr=9.99916e-05, gnorm=2.486, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=22929 2021-06-19 01:01:06 | INFO | train_inner | epoch 001: 2071 / 3002 loss=2.885, ppl=7.39, wps=6046.8, ups=0.09, wpb=64843, bsz=128, num_updates=2052, lr=9.99916e-05, gnorm=2.393, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=22940 2021-06-19 01:01:17 | INFO | train_inner | epoch 001: 2072 / 3002 loss=3.005, ppl=8.03, wps=5884.4, ups=0.09, wpb=64772, bsz=128, num_updates=2053, lr=9.99916e-05, gnorm=2.435, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=22951 2021-06-19 01:01:28 | INFO | train_inner | epoch 001: 2073 / 3002 loss=2.947, ppl=7.71, wps=5940.2, ups=0.09, wpb=64909, bsz=128, num_updates=2054, lr=9.99916e-05, gnorm=2.298, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=22962 2021-06-19 01:01:39 | INFO | train_inner | epoch 001: 2074 / 3002 loss=3.008, ppl=8.04, wps=5842.8, ups=0.09, wpb=64859, bsz=128, num_updates=2055, lr=9.99916e-05, gnorm=2.42, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=22973 2021-06-19 01:01:50 | INFO | train_inner | epoch 001: 2075 / 3002 loss=3.093, ppl=8.53, wps=5894.5, ups=0.09, wpb=64897, bsz=128, num_updates=2056, lr=9.99916e-05, gnorm=3.097, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=22984 2021-06-19 01:02:01 | INFO | train_inner | epoch 001: 2076 / 3002 loss=2.987, ppl=7.93, wps=5916.7, ups=0.09, wpb=64853, bsz=128, num_updates=2057, lr=9.99915e-05, gnorm=2.352, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=22995 2021-06-19 01:02:12 | INFO | train_inner | epoch 001: 2077 / 3002 loss=3.005, ppl=8.03, wps=5830.8, ups=0.09, wpb=64817, bsz=128, num_updates=2058, lr=9.99915e-05, gnorm=2.371, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23006 2021-06-19 01:02:23 | INFO | train_inner | epoch 001: 2078 / 3002 loss=2.733, ppl=6.65, wps=5860.5, ups=0.09, wpb=64880, bsz=128, num_updates=2059, lr=9.99915e-05, gnorm=2.614, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23017 2021-06-19 01:02:34 | INFO | train_inner | epoch 001: 2079 / 3002 loss=2.998, ppl=7.99, wps=5862.4, ups=0.09, wpb=64793, bsz=128, num_updates=2060, lr=9.99915e-05, gnorm=2.329, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23028 2021-06-19 01:02:45 | INFO | train_inner | epoch 001: 2080 / 3002 loss=2.968, ppl=7.83, wps=5903, ups=0.09, wpb=64846, bsz=128, num_updates=2061, lr=9.99915e-05, gnorm=2.392, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23039 2021-06-19 01:02:56 | INFO | train_inner | epoch 001: 2081 / 3002 loss=2.943, ppl=7.69, wps=5809.5, ups=0.09, wpb=64761, bsz=128, num_updates=2062, lr=9.99915e-05, gnorm=2.449, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23050 2021-06-19 01:03:07 | INFO | train_inner | epoch 001: 2082 / 3002 loss=3.008, ppl=8.04, wps=5976.1, ups=0.09, wpb=64883, bsz=128, num_updates=2063, lr=9.99915e-05, gnorm=2.424, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23061 2021-06-19 01:03:18 | INFO | train_inner | epoch 001: 2083 / 3002 loss=3.012, ppl=8.07, wps=5935.3, ups=0.09, wpb=64892, bsz=128, num_updates=2064, lr=9.99915e-05, gnorm=2.601, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23072 2021-06-19 01:03:29 | INFO | train_inner | epoch 001: 2084 / 3002 loss=2.88, ppl=7.36, wps=5867.3, ups=0.09, wpb=64864, bsz=128, num_updates=2065, lr=9.99915e-05, gnorm=2.382, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23083 2021-06-19 01:03:40 | INFO | train_inner | epoch 001: 2085 / 3002 loss=2.901, ppl=7.47, wps=5777.9, ups=0.09, wpb=64858, bsz=128, num_updates=2066, lr=9.99915e-05, gnorm=2.448, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23094 2021-06-19 01:03:51 | INFO | train_inner | epoch 001: 2086 / 3002 loss=2.99, ppl=7.94, wps=5873.3, ups=0.09, wpb=64874, bsz=128, num_updates=2067, lr=9.99915e-05, gnorm=2.42, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23105 2021-06-19 01:04:02 | INFO | train_inner | epoch 001: 2087 / 3002 loss=3.002, ppl=8.01, wps=5869.1, ups=0.09, wpb=64905, bsz=128, num_updates=2068, lr=9.99915e-05, gnorm=2.569, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23116 2021-06-19 01:04:13 | INFO | train_inner | epoch 001: 2088 / 3002 loss=3.05, ppl=8.28, wps=5922.6, ups=0.09, wpb=64769, bsz=128, num_updates=2069, lr=9.99914e-05, gnorm=2.38, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23127 2021-06-19 01:04:24 | INFO | train_inner | epoch 001: 2089 / 3002 loss=2.904, ppl=7.49, wps=5871.2, ups=0.09, wpb=64929, bsz=128, num_updates=2070, lr=9.99914e-05, gnorm=2.358, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23138 2021-06-19 01:04:35 | INFO | train_inner | epoch 001: 2090 / 3002 loss=2.779, ppl=6.86, wps=5844.3, ups=0.09, wpb=64805, bsz=128, num_updates=2071, lr=9.99914e-05, gnorm=2.426, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23150 2021-06-19 01:04:46 | INFO | train_inner | epoch 001: 2091 / 3002 loss=2.824, ppl=7.08, wps=5779.2, ups=0.09, wpb=64796, bsz=128, num_updates=2072, lr=9.99914e-05, gnorm=2.644, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23161 2021-06-19 01:04:58 | INFO | train_inner | epoch 001: 2092 / 3002 loss=2.845, ppl=7.19, wps=5851.5, ups=0.09, wpb=64863, bsz=128, num_updates=2073, lr=9.99914e-05, gnorm=2.543, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23172 2021-06-19 01:05:09 | INFO | train_inner | epoch 001: 2093 / 3002 loss=2.867, ppl=7.29, wps=5738.8, ups=0.09, wpb=64813, bsz=128, num_updates=2074, lr=9.99914e-05, gnorm=2.545, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23183 2021-06-19 01:05:20 | INFO | train_inner | epoch 001: 2094 / 3002 loss=2.906, ppl=7.5, wps=5858.4, ups=0.09, wpb=64794, bsz=128, num_updates=2075, lr=9.99914e-05, gnorm=2.463, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23194 2021-06-19 01:05:31 | INFO | train_inner | epoch 001: 2095 / 3002 loss=2.962, ppl=7.79, wps=5860.5, ups=0.09, wpb=64769, bsz=128, num_updates=2076, lr=9.99914e-05, gnorm=2.443, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23205 2021-06-19 01:05:42 | INFO | train_inner | epoch 001: 2096 / 3002 loss=2.833, ppl=7.13, wps=5924.4, ups=0.09, wpb=64817, bsz=128, num_updates=2077, lr=9.99914e-05, gnorm=2.357, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23216 2021-06-19 01:05:53 | INFO | train_inner | epoch 001: 2097 / 3002 loss=2.793, ppl=6.93, wps=5862.7, ups=0.09, wpb=64815, bsz=128, num_updates=2078, lr=9.99914e-05, gnorm=2.48, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23227 2021-06-19 01:06:04 | INFO | train_inner | epoch 001: 2098 / 3002 loss=2.98, ppl=7.89, wps=5878.9, ups=0.09, wpb=64769, bsz=128, num_updates=2079, lr=9.99914e-05, gnorm=2.62, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23238 2021-06-19 01:06:15 | INFO | train_inner | epoch 001: 2099 / 3002 loss=2.981, ppl=7.9, wps=5818, ups=0.09, wpb=64892, bsz=128, num_updates=2080, lr=9.99914e-05, gnorm=2.853, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23249 2021-06-19 01:06:26 | INFO | train_inner | epoch 001: 2100 / 3002 loss=2.942, ppl=7.69, wps=5818.4, ups=0.09, wpb=64752, bsz=128, num_updates=2081, lr=9.99914e-05, gnorm=2.488, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23261 2021-06-19 01:06:37 | INFO | train_inner | epoch 001: 2101 / 3002 loss=3.029, ppl=8.16, wps=5850.2, ups=0.09, wpb=64780, bsz=128, num_updates=2082, lr=9.99913e-05, gnorm=2.439, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23272 2021-06-19 01:06:48 | INFO | train_inner | epoch 001: 2102 / 3002 loss=2.844, ppl=7.18, wps=5854.9, ups=0.09, wpb=64813, bsz=128, num_updates=2083, lr=9.99913e-05, gnorm=2.347, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23283 2021-06-19 01:06:59 | INFO | train_inner | epoch 001: 2103 / 3002 loss=3.134, ppl=8.78, wps=5875.5, ups=0.09, wpb=64874, bsz=128, num_updates=2084, lr=9.99913e-05, gnorm=2.983, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23294 2021-06-19 01:07:11 | INFO | train_inner | epoch 001: 2104 / 3002 loss=2.979, ppl=7.88, wps=5852.3, ups=0.09, wpb=64861, bsz=128, num_updates=2085, lr=9.99913e-05, gnorm=2.4, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23305 2021-06-19 01:07:22 | INFO | train_inner | epoch 001: 2105 / 3002 loss=2.996, ppl=7.98, wps=5845.2, ups=0.09, wpb=64815, bsz=128, num_updates=2086, lr=9.99913e-05, gnorm=2.612, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23316 2021-06-19 01:07:33 | INFO | train_inner | epoch 001: 2106 / 3002 loss=2.934, ppl=7.64, wps=5762.2, ups=0.09, wpb=64873, bsz=128, num_updates=2087, lr=9.99913e-05, gnorm=2.386, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23327 2021-06-19 01:07:44 | INFO | train_inner | epoch 001: 2107 / 3002 loss=2.853, ppl=7.22, wps=5790.9, ups=0.09, wpb=64879, bsz=128, num_updates=2088, lr=9.99913e-05, gnorm=2.364, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23338 2021-06-19 01:07:55 | INFO | train_inner | epoch 001: 2108 / 3002 loss=2.849, ppl=7.21, wps=5930.3, ups=0.09, wpb=64902, bsz=128, num_updates=2089, lr=9.99913e-05, gnorm=2.527, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23349 2021-06-19 01:08:06 | INFO | train_inner | epoch 001: 2109 / 3002 loss=2.769, ppl=6.82, wps=5897.6, ups=0.09, wpb=64804, bsz=128, num_updates=2090, lr=9.99913e-05, gnorm=2.432, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23360 2021-06-19 01:08:17 | INFO | train_inner | epoch 001: 2110 / 3002 loss=2.857, ppl=7.25, wps=5829, ups=0.09, wpb=64751, bsz=128, num_updates=2091, lr=9.99913e-05, gnorm=2.39, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23371 2021-06-19 01:08:28 | INFO | train_inner | epoch 001: 2111 / 3002 loss=2.749, ppl=6.72, wps=5842.7, ups=0.09, wpb=64849, bsz=128, num_updates=2092, lr=9.99913e-05, gnorm=2.526, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23383 2021-06-19 01:08:39 | INFO | train_inner | epoch 001: 2112 / 3002 loss=3.043, ppl=8.24, wps=5948.6, ups=0.09, wpb=64903, bsz=128, num_updates=2093, lr=9.99913e-05, gnorm=2.358, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23393 2021-06-19 01:08:50 | INFO | train_inner | epoch 001: 2113 / 3002 loss=2.885, ppl=7.39, wps=5819.8, ups=0.09, wpb=64798, bsz=128, num_updates=2094, lr=9.99912e-05, gnorm=2.43, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23405 2021-06-19 01:09:02 | INFO | train_inner | epoch 001: 2114 / 3002 loss=3.045, ppl=8.26, wps=5734.2, ups=0.09, wpb=64790, bsz=128, num_updates=2095, lr=9.99912e-05, gnorm=2.508, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23416 2021-06-19 01:09:13 | INFO | train_inner | epoch 001: 2115 / 3002 loss=2.87, ppl=7.31, wps=5761.3, ups=0.09, wpb=64854, bsz=128, num_updates=2096, lr=9.99912e-05, gnorm=2.366, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23427 2021-06-19 01:09:24 | INFO | train_inner | epoch 001: 2116 / 3002 loss=2.897, ppl=7.45, wps=5901.5, ups=0.09, wpb=64829, bsz=128, num_updates=2097, lr=9.99912e-05, gnorm=2.396, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23438 2021-06-19 01:09:35 | INFO | train_inner | epoch 001: 2117 / 3002 loss=2.963, ppl=7.8, wps=6012, ups=0.09, wpb=64871, bsz=128, num_updates=2098, lr=9.99912e-05, gnorm=2.487, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23449 2021-06-19 01:09:45 | INFO | train_inner | epoch 001: 2118 / 3002 loss=2.93, ppl=7.62, wps=5987.7, ups=0.09, wpb=64824, bsz=128, num_updates=2099, lr=9.99912e-05, gnorm=2.435, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23460 2021-06-19 01:09:56 | INFO | train_inner | epoch 001: 2119 / 3002 loss=2.778, ppl=6.86, wps=5891.3, ups=0.09, wpb=64928, bsz=128, num_updates=2100, lr=9.99912e-05, gnorm=2.36, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23471 2021-06-19 01:10:08 | INFO | train_inner | epoch 001: 2120 / 3002 loss=2.881, ppl=7.37, wps=5846.7, ups=0.09, wpb=64853, bsz=128, num_updates=2101, lr=9.99912e-05, gnorm=2.568, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23482 2021-06-19 01:10:18 | INFO | train_inner | epoch 001: 2121 / 3002 loss=3.1, ppl=8.57, wps=5932.5, ups=0.09, wpb=64842, bsz=128, num_updates=2102, lr=9.99912e-05, gnorm=2.432, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23493 2021-06-19 01:10:30 | INFO | train_inner | epoch 001: 2122 / 3002 loss=2.89, ppl=7.41, wps=5780.2, ups=0.09, wpb=64931, bsz=128, num_updates=2103, lr=9.99912e-05, gnorm=2.398, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23504 2021-06-19 01:10:41 | INFO | train_inner | epoch 001: 2123 / 3002 loss=2.953, ppl=7.74, wps=5861.3, ups=0.09, wpb=64871, bsz=128, num_updates=2104, lr=9.99912e-05, gnorm=2.368, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23515 2021-06-19 01:10:52 | INFO | train_inner | epoch 001: 2124 / 3002 loss=2.937, ppl=7.66, wps=5889.6, ups=0.09, wpb=64837, bsz=128, num_updates=2105, lr=9.99912e-05, gnorm=2.353, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23526 2021-06-19 01:11:03 | INFO | train_inner | epoch 001: 2125 / 3002 loss=3.031, ppl=8.17, wps=5946.8, ups=0.09, wpb=64766, bsz=128, num_updates=2106, lr=9.99912e-05, gnorm=2.327, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23537 2021-06-19 01:11:13 | INFO | train_inner | epoch 001: 2126 / 3002 loss=2.825, ppl=7.08, wps=6008, ups=0.09, wpb=64840, bsz=128, num_updates=2107, lr=9.99911e-05, gnorm=2.383, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23548 2021-06-19 01:11:25 | INFO | train_inner | epoch 001: 2127 / 3002 loss=2.874, ppl=7.33, wps=5854.8, ups=0.09, wpb=64770, bsz=128, num_updates=2108, lr=9.99911e-05, gnorm=2.442, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23559 2021-06-19 01:11:35 | INFO | train_inner | epoch 001: 2128 / 3002 loss=3.074, ppl=8.42, wps=5928.7, ups=0.09, wpb=64812, bsz=128, num_updates=2109, lr=9.99911e-05, gnorm=2.416, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23570 2021-06-19 01:11:46 | INFO | train_inner | epoch 001: 2129 / 3002 loss=3.103, ppl=8.59, wps=5900.5, ups=0.09, wpb=64912, bsz=128, num_updates=2110, lr=9.99911e-05, gnorm=2.396, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23581 2021-06-19 01:11:57 | INFO | train_inner | epoch 001: 2130 / 3002 loss=3.141, ppl=8.82, wps=5943.2, ups=0.09, wpb=64870, bsz=128, num_updates=2111, lr=9.99911e-05, gnorm=2.506, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23592 2021-06-19 01:12:08 | INFO | train_inner | epoch 001: 2131 / 3002 loss=2.863, ppl=7.28, wps=5892.4, ups=0.09, wpb=64905, bsz=128, num_updates=2112, lr=9.99911e-05, gnorm=2.982, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23603 2021-06-19 01:12:19 | INFO | train_inner | epoch 001: 2132 / 3002 loss=2.836, ppl=7.14, wps=5835.9, ups=0.09, wpb=64790, bsz=128, num_updates=2113, lr=9.99911e-05, gnorm=3.204, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23614 2021-06-19 01:12:30 | INFO | train_inner | epoch 001: 2133 / 3002 loss=2.818, ppl=7.05, wps=5906.6, ups=0.09, wpb=64815, bsz=128, num_updates=2114, lr=9.99911e-05, gnorm=2.351, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23625 2021-06-19 01:12:41 | INFO | train_inner | epoch 001: 2134 / 3002 loss=2.936, ppl=7.65, wps=5906.1, ups=0.09, wpb=64844, bsz=128, num_updates=2115, lr=9.99911e-05, gnorm=2.425, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23636 2021-06-19 01:12:52 | INFO | train_inner | epoch 001: 2135 / 3002 loss=2.906, ppl=7.5, wps=5961.2, ups=0.09, wpb=64872, bsz=128, num_updates=2116, lr=9.99911e-05, gnorm=2.317, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23647 2021-06-19 01:13:03 | INFO | train_inner | epoch 001: 2136 / 3002 loss=2.798, ppl=6.95, wps=5846.9, ups=0.09, wpb=64827, bsz=128, num_updates=2117, lr=9.99911e-05, gnorm=2.365, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23658 2021-06-19 01:13:15 | INFO | train_inner | epoch 001: 2137 / 3002 loss=3.009, ppl=8.05, wps=5750.5, ups=0.09, wpb=64777, bsz=128, num_updates=2118, lr=9.99911e-05, gnorm=2.481, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23669 2021-06-19 01:13:26 | INFO | train_inner | epoch 001: 2138 / 3002 loss=2.769, ppl=6.81, wps=5838, ups=0.09, wpb=64880, bsz=128, num_updates=2119, lr=9.9991e-05, gnorm=2.303, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23680 2021-06-19 01:13:37 | INFO | train_inner | epoch 001: 2139 / 3002 loss=2.886, ppl=7.39, wps=5827.9, ups=0.09, wpb=64859, bsz=128, num_updates=2120, lr=9.9991e-05, gnorm=3.227, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23691 2021-06-19 01:13:48 | INFO | train_inner | epoch 001: 2140 / 3002 loss=3.054, ppl=8.31, wps=5834.9, ups=0.09, wpb=64828, bsz=128, num_updates=2121, lr=9.9991e-05, gnorm=2.398, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23702 2021-06-19 01:13:59 | INFO | train_inner | epoch 001: 2141 / 3002 loss=3.073, ppl=8.41, wps=5912.1, ups=0.09, wpb=64823, bsz=128, num_updates=2122, lr=9.9991e-05, gnorm=2.555, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23713 2021-06-19 01:14:10 | INFO | train_inner | epoch 001: 2142 / 3002 loss=2.846, ppl=7.19, wps=5714.5, ups=0.09, wpb=64815, bsz=128, num_updates=2123, lr=9.9991e-05, gnorm=2.381, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23725 2021-06-19 01:14:21 | INFO | train_inner | epoch 001: 2143 / 3002 loss=2.858, ppl=7.25, wps=5854, ups=0.09, wpb=64805, bsz=128, num_updates=2124, lr=9.9991e-05, gnorm=2.516, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23736 2021-06-19 01:14:32 | INFO | train_inner | epoch 001: 2144 / 3002 loss=2.883, ppl=7.38, wps=5938.6, ups=0.09, wpb=64867, bsz=128, num_updates=2125, lr=9.9991e-05, gnorm=2.402, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23747 2021-06-19 01:14:43 | INFO | train_inner | epoch 001: 2145 / 3002 loss=2.997, ppl=7.98, wps=5937.2, ups=0.09, wpb=64800, bsz=128, num_updates=2126, lr=9.9991e-05, gnorm=2.367, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23758 2021-06-19 01:14:54 | INFO | train_inner | epoch 001: 2146 / 3002 loss=2.944, ppl=7.69, wps=5877.7, ups=0.09, wpb=64863, bsz=128, num_updates=2127, lr=9.9991e-05, gnorm=2.413, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23769 2021-06-19 01:15:05 | INFO | train_inner | epoch 001: 2147 / 3002 loss=3.076, ppl=8.43, wps=5886.8, ups=0.09, wpb=64805, bsz=128, num_updates=2128, lr=9.9991e-05, gnorm=2.587, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23780 2021-06-19 01:15:16 | INFO | train_inner | epoch 001: 2148 / 3002 loss=3.022, ppl=8.12, wps=5825.6, ups=0.09, wpb=64877, bsz=128, num_updates=2129, lr=9.9991e-05, gnorm=3.042, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23791 2021-06-19 01:15:27 | INFO | train_inner | epoch 001: 2149 / 3002 loss=2.779, ppl=6.86, wps=5892.7, ups=0.09, wpb=64796, bsz=128, num_updates=2130, lr=9.9991e-05, gnorm=2.376, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23802 2021-06-19 01:15:39 | INFO | train_inner | epoch 001: 2150 / 3002 loss=2.843, ppl=7.18, wps=5812, ups=0.09, wpb=64824, bsz=128, num_updates=2131, lr=9.9991e-05, gnorm=2.347, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23813 2021-06-19 01:15:50 | INFO | train_inner | epoch 001: 2151 / 3002 loss=2.928, ppl=7.61, wps=5862.5, ups=0.09, wpb=64803, bsz=128, num_updates=2132, lr=9.99909e-05, gnorm=2.627, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23824 2021-06-19 01:16:01 | INFO | train_inner | epoch 001: 2152 / 3002 loss=2.902, ppl=7.47, wps=5913.4, ups=0.09, wpb=64811, bsz=128, num_updates=2133, lr=9.99909e-05, gnorm=2.463, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23835 2021-06-19 01:16:12 | INFO | train_inner | epoch 001: 2153 / 3002 loss=2.905, ppl=7.49, wps=5802.9, ups=0.09, wpb=64797, bsz=128, num_updates=2134, lr=9.99909e-05, gnorm=2.498, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23846 2021-06-19 01:16:23 | INFO | train_inner | epoch 001: 2154 / 3002 loss=2.874, ppl=7.33, wps=5913.4, ups=0.09, wpb=64755, bsz=128, num_updates=2135, lr=9.99909e-05, gnorm=2.479, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23857 2021-06-19 01:16:34 | INFO | train_inner | epoch 001: 2155 / 3002 loss=3.024, ppl=8.13, wps=5877.2, ups=0.09, wpb=64853, bsz=128, num_updates=2136, lr=9.99909e-05, gnorm=2.717, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23868 2021-06-19 01:16:45 | INFO | train_inner | epoch 001: 2156 / 3002 loss=2.897, ppl=7.45, wps=5894.5, ups=0.09, wpb=64819, bsz=128, num_updates=2137, lr=9.99909e-05, gnorm=19.662, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23879 2021-06-19 01:16:56 | INFO | train_inner | epoch 001: 2157 / 3002 loss=2.981, ppl=7.89, wps=5976.1, ups=0.09, wpb=64782, bsz=128, num_updates=2138, lr=9.99909e-05, gnorm=2.358, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23890 2021-06-19 01:17:07 | INFO | train_inner | epoch 001: 2158 / 3002 loss=2.973, ppl=7.85, wps=5797.3, ups=0.09, wpb=64800, bsz=128, num_updates=2139, lr=9.99909e-05, gnorm=2.418, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23901 2021-06-19 01:17:18 | INFO | train_inner | epoch 001: 2159 / 3002 loss=2.832, ppl=7.12, wps=5955.8, ups=0.09, wpb=64868, bsz=128, num_updates=2140, lr=9.99909e-05, gnorm=2.438, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23912 2021-06-19 01:17:29 | INFO | train_inner | epoch 001: 2160 / 3002 loss=2.906, ppl=7.49, wps=5958.7, ups=0.09, wpb=64793, bsz=128, num_updates=2141, lr=9.99909e-05, gnorm=2.6, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23923 2021-06-19 01:17:39 | INFO | train_inner | epoch 001: 2161 / 3002 loss=2.937, ppl=7.66, wps=5919.7, ups=0.09, wpb=64846, bsz=128, num_updates=2142, lr=9.99909e-05, gnorm=2.433, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23934 2021-06-19 01:17:51 | INFO | train_inner | epoch 001: 2162 / 3002 loss=2.846, ppl=7.19, wps=5852.8, ups=0.09, wpb=64882, bsz=128, num_updates=2143, lr=9.99909e-05, gnorm=2.458, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23945 2021-06-19 01:18:02 | INFO | train_inner | epoch 001: 2163 / 3002 loss=2.848, ppl=7.2, wps=5796.1, ups=0.09, wpb=64865, bsz=128, num_updates=2144, lr=9.99908e-05, gnorm=2.536, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23956 2021-06-19 01:18:13 | INFO | train_inner | epoch 001: 2164 / 3002 loss=3.073, ppl=8.42, wps=5895.7, ups=0.09, wpb=64846, bsz=128, num_updates=2145, lr=9.99908e-05, gnorm=2.603, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23967 2021-06-19 01:18:24 | INFO | train_inner | epoch 001: 2165 / 3002 loss=2.967, ppl=7.82, wps=5822.4, ups=0.09, wpb=64746, bsz=128, num_updates=2146, lr=9.99908e-05, gnorm=2.457, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23978 2021-06-19 01:18:35 | INFO | train_inner | epoch 001: 2166 / 3002 loss=2.887, ppl=7.4, wps=5843.2, ups=0.09, wpb=64864, bsz=128, num_updates=2147, lr=9.99908e-05, gnorm=2.398, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23989 2021-06-19 01:18:46 | INFO | train_inner | epoch 001: 2167 / 3002 loss=2.745, ppl=6.7, wps=5886.2, ups=0.09, wpb=64746, bsz=128, num_updates=2148, lr=9.99908e-05, gnorm=2.419, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24000 2021-06-19 01:18:57 | INFO | train_inner | epoch 001: 2168 / 3002 loss=3.064, ppl=8.36, wps=5799.8, ups=0.09, wpb=64794, bsz=128, num_updates=2149, lr=9.99908e-05, gnorm=2.615, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24011 2021-06-19 01:19:08 | INFO | train_inner | epoch 001: 2169 / 3002 loss=3.001, ppl=8.01, wps=5853.2, ups=0.09, wpb=64877, bsz=128, num_updates=2150, lr=9.99908e-05, gnorm=2.748, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24023 2021-06-19 01:19:19 | INFO | train_inner | epoch 001: 2170 / 3002 loss=2.993, ppl=7.96, wps=5911.2, ups=0.09, wpb=64854, bsz=128, num_updates=2151, lr=9.99908e-05, gnorm=2.456, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=24034 2021-06-19 01:19:30 | INFO | train_inner | epoch 001: 2171 / 3002 loss=2.892, ppl=7.42, wps=5823.4, ups=0.09, wpb=64935, bsz=128, num_updates=2152, lr=9.99908e-05, gnorm=2.458, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24045 2021-06-19 01:19:41 | INFO | train_inner | epoch 001: 2172 / 3002 loss=2.939, ppl=7.67, wps=5900.9, ups=0.09, wpb=64873, bsz=128, num_updates=2153, lr=9.99908e-05, gnorm=2.52, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24056 2021-06-19 01:19:52 | INFO | train_inner | epoch 001: 2173 / 3002 loss=2.971, ppl=7.84, wps=5848, ups=0.09, wpb=64745, bsz=128, num_updates=2154, lr=9.99908e-05, gnorm=2.469, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24067 2021-06-19 01:20:04 | INFO | train_inner | epoch 001: 2174 / 3002 loss=2.765, ppl=6.8, wps=5725.4, ups=0.09, wpb=64915, bsz=128, num_updates=2155, lr=9.99908e-05, gnorm=2.407, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24078 2021-06-19 01:20:15 | INFO | train_inner | epoch 001: 2175 / 3002 loss=2.825, ppl=7.09, wps=5905.2, ups=0.09, wpb=64799, bsz=128, num_updates=2156, lr=9.99908e-05, gnorm=2.544, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24089 2021-06-19 01:20:26 | INFO | train_inner | epoch 001: 2176 / 3002 loss=2.805, ppl=6.99, wps=5775.6, ups=0.09, wpb=64810, bsz=128, num_updates=2157, lr=9.99907e-05, gnorm=2.454, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24100 2021-06-19 01:20:37 | INFO | train_inner | epoch 001: 2177 / 3002 loss=2.902, ppl=7.47, wps=5884.4, ups=0.09, wpb=64879, bsz=128, num_updates=2158, lr=9.99907e-05, gnorm=2.5, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24111 2021-06-19 01:20:48 | INFO | train_inner | epoch 001: 2178 / 3002 loss=3.016, ppl=8.09, wps=5836.4, ups=0.09, wpb=64884, bsz=128, num_updates=2159, lr=9.99907e-05, gnorm=2.589, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24122 2021-06-19 01:20:59 | INFO | train_inner | epoch 001: 2179 / 3002 loss=3.031, ppl=8.18, wps=5909.1, ups=0.09, wpb=64776, bsz=128, num_updates=2160, lr=9.99907e-05, gnorm=2.555, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24133 2021-06-19 01:21:10 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 2021-06-19 01:21:21 | INFO | train_inner | epoch 001: 2181 / 3002 loss=2.866, ppl=7.29, wps=2945.7, ups=0.05, wpb=64827, bsz=128, num_updates=2161, lr=9.99907e-05, gnorm=2.615, loss_scale=0.25, train_wall=21, gb_free=2.8, wall=24155 2021-06-19 01:21:32 | INFO | train_inner | epoch 001: 2182 / 3002 loss=2.847, ppl=7.19, wps=5850.8, ups=0.09, wpb=64781, bsz=128, num_updates=2162, lr=9.99907e-05, gnorm=2.411, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24166 2021-06-19 01:21:43 | INFO | train_inner | epoch 001: 2183 / 3002 loss=3.087, ppl=8.5, wps=6065.9, ups=0.09, wpb=64973, bsz=128, num_updates=2163, lr=9.99907e-05, gnorm=2.401, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24177 2021-06-19 01:21:54 | INFO | train_inner | epoch 001: 2184 / 3002 loss=3.011, ppl=8.06, wps=5875.1, ups=0.09, wpb=64762, bsz=128, num_updates=2164, lr=9.99907e-05, gnorm=2.464, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24188 2021-06-19 01:22:05 | INFO | train_inner | epoch 001: 2185 / 3002 loss=2.878, ppl=7.35, wps=5872.6, ups=0.09, wpb=64781, bsz=128, num_updates=2165, lr=9.99907e-05, gnorm=2.479, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24199 2021-06-19 01:22:16 | INFO | train_inner | epoch 001: 2186 / 3002 loss=3.03, ppl=8.17, wps=5937.7, ups=0.09, wpb=64865, bsz=128, num_updates=2166, lr=9.99907e-05, gnorm=2.577, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24210 2021-06-19 01:22:27 | INFO | train_inner | epoch 001: 2187 / 3002 loss=2.914, ppl=7.54, wps=5941.8, ups=0.09, wpb=64817, bsz=128, num_updates=2167, lr=9.99907e-05, gnorm=2.689, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24221 2021-06-19 01:22:38 | INFO | train_inner | epoch 001: 2188 / 3002 loss=2.904, ppl=7.49, wps=5904.8, ups=0.09, wpb=64874, bsz=128, num_updates=2168, lr=9.99907e-05, gnorm=2.494, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24232 2021-06-19 01:22:49 | INFO | train_inner | epoch 001: 2189 / 3002 loss=2.745, ppl=6.71, wps=5749.9, ups=0.09, wpb=64768, bsz=128, num_updates=2169, lr=9.99906e-05, gnorm=2.338, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24243 2021-06-19 01:23:00 | INFO | train_inner | epoch 001: 2190 / 3002 loss=3.015, ppl=8.09, wps=5899.6, ups=0.09, wpb=64829, bsz=128, num_updates=2170, lr=9.99906e-05, gnorm=2.382, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24254 2021-06-19 01:23:11 | INFO | train_inner | epoch 001: 2191 / 3002 loss=2.914, ppl=7.53, wps=5860.5, ups=0.09, wpb=64867, bsz=128, num_updates=2171, lr=9.99906e-05, gnorm=2.47, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24265 2021-06-19 01:23:22 | INFO | train_inner | epoch 001: 2192 / 3002 loss=3.066, ppl=8.37, wps=5981.4, ups=0.09, wpb=64778, bsz=128, num_updates=2172, lr=9.99906e-05, gnorm=2.637, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24276 2021-06-19 01:23:33 | INFO | train_inner | epoch 001: 2193 / 3002 loss=2.925, ppl=7.59, wps=5839.7, ups=0.09, wpb=64890, bsz=128, num_updates=2173, lr=9.99906e-05, gnorm=2.55, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24287 2021-06-19 01:23:44 | INFO | train_inner | epoch 001: 2194 / 3002 loss=2.944, ppl=7.7, wps=5878.8, ups=0.09, wpb=64812, bsz=128, num_updates=2174, lr=9.99906e-05, gnorm=2.464, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24298 2021-06-19 01:23:55 | INFO | train_inner | epoch 001: 2195 / 3002 loss=2.884, ppl=7.38, wps=5967.6, ups=0.09, wpb=64861, bsz=128, num_updates=2175, lr=9.99906e-05, gnorm=2.42, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24309 2021-06-19 01:24:06 | INFO | train_inner | epoch 001: 2196 / 3002 loss=3.026, ppl=8.14, wps=5758, ups=0.09, wpb=64809, bsz=128, num_updates=2176, lr=9.99906e-05, gnorm=2.292, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24320 2021-06-19 01:24:17 | INFO | train_inner | epoch 001: 2197 / 3002 loss=2.873, ppl=7.33, wps=5940.4, ups=0.09, wpb=64907, bsz=128, num_updates=2177, lr=9.99906e-05, gnorm=2.409, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24331 2021-06-19 01:24:28 | INFO | train_inner | epoch 001: 2198 / 3002 loss=2.815, ppl=7.04, wps=5768.9, ups=0.09, wpb=64810, bsz=128, num_updates=2178, lr=9.99906e-05, gnorm=7.06, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24343 2021-06-19 01:24:40 | INFO | train_inner | epoch 001: 2199 / 3002 loss=2.896, ppl=7.45, wps=5761.8, ups=0.09, wpb=64809, bsz=128, num_updates=2179, lr=9.99906e-05, gnorm=2.405, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24354 2021-06-19 01:24:51 | INFO | train_inner | epoch 001: 2200 / 3002 loss=2.937, ppl=7.66, wps=5870.9, ups=0.09, wpb=64880, bsz=128, num_updates=2180, lr=9.99906e-05, gnorm=2.614, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24365 2021-06-19 01:25:02 | INFO | train_inner | epoch 001: 2201 / 3002 loss=2.862, ppl=7.27, wps=5797.9, ups=0.09, wpb=64821, bsz=128, num_updates=2181, lr=9.99906e-05, gnorm=2.376, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24376 2021-06-19 01:25:13 | INFO | train_inner | epoch 001: 2202 / 3002 loss=2.965, ppl=7.81, wps=5916.2, ups=0.09, wpb=64855, bsz=128, num_updates=2182, lr=9.99905e-05, gnorm=2.487, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24387 2021-06-19 01:25:24 | INFO | train_inner | epoch 001: 2203 / 3002 loss=2.931, ppl=7.63, wps=5978.3, ups=0.09, wpb=64907, bsz=128, num_updates=2183, lr=9.99905e-05, gnorm=2.486, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24398 2021-06-19 01:25:35 | INFO | train_inner | epoch 001: 2204 / 3002 loss=2.945, ppl=7.7, wps=5785.3, ups=0.09, wpb=64787, bsz=128, num_updates=2184, lr=9.99905e-05, gnorm=2.322, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24409 2021-06-19 01:25:46 | INFO | train_inner | epoch 001: 2205 / 3002 loss=3.013, ppl=8.07, wps=5848.5, ups=0.09, wpb=64819, bsz=128, num_updates=2185, lr=9.99905e-05, gnorm=2.424, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24420 2021-06-19 01:25:57 | INFO | train_inner | epoch 001: 2206 / 3002 loss=3.048, ppl=8.27, wps=5775.6, ups=0.09, wpb=64776, bsz=128, num_updates=2186, lr=9.99905e-05, gnorm=2.402, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24431 2021-06-19 01:26:08 | INFO | train_inner | epoch 001: 2207 / 3002 loss=3.161, ppl=8.94, wps=5873, ups=0.09, wpb=64839, bsz=128, num_updates=2187, lr=9.99905e-05, gnorm=2.611, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24442 2021-06-19 01:26:19 | INFO | train_inner | epoch 001: 2208 / 3002 loss=2.916, ppl=7.55, wps=5997, ups=0.09, wpb=64830, bsz=128, num_updates=2188, lr=9.99905e-05, gnorm=2.534, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24453 2021-06-19 01:26:30 | INFO | train_inner | epoch 001: 2209 / 3002 loss=2.882, ppl=7.37, wps=5852.7, ups=0.09, wpb=64871, bsz=128, num_updates=2189, lr=9.99905e-05, gnorm=2.562, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24464 2021-06-19 01:26:41 | INFO | train_inner | epoch 001: 2210 / 3002 loss=2.93, ppl=7.62, wps=5799.8, ups=0.09, wpb=64808, bsz=128, num_updates=2190, lr=9.99905e-05, gnorm=2.478, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24476 2021-06-19 01:26:52 | INFO | train_inner | epoch 001: 2211 / 3002 loss=2.847, ppl=7.2, wps=5957.2, ups=0.09, wpb=64809, bsz=128, num_updates=2191, lr=9.99905e-05, gnorm=4.216, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24486 2021-06-19 01:27:03 | INFO | train_inner | epoch 001: 2212 / 3002 loss=2.84, ppl=7.16, wps=5774.1, ups=0.09, wpb=64821, bsz=128, num_updates=2192, lr=9.99905e-05, gnorm=2.8, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24498 2021-06-19 01:27:14 | INFO | train_inner | epoch 001: 2213 / 3002 loss=3.032, ppl=8.18, wps=5941.8, ups=0.09, wpb=64807, bsz=128, num_updates=2193, lr=9.99905e-05, gnorm=2.465, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24509 2021-06-19 01:27:25 | INFO | train_inner | epoch 001: 2214 / 3002 loss=2.928, ppl=7.61, wps=5823.5, ups=0.09, wpb=64818, bsz=128, num_updates=2194, lr=9.99904e-05, gnorm=2.611, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24520 2021-06-19 01:27:36 | INFO | train_inner | epoch 001: 2215 / 3002 loss=2.885, ppl=7.39, wps=5879.3, ups=0.09, wpb=64831, bsz=128, num_updates=2195, lr=9.99904e-05, gnorm=2.906, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24531 2021-06-19 01:27:47 | INFO | train_inner | epoch 001: 2216 / 3002 loss=2.92, ppl=7.57, wps=5876.6, ups=0.09, wpb=64791, bsz=128, num_updates=2196, lr=9.99904e-05, gnorm=2.514, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24542 2021-06-19 01:27:59 | INFO | train_inner | epoch 001: 2217 / 3002 loss=2.708, ppl=6.53, wps=5799.7, ups=0.09, wpb=64808, bsz=128, num_updates=2197, lr=9.99904e-05, gnorm=2.372, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24553 2021-06-19 01:28:09 | INFO | train_inner | epoch 001: 2218 / 3002 loss=3.015, ppl=8.08, wps=5965.5, ups=0.09, wpb=64802, bsz=128, num_updates=2198, lr=9.99904e-05, gnorm=2.458, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24564 2021-06-19 01:28:21 | INFO | train_inner | epoch 001: 2219 / 3002 loss=3.047, ppl=8.26, wps=5776, ups=0.09, wpb=64797, bsz=128, num_updates=2199, lr=9.99904e-05, gnorm=2.952, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24575 2021-06-19 01:28:32 | INFO | train_inner | epoch 001: 2220 / 3002 loss=2.799, ppl=6.96, wps=5879.4, ups=0.09, wpb=64922, bsz=128, num_updates=2200, lr=9.99904e-05, gnorm=2.428, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24586 2021-06-19 01:28:43 | INFO | train_inner | epoch 001: 2221 / 3002 loss=2.911, ppl=7.52, wps=5985.3, ups=0.09, wpb=64881, bsz=128, num_updates=2201, lr=9.99904e-05, gnorm=2.544, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24597 2021-06-19 01:28:53 | INFO | train_inner | epoch 001: 2222 / 3002 loss=2.856, ppl=7.24, wps=5978.9, ups=0.09, wpb=64842, bsz=128, num_updates=2202, lr=9.99904e-05, gnorm=2.526, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24608 2021-06-19 01:29:04 | INFO | train_inner | epoch 001: 2223 / 3002 loss=2.975, ppl=7.86, wps=5874.4, ups=0.09, wpb=64863, bsz=128, num_updates=2203, lr=9.99904e-05, gnorm=2.441, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24619 2021-06-19 01:29:15 | INFO | train_inner | epoch 001: 2224 / 3002 loss=2.791, ppl=6.92, wps=5854.7, ups=0.09, wpb=64836, bsz=128, num_updates=2204, lr=9.99904e-05, gnorm=2.395, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24630 2021-06-19 01:29:26 | INFO | train_inner | epoch 001: 2225 / 3002 loss=2.958, ppl=7.77, wps=5939.5, ups=0.09, wpb=64861, bsz=128, num_updates=2205, lr=9.99904e-05, gnorm=2.513, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24641 2021-06-19 01:29:38 | INFO | train_inner | epoch 001: 2226 / 3002 loss=3.103, ppl=8.59, wps=5801.2, ups=0.09, wpb=64785, bsz=128, num_updates=2206, lr=9.99904e-05, gnorm=2.693, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24652 2021-06-19 01:29:48 | INFO | train_inner | epoch 001: 2227 / 3002 loss=2.908, ppl=7.51, wps=5993.2, ups=0.09, wpb=64818, bsz=128, num_updates=2207, lr=9.99903e-05, gnorm=2.356, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24663 2021-06-19 01:30:00 | INFO | train_inner | epoch 001: 2228 / 3002 loss=2.727, ppl=6.62, wps=5830.6, ups=0.09, wpb=64855, bsz=128, num_updates=2208, lr=9.99903e-05, gnorm=2.412, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24674 2021-06-19 01:30:11 | INFO | train_inner | epoch 001: 2229 / 3002 loss=3.021, ppl=8.12, wps=5846.5, ups=0.09, wpb=64883, bsz=128, num_updates=2209, lr=9.99903e-05, gnorm=86.127, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24685 2021-06-19 01:30:22 | INFO | train_inner | epoch 001: 2230 / 3002 loss=2.969, ppl=7.83, wps=5889.3, ups=0.09, wpb=64814, bsz=128, num_updates=2210, lr=9.99903e-05, gnorm=2.364, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24696 2021-06-19 01:30:33 | INFO | train_inner | epoch 001: 2231 / 3002 loss=2.903, ppl=7.48, wps=5721.9, ups=0.09, wpb=64771, bsz=128, num_updates=2211, lr=9.99903e-05, gnorm=2.86, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24707 2021-06-19 01:30:44 | INFO | train_inner | epoch 001: 2232 / 3002 loss=2.928, ppl=7.61, wps=5917.8, ups=0.09, wpb=64860, bsz=128, num_updates=2212, lr=9.99903e-05, gnorm=2.883, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24718 2021-06-19 01:30:55 | INFO | train_inner | epoch 001: 2233 / 3002 loss=2.909, ppl=7.51, wps=5888.1, ups=0.09, wpb=64880, bsz=128, num_updates=2213, lr=9.99903e-05, gnorm=2.698, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24729 2021-06-19 01:31:06 | INFO | train_inner | epoch 001: 2234 / 3002 loss=2.827, ppl=7.09, wps=5766.2, ups=0.09, wpb=64845, bsz=128, num_updates=2214, lr=9.99903e-05, gnorm=2.953, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24741 2021-06-19 01:31:17 | INFO | train_inner | epoch 001: 2235 / 3002 loss=2.893, ppl=7.43, wps=5874.1, ups=0.09, wpb=64829, bsz=128, num_updates=2215, lr=9.99903e-05, gnorm=3.006, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24752 2021-06-19 01:31:28 | INFO | train_inner | epoch 001: 2236 / 3002 loss=2.862, ppl=7.27, wps=5956.2, ups=0.09, wpb=64845, bsz=128, num_updates=2216, lr=9.99903e-05, gnorm=4.227, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24762 2021-06-19 01:31:39 | INFO | train_inner | epoch 001: 2237 / 3002 loss=2.811, ppl=7.02, wps=5855.7, ups=0.09, wpb=64875, bsz=128, num_updates=2217, lr=9.99903e-05, gnorm=3.145, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24774 2021-06-19 01:31:50 | INFO | train_inner | epoch 001: 2238 / 3002 loss=3.029, ppl=8.16, wps=5756.4, ups=0.09, wpb=64801, bsz=128, num_updates=2218, lr=9.99903e-05, gnorm=6.11, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24785 2021-06-19 01:32:01 | INFO | train_inner | epoch 001: 2239 / 3002 loss=3.203, ppl=9.21, wps=5926.9, ups=0.09, wpb=64828, bsz=128, num_updates=2219, lr=9.99902e-05, gnorm=4.395, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24796 2021-06-19 01:32:12 | INFO | train_inner | epoch 001: 2240 / 3002 loss=3.091, ppl=8.52, wps=5888.8, ups=0.09, wpb=64826, bsz=128, num_updates=2220, lr=9.99902e-05, gnorm=9.295, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24807 2021-06-19 01:32:23 | INFO | train_inner | epoch 001: 2241 / 3002 loss=2.942, ppl=7.68, wps=5872.8, ups=0.09, wpb=64769, bsz=128, num_updates=2221, lr=9.99902e-05, gnorm=3.979, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24818 2021-06-19 01:32:35 | INFO | train_inner | epoch 001: 2242 / 3002 loss=2.845, ppl=7.18, wps=5752.3, ups=0.09, wpb=64885, bsz=128, num_updates=2222, lr=9.99902e-05, gnorm=5.865, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24829 2021-06-19 01:32:46 | INFO | train_inner | epoch 001: 2243 / 3002 loss=2.919, ppl=7.56, wps=5857.9, ups=0.09, wpb=64911, bsz=128, num_updates=2223, lr=9.99902e-05, gnorm=4.606, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24840 2021-06-19 01:32:57 | INFO | train_inner | epoch 001: 2244 / 3002 loss=2.94, ppl=7.67, wps=5836.3, ups=0.09, wpb=64757, bsz=128, num_updates=2224, lr=9.99902e-05, gnorm=3.518, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24851 2021-06-19 01:33:08 | INFO | train_inner | epoch 001: 2245 / 3002 loss=2.839, ppl=7.15, wps=5899.7, ups=0.09, wpb=64808, bsz=128, num_updates=2225, lr=9.99902e-05, gnorm=2.969, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24862 2021-06-19 01:33:19 | INFO | train_inner | epoch 001: 2246 / 3002 loss=2.933, ppl=7.64, wps=5861, ups=0.09, wpb=64768, bsz=128, num_updates=2226, lr=9.99902e-05, gnorm=2.684, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24873 2021-06-19 01:33:30 | INFO | train_inner | epoch 001: 2247 / 3002 loss=3.088, ppl=8.5, wps=5836.9, ups=0.09, wpb=64738, bsz=128, num_updates=2227, lr=9.99902e-05, gnorm=2.855, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24884 2021-06-19 01:33:41 | INFO | train_inner | epoch 001: 2248 / 3002 loss=3.02, ppl=8.11, wps=5867.1, ups=0.09, wpb=64783, bsz=128, num_updates=2228, lr=9.99902e-05, gnorm=2.612, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24895 2021-06-19 01:33:52 | INFO | train_inner | epoch 001: 2249 / 3002 loss=3.093, ppl=8.53, wps=5778.7, ups=0.09, wpb=64808, bsz=128, num_updates=2229, lr=9.99902e-05, gnorm=2.597, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24907 2021-06-19 01:34:03 | INFO | train_inner | epoch 001: 2250 / 3002 loss=3.074, ppl=8.42, wps=5866.3, ups=0.09, wpb=64766, bsz=128, num_updates=2230, lr=9.99902e-05, gnorm=5.796, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24918 2021-06-19 01:34:14 | INFO | train_inner | epoch 001: 2251 / 3002 loss=3.047, ppl=8.26, wps=5914.8, ups=0.09, wpb=64767, bsz=128, num_updates=2231, lr=9.99902e-05, gnorm=2.537, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24929 2021-06-19 01:34:25 | INFO | train_inner | epoch 001: 2252 / 3002 loss=3.045, ppl=8.25, wps=5782.4, ups=0.09, wpb=64756, bsz=128, num_updates=2232, lr=9.99901e-05, gnorm=2.551, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24940 2021-06-19 01:34:36 | INFO | train_inner | epoch 001: 2253 / 3002 loss=2.994, ppl=7.97, wps=5871.3, ups=0.09, wpb=64801, bsz=128, num_updates=2233, lr=9.99901e-05, gnorm=2.575, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24951 2021-06-19 01:34:48 | INFO | train_inner | epoch 001: 2254 / 3002 loss=3.034, ppl=8.19, wps=5836.2, ups=0.09, wpb=64829, bsz=128, num_updates=2234, lr=9.99901e-05, gnorm=2.518, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24962 2021-06-19 01:34:59 | INFO | train_inner | epoch 001: 2255 / 3002 loss=2.97, ppl=7.84, wps=5668.5, ups=0.09, wpb=64784, bsz=128, num_updates=2235, lr=9.99901e-05, gnorm=2.566, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24973 2021-06-19 01:35:10 | INFO | train_inner | epoch 001: 2256 / 3002 loss=3.046, ppl=8.26, wps=5941.1, ups=0.09, wpb=64875, bsz=128, num_updates=2236, lr=9.99901e-05, gnorm=3.583, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24984 2021-06-19 01:35:21 | INFO | train_inner | epoch 001: 2257 / 3002 loss=2.905, ppl=7.49, wps=5849.2, ups=0.09, wpb=64810, bsz=128, num_updates=2237, lr=9.99901e-05, gnorm=2.444, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24995 2021-06-19 01:35:32 | INFO | train_inner | epoch 001: 2258 / 3002 loss=3.18, ppl=9.06, wps=5710, ups=0.09, wpb=64815, bsz=128, num_updates=2238, lr=9.99901e-05, gnorm=2.572, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25007 2021-06-19 01:35:43 | INFO | train_inner | epoch 001: 2259 / 3002 loss=2.811, ppl=7.02, wps=5870.3, ups=0.09, wpb=64826, bsz=128, num_updates=2239, lr=9.99901e-05, gnorm=2.677, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25018 2021-06-19 01:35:55 | INFO | train_inner | epoch 001: 2260 / 3002 loss=2.916, ppl=7.55, wps=5788.2, ups=0.09, wpb=64742, bsz=128, num_updates=2240, lr=9.99901e-05, gnorm=2.479, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25029 2021-06-19 01:36:06 | INFO | train_inner | epoch 001: 2261 / 3002 loss=3.073, ppl=8.41, wps=5818, ups=0.09, wpb=64851, bsz=128, num_updates=2241, lr=9.99901e-05, gnorm=3.907, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25040 2021-06-19 01:36:17 | INFO | train_inner | epoch 001: 2262 / 3002 loss=3.038, ppl=8.21, wps=5998.4, ups=0.09, wpb=64808, bsz=128, num_updates=2242, lr=9.99901e-05, gnorm=2.542, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=25051 2021-06-19 01:36:28 | INFO | train_inner | epoch 001: 2263 / 3002 loss=2.96, ppl=7.78, wps=5832.3, ups=0.09, wpb=64800, bsz=128, num_updates=2243, lr=9.99901e-05, gnorm=2.792, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25062 2021-06-19 01:36:39 | INFO | train_inner | epoch 001: 2264 / 3002 loss=3.084, ppl=8.48, wps=5847.9, ups=0.09, wpb=64899, bsz=128, num_updates=2244, lr=9.999e-05, gnorm=2.531, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25073 2021-06-19 01:36:50 | INFO | train_inner | epoch 001: 2265 / 3002 loss=2.875, ppl=7.34, wps=5909.1, ups=0.09, wpb=64947, bsz=128, num_updates=2245, lr=9.999e-05, gnorm=2.436, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25084 2021-06-19 01:37:01 | INFO | train_inner | epoch 001: 2266 / 3002 loss=3.038, ppl=8.21, wps=5753.5, ups=0.09, wpb=64761, bsz=128, num_updates=2246, lr=9.999e-05, gnorm=2.579, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25095 2021-06-19 01:37:12 | INFO | train_inner | epoch 001: 2267 / 3002 loss=3.009, ppl=8.05, wps=5857.3, ups=0.09, wpb=64781, bsz=128, num_updates=2247, lr=9.999e-05, gnorm=2.448, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25106 2021-06-19 01:37:23 | INFO | train_inner | epoch 001: 2268 / 3002 loss=2.877, ppl=7.35, wps=5873.5, ups=0.09, wpb=64883, bsz=128, num_updates=2248, lr=9.999e-05, gnorm=2.467, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25117 2021-06-19 01:37:34 | INFO | train_inner | epoch 001: 2269 / 3002 loss=2.929, ppl=7.61, wps=5867.8, ups=0.09, wpb=64851, bsz=128, num_updates=2249, lr=9.999e-05, gnorm=2.439, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25128 2021-06-19 01:37:45 | INFO | train_inner | epoch 001: 2270 / 3002 loss=2.978, ppl=7.88, wps=5880.8, ups=0.09, wpb=64820, bsz=128, num_updates=2250, lr=9.999e-05, gnorm=2.414, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25140 2021-06-19 01:37:56 | INFO | train_inner | epoch 001: 2271 / 3002 loss=2.929, ppl=7.61, wps=5892, ups=0.09, wpb=64883, bsz=128, num_updates=2251, lr=9.999e-05, gnorm=2.604, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25151 2021-06-19 01:38:07 | INFO | train_inner | epoch 001: 2272 / 3002 loss=3.018, ppl=8.1, wps=5830.3, ups=0.09, wpb=64878, bsz=128, num_updates=2252, lr=9.999e-05, gnorm=2.543, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25162 2021-06-19 01:38:18 | INFO | train_inner | epoch 001: 2273 / 3002 loss=3.041, ppl=8.23, wps=5961.8, ups=0.09, wpb=64801, bsz=128, num_updates=2253, lr=9.999e-05, gnorm=2.635, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=25173 2021-06-19 01:38:29 | INFO | train_inner | epoch 001: 2274 / 3002 loss=2.893, ppl=7.43, wps=5900.3, ups=0.09, wpb=64851, bsz=128, num_updates=2254, lr=9.999e-05, gnorm=2.403, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25184 2021-06-19 01:38:40 | INFO | train_inner | epoch 001: 2275 / 3002 loss=3.152, ppl=8.89, wps=5838.3, ups=0.09, wpb=64804, bsz=128, num_updates=2255, lr=9.999e-05, gnorm=2.432, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25195 2021-06-19 01:38:51 | INFO | train_inner | epoch 001: 2276 / 3002 loss=2.899, ppl=7.46, wps=5833, ups=0.09, wpb=64885, bsz=128, num_updates=2256, lr=9.999e-05, gnorm=2.558, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25206 2021-06-19 01:39:03 | INFO | train_inner | epoch 001: 2277 / 3002 loss=2.97, ppl=7.84, wps=5774.6, ups=0.09, wpb=64770, bsz=128, num_updates=2257, lr=9.99899e-05, gnorm=2.506, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25217 2021-06-19 01:39:14 | INFO | train_inner | epoch 001: 2278 / 3002 loss=2.943, ppl=7.69, wps=5711.7, ups=0.09, wpb=64814, bsz=128, num_updates=2258, lr=9.99899e-05, gnorm=3.467, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25228 2021-06-19 01:39:25 | INFO | train_inner | epoch 001: 2279 / 3002 loss=2.98, ppl=7.89, wps=5794.9, ups=0.09, wpb=64738, bsz=128, num_updates=2259, lr=9.99899e-05, gnorm=2.482, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25239 2021-06-19 01:39:36 | INFO | train_inner | epoch 001: 2280 / 3002 loss=3.129, ppl=8.75, wps=5787, ups=0.09, wpb=64756, bsz=128, num_updates=2260, lr=9.99899e-05, gnorm=2.565, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25251 2021-06-19 01:39:47 | INFO | train_inner | epoch 001: 2281 / 3002 loss=2.906, ppl=7.49, wps=5866.5, ups=0.09, wpb=64728, bsz=128, num_updates=2261, lr=9.99899e-05, gnorm=2.52, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25262 2021-06-19 01:39:58 | INFO | train_inner | epoch 001: 2282 / 3002 loss=2.841, ppl=7.17, wps=5859.7, ups=0.09, wpb=64913, bsz=128, num_updates=2262, lr=9.99899e-05, gnorm=2.475, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25273 2021-06-19 01:40:10 | INFO | train_inner | epoch 001: 2283 / 3002 loss=2.969, ppl=7.83, wps=5800, ups=0.09, wpb=64845, bsz=128, num_updates=2263, lr=9.99899e-05, gnorm=2.529, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25284 2021-06-19 01:40:21 | INFO | train_inner | epoch 001: 2284 / 3002 loss=2.954, ppl=7.75, wps=5841.8, ups=0.09, wpb=64831, bsz=128, num_updates=2264, lr=9.99899e-05, gnorm=2.408, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25295 2021-06-19 01:40:32 | INFO | train_inner | epoch 001: 2285 / 3002 loss=3.228, ppl=9.37, wps=5792.9, ups=0.09, wpb=64778, bsz=128, num_updates=2265, lr=9.99899e-05, gnorm=2.447, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25306 2021-06-19 01:40:43 | INFO | train_inner | epoch 001: 2286 / 3002 loss=2.837, ppl=7.14, wps=5843.5, ups=0.09, wpb=64832, bsz=128, num_updates=2266, lr=9.99899e-05, gnorm=2.352, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25317 2021-06-19 01:40:54 | INFO | train_inner | epoch 001: 2287 / 3002 loss=2.975, ppl=7.86, wps=5777.8, ups=0.09, wpb=64780, bsz=128, num_updates=2267, lr=9.99899e-05, gnorm=3.604, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25329 2021-06-19 01:41:05 | INFO | train_inner | epoch 001: 2288 / 3002 loss=3.014, ppl=8.08, wps=5824.3, ups=0.09, wpb=64831, bsz=128, num_updates=2268, lr=9.99899e-05, gnorm=2.455, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25340 2021-06-19 01:41:16 | INFO | train_inner | epoch 001: 2289 / 3002 loss=2.819, ppl=7.06, wps=5846.5, ups=0.09, wpb=64912, bsz=128, num_updates=2269, lr=9.99898e-05, gnorm=2.326, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25351 2021-06-19 01:41:28 | INFO | train_inner | epoch 001: 2290 / 3002 loss=3.072, ppl=8.41, wps=5821.3, ups=0.09, wpb=64760, bsz=128, num_updates=2270, lr=9.99898e-05, gnorm=2.679, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25362 2021-06-19 01:41:39 | INFO | train_inner | epoch 001: 2291 / 3002 loss=2.903, ppl=7.48, wps=5784.3, ups=0.09, wpb=64632, bsz=128, num_updates=2271, lr=9.99898e-05, gnorm=2.464, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25373 2021-06-19 01:41:50 | INFO | train_inner | epoch 001: 2292 / 3002 loss=2.934, ppl=7.64, wps=5902.4, ups=0.09, wpb=64802, bsz=128, num_updates=2272, lr=9.99898e-05, gnorm=2.508, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25384 2021-06-19 01:42:01 | INFO | train_inner | epoch 001: 2293 / 3002 loss=3.023, ppl=8.13, wps=5770.9, ups=0.09, wpb=64793, bsz=128, num_updates=2273, lr=9.99898e-05, gnorm=2.393, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25395 2021-06-19 01:42:12 | INFO | train_inner | epoch 001: 2294 / 3002 loss=2.976, ppl=7.87, wps=5868.9, ups=0.09, wpb=64770, bsz=128, num_updates=2274, lr=9.99898e-05, gnorm=2.643, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25406 2021-06-19 01:42:23 | INFO | train_inner | epoch 001: 2295 / 3002 loss=3.108, ppl=8.62, wps=5917.8, ups=0.09, wpb=64753, bsz=128, num_updates=2275, lr=9.99898e-05, gnorm=4.927, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=25417 2021-06-19 01:42:34 | INFO | train_inner | epoch 001: 2296 / 3002 loss=2.931, ppl=7.63, wps=5839.5, ups=0.09, wpb=64772, bsz=128, num_updates=2276, lr=9.99898e-05, gnorm=2.43, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25428 2021-06-19 01:42:45 | INFO | train_inner | epoch 001: 2297 / 3002 loss=2.882, ppl=7.37, wps=5816.5, ups=0.09, wpb=64869, bsz=128, num_updates=2277, lr=9.99898e-05, gnorm=2.276, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25440 2021-06-19 01:42:56 | INFO | train_inner | epoch 001: 2298 / 3002 loss=2.906, ppl=7.5, wps=5830.9, ups=0.09, wpb=64732, bsz=128, num_updates=2278, lr=9.99898e-05, gnorm=2.345, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25451 2021-06-19 01:43:07 | INFO | train_inner | epoch 001: 2299 / 3002 loss=2.874, ppl=7.33, wps=5840.7, ups=0.09, wpb=64790, bsz=128, num_updates=2279, lr=9.99898e-05, gnorm=2.396, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25462 2021-06-19 01:43:18 | INFO | train_inner | epoch 001: 2300 / 3002 loss=2.867, ppl=7.3, wps=5841.3, ups=0.09, wpb=64733, bsz=128, num_updates=2280, lr=9.99898e-05, gnorm=2.561, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25473 2021-06-19 01:43:30 | INFO | train_inner | epoch 001: 2301 / 3002 loss=2.929, ppl=7.62, wps=5810.4, ups=0.09, wpb=64738, bsz=128, num_updates=2281, lr=9.99898e-05, gnorm=2.448, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25484 2021-06-19 01:43:41 | INFO | train_inner | epoch 001: 2302 / 3002 loss=2.919, ppl=7.56, wps=5785.1, ups=0.09, wpb=64854, bsz=128, num_updates=2282, lr=9.99897e-05, gnorm=2.378, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25495 2021-06-19 01:43:52 | INFO | train_inner | epoch 001: 2303 / 3002 loss=2.901, ppl=7.47, wps=5873, ups=0.09, wpb=64788, bsz=128, num_updates=2283, lr=9.99897e-05, gnorm=2.536, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25506 2021-06-19 01:44:03 | INFO | train_inner | epoch 001: 2304 / 3002 loss=2.959, ppl=7.78, wps=5844, ups=0.09, wpb=64841, bsz=128, num_updates=2284, lr=9.99897e-05, gnorm=2.465, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25517 2021-06-19 01:44:14 | INFO | train_inner | epoch 001: 2305 / 3002 loss=2.869, ppl=7.3, wps=5976.7, ups=0.09, wpb=64824, bsz=128, num_updates=2285, lr=9.99897e-05, gnorm=2.386, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=25528 2021-06-19 01:44:25 | INFO | train_inner | epoch 001: 2306 / 3002 loss=2.945, ppl=7.7, wps=5806.6, ups=0.09, wpb=64875, bsz=128, num_updates=2286, lr=9.99897e-05, gnorm=2.355, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25539 2021-06-19 01:44:36 | INFO | train_inner | epoch 001: 2307 / 3002 loss=2.827, ppl=7.1, wps=5838, ups=0.09, wpb=64816, bsz=128, num_updates=2287, lr=9.99897e-05, gnorm=2.293, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25550 2021-06-19 01:44:47 | INFO | train_inner | epoch 001: 2308 / 3002 loss=3.052, ppl=8.3, wps=5863.1, ups=0.09, wpb=64832, bsz=128, num_updates=2288, lr=9.99897e-05, gnorm=2.288, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25561 2021-06-19 01:44:58 | INFO | train_inner | epoch 001: 2309 / 3002 loss=2.986, ppl=7.92, wps=5895.3, ups=0.09, wpb=64784, bsz=128, num_updates=2289, lr=9.99897e-05, gnorm=2.394, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25572 2021-06-19 01:45:09 | INFO | train_inner | epoch 001: 2310 / 3002 loss=2.88, ppl=7.36, wps=5876.8, ups=0.09, wpb=64870, bsz=128, num_updates=2290, lr=9.99897e-05, gnorm=2.382, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25583 2021-06-19 01:45:20 | INFO | train_inner | epoch 001: 2311 / 3002 loss=2.86, ppl=7.26, wps=5828.4, ups=0.09, wpb=64827, bsz=128, num_updates=2291, lr=9.99897e-05, gnorm=3.624, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25595 2021-06-19 01:45:31 | INFO | train_inner | epoch 001: 2312 / 3002 loss=2.869, ppl=7.3, wps=5888.7, ups=0.09, wpb=64838, bsz=128, num_updates=2292, lr=9.99897e-05, gnorm=2.431, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25606 2021-06-19 01:45:42 | INFO | train_inner | epoch 001: 2313 / 3002 loss=2.986, ppl=7.92, wps=5916.2, ups=0.09, wpb=64864, bsz=128, num_updates=2293, lr=9.99897e-05, gnorm=2.456, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25617 2021-06-19 01:45:53 | INFO | train_inner | epoch 001: 2314 / 3002 loss=2.885, ppl=7.39, wps=5868.7, ups=0.09, wpb=64810, bsz=128, num_updates=2294, lr=9.99896e-05, gnorm=2.444, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25628 2021-06-19 01:46:04 | INFO | train_inner | epoch 001: 2315 / 3002 loss=2.988, ppl=7.93, wps=5822.4, ups=0.09, wpb=64832, bsz=128, num_updates=2295, lr=9.99896e-05, gnorm=2.526, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25639 2021-06-19 01:46:16 | INFO | train_inner | epoch 001: 2316 / 3002 loss=2.845, ppl=7.19, wps=5843.4, ups=0.09, wpb=64896, bsz=128, num_updates=2296, lr=9.99896e-05, gnorm=2.323, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25650 2021-06-19 01:46:26 | INFO | train_inner | epoch 001: 2317 / 3002 loss=2.996, ppl=7.98, wps=5929, ups=0.09, wpb=64837, bsz=128, num_updates=2297, lr=9.99896e-05, gnorm=2.387, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=25661 2021-06-19 01:46:37 | INFO | train_inner | epoch 001: 2318 / 3002 loss=2.998, ppl=7.99, wps=5895.6, ups=0.09, wpb=64797, bsz=128, num_updates=2298, lr=9.99896e-05, gnorm=21.177, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25672 2021-06-19 01:46:48 | INFO | train_inner | epoch 001: 2319 / 3002 loss=2.982, ppl=7.9, wps=5929.2, ups=0.09, wpb=64834, bsz=128, num_updates=2299, lr=9.99896e-05, gnorm=2.28, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=25683 2021-06-19 01:47:00 | INFO | train_inner | epoch 001: 2320 / 3002 loss=2.947, ppl=7.71, wps=5748.6, ups=0.09, wpb=64740, bsz=128, num_updates=2300, lr=9.99896e-05, gnorm=2.436, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25694 2021-06-19 01:47:11 | INFO | train_inner | epoch 001: 2321 / 3002 loss=2.873, ppl=7.33, wps=5838.9, ups=0.09, wpb=64788, bsz=128, num_updates=2301, lr=9.99896e-05, gnorm=2.588, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25705 2021-06-19 01:47:22 | INFO | train_inner | epoch 001: 2322 / 3002 loss=2.82, ppl=7.06, wps=5944.6, ups=0.09, wpb=64928, bsz=128, num_updates=2302, lr=9.99896e-05, gnorm=8.33, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=25716 2021-06-19 01:47:33 | INFO | train_inner | epoch 001: 2323 / 3002 loss=2.973, ppl=7.85, wps=5792.3, ups=0.09, wpb=64760, bsz=128, num_updates=2303, lr=9.99896e-05, gnorm=2.415, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25727 2021-06-19 01:47:44 | INFO | train_inner | epoch 001: 2324 / 3002 loss=3.051, ppl=8.29, wps=5875.7, ups=0.09, wpb=64821, bsz=128, num_updates=2304, lr=9.99896e-05, gnorm=3.57, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25738 2021-06-19 01:47:55 | INFO | train_inner | epoch 001: 2325 / 3002 loss=3.18, ppl=9.06, wps=5732, ups=0.09, wpb=64776, bsz=128, num_updates=2305, lr=9.99896e-05, gnorm=2.467, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25750 2021-06-19 01:48:06 | INFO | train_inner | epoch 001: 2326 / 3002 loss=3.005, ppl=8.03, wps=5892.1, ups=0.09, wpb=64846, bsz=128, num_updates=2306, lr=9.99896e-05, gnorm=2.783, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25761 2021-06-19 01:48:17 | INFO | train_inner | epoch 001: 2327 / 3002 loss=2.941, ppl=7.68, wps=5953.5, ups=0.09, wpb=64798, bsz=128, num_updates=2307, lr=9.99895e-05, gnorm=2.431, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=25771 2021-06-19 01:48:28 | INFO | train_inner | epoch 001: 2328 / 3002 loss=3.027, ppl=8.15, wps=5799.8, ups=0.09, wpb=64871, bsz=128, num_updates=2308, lr=9.99895e-05, gnorm=2.923, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25783 2021-06-19 01:48:39 | INFO | train_inner | epoch 001: 2329 / 3002 loss=2.914, ppl=7.54, wps=5894.6, ups=0.09, wpb=64809, bsz=128, num_updates=2309, lr=9.99895e-05, gnorm=2.501, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25794 2021-06-19 01:48:50 | INFO | train_inner | epoch 001: 2330 / 3002 loss=3.142, ppl=8.83, wps=5839.4, ups=0.09, wpb=64843, bsz=128, num_updates=2310, lr=9.99895e-05, gnorm=2.549, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25805 2021-06-19 01:49:01 | INFO | train_inner | epoch 001: 2331 / 3002 loss=2.977, ppl=7.88, wps=5968.6, ups=0.09, wpb=64782, bsz=128, num_updates=2311, lr=9.99895e-05, gnorm=2.489, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=25816 2021-06-19 01:49:12 | INFO | train_inner | epoch 001: 2332 / 3002 loss=2.971, ppl=7.84, wps=5904.2, ups=0.09, wpb=64780, bsz=128, num_updates=2312, lr=9.99895e-05, gnorm=2.571, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25827 2021-06-19 01:49:23 | INFO | train_inner | epoch 001: 2333 / 3002 loss=2.839, ppl=7.15, wps=5921.4, ups=0.09, wpb=64830, bsz=128, num_updates=2313, lr=9.99895e-05, gnorm=2.392, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25837 2021-06-19 01:49:34 | INFO | train_inner | epoch 001: 2334 / 3002 loss=2.965, ppl=7.81, wps=5806.6, ups=0.09, wpb=64816, bsz=128, num_updates=2314, lr=9.99895e-05, gnorm=2.472, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25849 2021-06-19 01:49:45 | INFO | train_inner | epoch 001: 2335 / 3002 loss=2.922, ppl=7.58, wps=5855.3, ups=0.09, wpb=64872, bsz=128, num_updates=2315, lr=9.99895e-05, gnorm=2.508, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25860 2021-06-19 01:49:56 | INFO | train_inner | epoch 001: 2336 / 3002 loss=3.013, ppl=8.08, wps=5875.8, ups=0.09, wpb=64743, bsz=128, num_updates=2316, lr=9.99895e-05, gnorm=2.537, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25871 2021-06-19 01:50:08 | INFO | train_inner | epoch 001: 2337 / 3002 loss=2.909, ppl=7.51, wps=5795.2, ups=0.09, wpb=64877, bsz=128, num_updates=2317, lr=9.99895e-05, gnorm=2.495, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25882 2021-06-19 01:50:19 | INFO | train_inner | epoch 001: 2338 / 3002 loss=2.774, ppl=6.84, wps=5844.9, ups=0.09, wpb=64751, bsz=128, num_updates=2318, lr=9.99895e-05, gnorm=4.337, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25893 2021-06-19 01:50:30 | INFO | train_inner | epoch 001: 2339 / 3002 loss=2.853, ppl=7.22, wps=5846, ups=0.09, wpb=64809, bsz=128, num_updates=2319, lr=9.99894e-05, gnorm=2.36, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25904 2021-06-19 01:50:41 | INFO | train_inner | epoch 001: 2340 / 3002 loss=2.971, ppl=7.84, wps=5823.4, ups=0.09, wpb=64841, bsz=128, num_updates=2320, lr=9.99894e-05, gnorm=2.437, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25915 2021-06-19 01:50:52 | INFO | train_inner | epoch 001: 2341 / 3002 loss=2.86, ppl=7.26, wps=5903.4, ups=0.09, wpb=64902, bsz=128, num_updates=2321, lr=9.99894e-05, gnorm=2.364, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25926 2021-06-19 01:51:03 | INFO | train_inner | epoch 001: 2342 / 3002 loss=2.926, ppl=7.6, wps=5950.5, ups=0.09, wpb=64868, bsz=128, num_updates=2322, lr=9.99894e-05, gnorm=2.452, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=25937 2021-06-19 01:51:14 | INFO | train_inner | epoch 001: 2343 / 3002 loss=2.899, ppl=7.46, wps=5876.4, ups=0.09, wpb=64807, bsz=128, num_updates=2323, lr=9.99894e-05, gnorm=6.751, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25948 2021-06-19 01:51:25 | INFO | train_inner | epoch 001: 2344 / 3002 loss=2.863, ppl=7.28, wps=5682.3, ups=0.09, wpb=64803, bsz=128, num_updates=2324, lr=9.99894e-05, gnorm=2.534, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25960 2021-06-19 01:51:36 | INFO | train_inner | epoch 001: 2345 / 3002 loss=2.844, ppl=7.18, wps=5856.4, ups=0.09, wpb=64859, bsz=128, num_updates=2325, lr=9.99894e-05, gnorm=4.598, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25971 2021-06-19 01:51:47 | INFO | train_inner | epoch 001: 2346 / 3002 loss=2.927, ppl=7.61, wps=5922.7, ups=0.09, wpb=64875, bsz=128, num_updates=2326, lr=9.99894e-05, gnorm=2.698, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25982 2021-06-19 01:51:58 | INFO | train_inner | epoch 001: 2347 / 3002 loss=2.856, ppl=7.24, wps=5970.8, ups=0.09, wpb=64827, bsz=128, num_updates=2327, lr=9.99894e-05, gnorm=2.616, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=25992 2021-06-19 01:52:09 | INFO | train_inner | epoch 001: 2348 / 3002 loss=3.032, ppl=8.18, wps=5842.7, ups=0.09, wpb=64763, bsz=128, num_updates=2328, lr=9.99894e-05, gnorm=2.382, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26004 2021-06-19 01:52:20 | INFO | train_inner | epoch 001: 2349 / 3002 loss=2.79, ppl=6.92, wps=5824.2, ups=0.09, wpb=64776, bsz=128, num_updates=2329, lr=9.99894e-05, gnorm=2.453, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26015 2021-06-19 01:52:31 | INFO | train_inner | epoch 001: 2350 / 3002 loss=2.799, ppl=6.96, wps=5822.2, ups=0.09, wpb=64810, bsz=128, num_updates=2330, lr=9.99894e-05, gnorm=2.474, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26026 2021-06-19 01:52:43 | INFO | train_inner | epoch 001: 2351 / 3002 loss=2.67, ppl=6.36, wps=5812.2, ups=0.09, wpb=64799, bsz=128, num_updates=2331, lr=9.99894e-05, gnorm=2.323, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26037 2021-06-19 01:52:54 | INFO | train_inner | epoch 001: 2352 / 3002 loss=2.758, ppl=6.76, wps=5880.6, ups=0.09, wpb=64792, bsz=128, num_updates=2332, lr=9.99893e-05, gnorm=2.532, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26048 2021-06-19 01:53:05 | INFO | train_inner | epoch 001: 2353 / 3002 loss=2.84, ppl=7.16, wps=5915.7, ups=0.09, wpb=64931, bsz=128, num_updates=2333, lr=9.99893e-05, gnorm=2.406, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26059 2021-06-19 01:53:16 | INFO | train_inner | epoch 001: 2354 / 3002 loss=2.811, ppl=7.02, wps=5863.5, ups=0.09, wpb=64792, bsz=128, num_updates=2334, lr=9.99893e-05, gnorm=2.459, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26070 2021-06-19 01:53:27 | INFO | train_inner | epoch 001: 2355 / 3002 loss=2.975, ppl=7.86, wps=5775.9, ups=0.09, wpb=64841, bsz=128, num_updates=2335, lr=9.99893e-05, gnorm=2.475, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26081 2021-06-19 01:53:38 | INFO | train_inner | epoch 001: 2356 / 3002 loss=2.943, ppl=7.69, wps=5841.4, ups=0.09, wpb=64768, bsz=128, num_updates=2336, lr=9.99893e-05, gnorm=2.387, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26092 2021-06-19 01:53:49 | INFO | train_inner | epoch 001: 2357 / 3002 loss=3.066, ppl=8.37, wps=5914.5, ups=0.09, wpb=64725, bsz=128, num_updates=2337, lr=9.99893e-05, gnorm=2.499, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26103 2021-06-19 01:54:00 | INFO | train_inner | epoch 001: 2358 / 3002 loss=2.952, ppl=7.74, wps=5838.3, ups=0.09, wpb=64822, bsz=128, num_updates=2338, lr=9.99893e-05, gnorm=2.395, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26114 2021-06-19 01:54:11 | INFO | train_inner | epoch 001: 2359 / 3002 loss=2.784, ppl=6.89, wps=5782.1, ups=0.09, wpb=64819, bsz=128, num_updates=2339, lr=9.99893e-05, gnorm=2.434, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26126 2021-06-19 01:54:22 | INFO | train_inner | epoch 001: 2360 / 3002 loss=2.893, ppl=7.43, wps=5962.9, ups=0.09, wpb=64912, bsz=128, num_updates=2340, lr=9.99893e-05, gnorm=2.507, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26136 2021-06-19 01:54:33 | INFO | train_inner | epoch 001: 2361 / 3002 loss=2.904, ppl=7.48, wps=5902.9, ups=0.09, wpb=64853, bsz=128, num_updates=2341, lr=9.99893e-05, gnorm=2.347, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26147 2021-06-19 01:54:44 | INFO | train_inner | epoch 001: 2362 / 3002 loss=3.01, ppl=8.06, wps=5785.1, ups=0.09, wpb=64728, bsz=128, num_updates=2342, lr=9.99893e-05, gnorm=2.848, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26159 2021-06-19 01:54:55 | INFO | train_inner | epoch 001: 2363 / 3002 loss=3.035, ppl=8.19, wps=5833, ups=0.09, wpb=64873, bsz=128, num_updates=2343, lr=9.99893e-05, gnorm=2.397, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26170 2021-06-19 01:55:06 | INFO | train_inner | epoch 001: 2364 / 3002 loss=2.716, ppl=6.57, wps=5892.1, ups=0.09, wpb=64886, bsz=128, num_updates=2344, lr=9.99892e-05, gnorm=49.332, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26181 2021-06-19 01:55:18 | INFO | train_inner | epoch 001: 2365 / 3002 loss=2.948, ppl=7.72, wps=5825, ups=0.09, wpb=64846, bsz=128, num_updates=2345, lr=9.99892e-05, gnorm=2.397, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26192 2021-06-19 01:55:28 | INFO | train_inner | epoch 001: 2366 / 3002 loss=2.885, ppl=7.39, wps=5959.2, ups=0.09, wpb=64826, bsz=128, num_updates=2346, lr=9.99892e-05, gnorm=2.527, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26203 2021-06-19 01:55:40 | INFO | train_inner | epoch 001: 2367 / 3002 loss=2.799, ppl=6.96, wps=5840.6, ups=0.09, wpb=64837, bsz=128, num_updates=2347, lr=9.99892e-05, gnorm=2.37, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26214 2021-06-19 01:55:51 | INFO | train_inner | epoch 001: 2368 / 3002 loss=3.016, ppl=8.09, wps=5885.8, ups=0.09, wpb=64815, bsz=128, num_updates=2348, lr=9.99892e-05, gnorm=2.555, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26225 2021-06-19 01:56:02 | INFO | train_inner | epoch 001: 2369 / 3002 loss=2.948, ppl=7.72, wps=5812.9, ups=0.09, wpb=64847, bsz=128, num_updates=2349, lr=9.99892e-05, gnorm=3.71, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26236 2021-06-19 01:56:13 | INFO | train_inner | epoch 001: 2370 / 3002 loss=2.892, ppl=7.42, wps=5886, ups=0.09, wpb=64875, bsz=128, num_updates=2350, lr=9.99892e-05, gnorm=2.881, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26247 2021-06-19 01:56:24 | INFO | train_inner | epoch 001: 2371 / 3002 loss=3.133, ppl=8.77, wps=5798.8, ups=0.09, wpb=64777, bsz=128, num_updates=2351, lr=9.99892e-05, gnorm=2.581, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26258 2021-06-19 01:56:35 | INFO | train_inner | epoch 001: 2372 / 3002 loss=2.921, ppl=7.58, wps=5871.3, ups=0.09, wpb=64823, bsz=128, num_updates=2352, lr=9.99892e-05, gnorm=3.693, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26269 2021-06-19 01:56:46 | INFO | train_inner | epoch 001: 2373 / 3002 loss=2.912, ppl=7.53, wps=5854.4, ups=0.09, wpb=64833, bsz=128, num_updates=2353, lr=9.99892e-05, gnorm=3.514, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26280 2021-06-19 01:56:57 | INFO | train_inner | epoch 001: 2374 / 3002 loss=3.068, ppl=8.39, wps=5763.7, ups=0.09, wpb=64855, bsz=128, num_updates=2354, lr=9.99892e-05, gnorm=2.602, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26292 2021-06-19 01:57:08 | INFO | train_inner | epoch 001: 2375 / 3002 loss=2.962, ppl=7.79, wps=5838.4, ups=0.09, wpb=64864, bsz=128, num_updates=2355, lr=9.99892e-05, gnorm=2.821, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26303 2021-06-19 01:57:19 | INFO | train_inner | epoch 001: 2376 / 3002 loss=3.035, ppl=8.19, wps=5912, ups=0.09, wpb=64775, bsz=128, num_updates=2356, lr=9.99892e-05, gnorm=2.781, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26314 2021-06-19 01:57:30 | INFO | train_inner | epoch 001: 2377 / 3002 loss=2.905, ppl=7.49, wps=5809.6, ups=0.09, wpb=64788, bsz=128, num_updates=2357, lr=9.99891e-05, gnorm=8.051, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26325 2021-06-19 01:57:42 | INFO | train_inner | epoch 001: 2378 / 3002 loss=2.991, ppl=7.95, wps=5790.7, ups=0.09, wpb=64769, bsz=128, num_updates=2358, lr=9.99891e-05, gnorm=2.565, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26336 2021-06-19 01:57:53 | INFO | train_inner | epoch 001: 2379 / 3002 loss=2.954, ppl=7.75, wps=5812.5, ups=0.09, wpb=64889, bsz=128, num_updates=2359, lr=9.99891e-05, gnorm=2.581, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26347 2021-06-19 01:58:04 | INFO | train_inner | epoch 001: 2380 / 3002 loss=3.053, ppl=8.3, wps=5868.2, ups=0.09, wpb=64845, bsz=128, num_updates=2360, lr=9.99891e-05, gnorm=2.568, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26358 2021-06-19 01:58:15 | INFO | train_inner | epoch 001: 2381 / 3002 loss=2.967, ppl=7.82, wps=5802.8, ups=0.09, wpb=64770, bsz=128, num_updates=2361, lr=9.99891e-05, gnorm=2.462, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26369 2021-06-19 01:58:26 | INFO | train_inner | epoch 001: 2382 / 3002 loss=2.867, ppl=7.29, wps=5872, ups=0.09, wpb=64889, bsz=128, num_updates=2362, lr=9.99891e-05, gnorm=2.64, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26380 2021-06-19 01:58:37 | INFO | train_inner | epoch 001: 2383 / 3002 loss=3.015, ppl=8.08, wps=5845.9, ups=0.09, wpb=64810, bsz=128, num_updates=2363, lr=9.99891e-05, gnorm=2.522, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26392 2021-06-19 01:58:48 | INFO | train_inner | epoch 001: 2384 / 3002 loss=2.864, ppl=7.28, wps=5852.8, ups=0.09, wpb=64779, bsz=128, num_updates=2364, lr=9.99891e-05, gnorm=2.449, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26403 2021-06-19 01:58:59 | INFO | train_inner | epoch 001: 2385 / 3002 loss=2.926, ppl=7.6, wps=5906.7, ups=0.09, wpb=64814, bsz=128, num_updates=2365, lr=9.99891e-05, gnorm=2.614, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26414 2021-06-19 01:59:10 | INFO | train_inner | epoch 001: 2386 / 3002 loss=3.02, ppl=8.11, wps=5948.7, ups=0.09, wpb=64823, bsz=128, num_updates=2366, lr=9.99891e-05, gnorm=2.732, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26424 2021-06-19 01:59:21 | INFO | train_inner | epoch 001: 2387 / 3002 loss=2.905, ppl=7.49, wps=5857.5, ups=0.09, wpb=64792, bsz=128, num_updates=2367, lr=9.99891e-05, gnorm=2.527, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26436 2021-06-19 01:59:32 | INFO | train_inner | epoch 001: 2388 / 3002 loss=2.941, ppl=7.68, wps=5830, ups=0.09, wpb=64790, bsz=128, num_updates=2368, lr=9.99891e-05, gnorm=2.53, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26447 2021-06-19 01:59:43 | INFO | train_inner | epoch 001: 2389 / 3002 loss=2.969, ppl=7.83, wps=5893.3, ups=0.09, wpb=64834, bsz=128, num_updates=2369, lr=9.9989e-05, gnorm=2.501, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26458 2021-06-19 01:59:55 | INFO | train_inner | epoch 001: 2390 / 3002 loss=3.032, ppl=8.18, wps=5759.5, ups=0.09, wpb=64834, bsz=128, num_updates=2370, lr=9.9989e-05, gnorm=2.554, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26469 2021-06-19 02:00:06 | INFO | train_inner | epoch 001: 2391 / 3002 loss=2.828, ppl=7.1, wps=5770.3, ups=0.09, wpb=64828, bsz=128, num_updates=2371, lr=9.9989e-05, gnorm=2.439, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26480 2021-06-19 02:00:17 | INFO | train_inner | epoch 001: 2392 / 3002 loss=3.033, ppl=8.19, wps=5771.8, ups=0.09, wpb=64807, bsz=128, num_updates=2372, lr=9.9989e-05, gnorm=2.427, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26491 2021-06-19 02:00:28 | INFO | train_inner | epoch 001: 2393 / 3002 loss=2.986, ppl=7.92, wps=5791.7, ups=0.09, wpb=64779, bsz=128, num_updates=2373, lr=9.9989e-05, gnorm=2.736, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26503 2021-06-19 02:00:39 | INFO | train_inner | epoch 001: 2394 / 3002 loss=2.968, ppl=7.83, wps=5783.5, ups=0.09, wpb=64828, bsz=128, num_updates=2374, lr=9.9989e-05, gnorm=2.646, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26514 2021-06-19 02:00:50 | INFO | train_inner | epoch 001: 2395 / 3002 loss=2.821, ppl=7.07, wps=5952.9, ups=0.09, wpb=64813, bsz=128, num_updates=2375, lr=9.9989e-05, gnorm=4.172, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26525 2021-06-19 02:01:01 | INFO | train_inner | epoch 001: 2396 / 3002 loss=3.184, ppl=9.09, wps=5792.8, ups=0.09, wpb=64755, bsz=128, num_updates=2376, lr=9.9989e-05, gnorm=2.537, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26536 2021-06-19 02:01:13 | INFO | train_inner | epoch 001: 2397 / 3002 loss=2.829, ppl=7.11, wps=5792.3, ups=0.09, wpb=64809, bsz=128, num_updates=2377, lr=9.9989e-05, gnorm=2.539, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26547 2021-06-19 02:01:24 | INFO | train_inner | epoch 001: 2398 / 3002 loss=2.823, ppl=7.08, wps=5911.9, ups=0.09, wpb=64893, bsz=128, num_updates=2378, lr=9.9989e-05, gnorm=2.481, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26558 2021-06-19 02:01:34 | INFO | train_inner | epoch 001: 2399 / 3002 loss=2.943, ppl=7.69, wps=5975.6, ups=0.09, wpb=64763, bsz=128, num_updates=2379, lr=9.9989e-05, gnorm=2.521, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26569 2021-06-19 02:01:45 | INFO | train_inner | epoch 001: 2400 / 3002 loss=2.733, ppl=6.65, wps=5938.7, ups=0.09, wpb=64861, bsz=128, num_updates=2380, lr=9.9989e-05, gnorm=2.416, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26580 2021-06-19 02:01:56 | INFO | train_inner | epoch 001: 2401 / 3002 loss=2.833, ppl=7.12, wps=5854.1, ups=0.09, wpb=64812, bsz=128, num_updates=2381, lr=9.9989e-05, gnorm=2.512, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26591 2021-06-19 02:02:07 | INFO | train_inner | epoch 001: 2402 / 3002 loss=2.862, ppl=7.27, wps=5873.2, ups=0.09, wpb=64819, bsz=128, num_updates=2382, lr=9.99889e-05, gnorm=2.501, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26602 2021-06-19 02:02:18 | INFO | train_inner | epoch 001: 2403 / 3002 loss=2.964, ppl=7.8, wps=5897.6, ups=0.09, wpb=64799, bsz=128, num_updates=2383, lr=9.99889e-05, gnorm=2.472, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26613 2021-06-19 02:02:29 | INFO | train_inner | epoch 001: 2404 / 3002 loss=2.734, ppl=6.65, wps=5885.4, ups=0.09, wpb=64806, bsz=128, num_updates=2384, lr=9.99889e-05, gnorm=2.259, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26624 2021-06-19 02:02:40 | INFO | train_inner | epoch 001: 2405 / 3002 loss=2.977, ppl=7.87, wps=6078.3, ups=0.09, wpb=64863, bsz=128, num_updates=2385, lr=9.99889e-05, gnorm=2.409, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26635 2021-06-19 02:02:51 | INFO | train_inner | epoch 001: 2406 / 3002 loss=2.716, ppl=6.57, wps=5802.7, ups=0.09, wpb=64857, bsz=128, num_updates=2386, lr=9.99889e-05, gnorm=2.325, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26646 2021-06-19 02:03:02 | INFO | train_inner | epoch 001: 2407 / 3002 loss=2.853, ppl=7.23, wps=5909.8, ups=0.09, wpb=64930, bsz=128, num_updates=2387, lr=9.99889e-05, gnorm=2.384, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26657 2021-06-19 02:03:13 | INFO | train_inner | epoch 001: 2408 / 3002 loss=2.871, ppl=7.32, wps=5816.5, ups=0.09, wpb=64769, bsz=128, num_updates=2388, lr=9.99889e-05, gnorm=2.533, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26668 2021-06-19 02:03:24 | INFO | train_inner | epoch 001: 2409 / 3002 loss=2.92, ppl=7.57, wps=5999, ups=0.09, wpb=64874, bsz=128, num_updates=2389, lr=9.99889e-05, gnorm=62.891, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26679 2021-06-19 02:03:35 | INFO | train_inner | epoch 001: 2410 / 3002 loss=2.942, ppl=7.69, wps=5861.5, ups=0.09, wpb=64828, bsz=128, num_updates=2390, lr=9.99889e-05, gnorm=2.611, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26690 2021-06-19 02:03:46 | INFO | train_inner | epoch 001: 2411 / 3002 loss=3.004, ppl=8.02, wps=5945, ups=0.09, wpb=64797, bsz=128, num_updates=2391, lr=9.99889e-05, gnorm=2.739, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26701 2021-06-19 02:03:57 | INFO | train_inner | epoch 001: 2412 / 3002 loss=2.999, ppl=7.99, wps=5832.8, ups=0.09, wpb=64763, bsz=128, num_updates=2392, lr=9.99889e-05, gnorm=7.665, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26712 2021-06-19 02:04:08 | INFO | train_inner | epoch 001: 2413 / 3002 loss=3.224, ppl=9.34, wps=5895.6, ups=0.09, wpb=64786, bsz=128, num_updates=2393, lr=9.99889e-05, gnorm=10.52, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26723 2021-06-19 02:04:20 | INFO | train_inner | epoch 001: 2414 / 3002 loss=3.082, ppl=8.47, wps=5798.9, ups=0.09, wpb=64796, bsz=128, num_updates=2394, lr=9.99888e-05, gnorm=4.589, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26734 2021-06-19 02:04:31 | INFO | train_inner | epoch 001: 2415 / 3002 loss=2.953, ppl=7.75, wps=5700.5, ups=0.09, wpb=64811, bsz=128, num_updates=2395, lr=9.99888e-05, gnorm=6.095, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26745 2021-06-19 02:04:42 | INFO | train_inner | epoch 001: 2416 / 3002 loss=2.903, ppl=7.48, wps=5929.1, ups=0.09, wpb=64880, bsz=128, num_updates=2396, lr=9.99888e-05, gnorm=4.854, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26756 2021-06-19 02:04:53 | INFO | train_inner | epoch 001: 2417 / 3002 loss=2.928, ppl=7.61, wps=5822.1, ups=0.09, wpb=64878, bsz=128, num_updates=2397, lr=9.99888e-05, gnorm=2.906, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26767 2021-06-19 02:05:04 | INFO | train_inner | epoch 001: 2418 / 3002 loss=3.116, ppl=8.67, wps=5925.2, ups=0.09, wpb=64917, bsz=128, num_updates=2398, lr=9.99888e-05, gnorm=3.848, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26778 2021-06-19 02:05:15 | INFO | train_inner | epoch 001: 2419 / 3002 loss=2.898, ppl=7.45, wps=5927.7, ups=0.09, wpb=64870, bsz=128, num_updates=2399, lr=9.99888e-05, gnorm=2.812, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26789 2021-06-19 02:05:26 | INFO | train_inner | epoch 001: 2420 / 3002 loss=2.884, ppl=7.38, wps=5893.2, ups=0.09, wpb=64820, bsz=128, num_updates=2400, lr=9.99888e-05, gnorm=2.58, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26800 2021-06-19 02:05:37 | INFO | train_inner | epoch 001: 2421 / 3002 loss=2.996, ppl=7.98, wps=5836.3, ups=0.09, wpb=64829, bsz=128, num_updates=2401, lr=9.99888e-05, gnorm=2.813, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26811 2021-06-19 02:05:48 | INFO | train_inner | epoch 001: 2422 / 3002 loss=3.231, ppl=9.39, wps=5908.3, ups=0.09, wpb=64877, bsz=128, num_updates=2402, lr=9.99888e-05, gnorm=2.807, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26822 2021-06-19 02:05:59 | INFO | train_inner | epoch 001: 2423 / 3002 loss=2.959, ppl=7.78, wps=5801.2, ups=0.09, wpb=64872, bsz=128, num_updates=2403, lr=9.99888e-05, gnorm=10.088, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26833 2021-06-19 02:06:10 | INFO | train_inner | epoch 001: 2424 / 3002 loss=3.013, ppl=8.08, wps=5879, ups=0.09, wpb=64781, bsz=128, num_updates=2404, lr=9.99888e-05, gnorm=2.649, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26844 2021-06-19 02:06:21 | INFO | train_inner | epoch 001: 2425 / 3002 loss=3.045, ppl=8.25, wps=5811, ups=0.09, wpb=64763, bsz=128, num_updates=2405, lr=9.99888e-05, gnorm=2.632, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26856 2021-06-19 02:06:32 | INFO | train_inner | epoch 001: 2426 / 3002 loss=2.854, ppl=7.23, wps=5844.1, ups=0.09, wpb=64872, bsz=128, num_updates=2406, lr=9.99888e-05, gnorm=2.599, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26867 2021-06-19 02:06:43 | INFO | train_inner | epoch 001: 2427 / 3002 loss=3.009, ppl=8.05, wps=5889.8, ups=0.09, wpb=64882, bsz=128, num_updates=2407, lr=9.99887e-05, gnorm=5.998, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26878 2021-06-19 02:06:55 | INFO | train_inner | epoch 001: 2428 / 3002 loss=3.035, ppl=8.2, wps=5751.1, ups=0.09, wpb=64792, bsz=128, num_updates=2408, lr=9.99887e-05, gnorm=2.963, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26889 2021-06-19 02:07:06 | INFO | train_inner | epoch 001: 2429 / 3002 loss=2.852, ppl=7.22, wps=5860.2, ups=0.09, wpb=64837, bsz=128, num_updates=2409, lr=9.99887e-05, gnorm=10.095, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26900 2021-06-19 02:07:17 | INFO | train_inner | epoch 001: 2430 / 3002 loss=2.951, ppl=7.73, wps=5762.7, ups=0.09, wpb=64760, bsz=128, num_updates=2410, lr=9.99887e-05, gnorm=2.765, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26911 2021-06-19 02:07:28 | INFO | train_inner | epoch 001: 2431 / 3002 loss=3.044, ppl=8.25, wps=5936.8, ups=0.09, wpb=64766, bsz=128, num_updates=2411, lr=9.99887e-05, gnorm=2.475, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26922 2021-06-19 02:07:39 | INFO | train_inner | epoch 001: 2432 / 3002 loss=2.864, ppl=7.28, wps=5860.1, ups=0.09, wpb=64863, bsz=128, num_updates=2412, lr=9.99887e-05, gnorm=2.449, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26933 2021-06-19 02:07:50 | INFO | train_inner | epoch 001: 2433 / 3002 loss=3.108, ppl=8.62, wps=5827.1, ups=0.09, wpb=64823, bsz=128, num_updates=2413, lr=9.99887e-05, gnorm=2.616, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26944 2021-06-19 02:08:01 | INFO | train_inner | epoch 001: 2434 / 3002 loss=2.92, ppl=7.57, wps=5825.4, ups=0.09, wpb=64835, bsz=128, num_updates=2414, lr=9.99887e-05, gnorm=2.499, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26956 2021-06-19 02:08:12 | INFO | train_inner | epoch 001: 2435 / 3002 loss=2.904, ppl=7.49, wps=5801.9, ups=0.09, wpb=64858, bsz=128, num_updates=2415, lr=9.99887e-05, gnorm=2.516, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26967 2021-06-19 02:08:24 | INFO | train_inner | epoch 001: 2436 / 3002 loss=2.918, ppl=7.56, wps=5784.7, ups=0.09, wpb=64844, bsz=128, num_updates=2416, lr=9.99887e-05, gnorm=2.495, loss_scale=1, train_wall=11, gb_free=2.8, wall=26978 2021-06-19 02:08:35 | INFO | train_inner | epoch 001: 2437 / 3002 loss=3.099, ppl=8.57, wps=5817.6, ups=0.09, wpb=64800, bsz=128, num_updates=2417, lr=9.99887e-05, gnorm=2.513, loss_scale=1, train_wall=11, gb_free=2.8, wall=26989 2021-06-19 02:08:46 | INFO | train_inner | epoch 001: 2438 / 3002 loss=2.896, ppl=7.45, wps=5917.9, ups=0.09, wpb=64777, bsz=128, num_updates=2418, lr=9.99887e-05, gnorm=12.403, loss_scale=1, train_wall=10, gb_free=2.8, wall=27000 2021-06-19 02:08:57 | INFO | train_inner | epoch 001: 2439 / 3002 loss=3.181, ppl=9.07, wps=5799.4, ups=0.09, wpb=64801, bsz=128, num_updates=2419, lr=9.99886e-05, gnorm=2.537, loss_scale=1, train_wall=11, gb_free=2.8, wall=27011 2021-06-19 02:09:08 | INFO | train_inner | epoch 001: 2440 / 3002 loss=2.837, ppl=7.15, wps=5825.5, ups=0.09, wpb=64850, bsz=128, num_updates=2420, lr=9.99886e-05, gnorm=2.401, loss_scale=1, train_wall=11, gb_free=2.8, wall=27022 2021-06-19 02:09:19 | INFO | train_inner | epoch 001: 2441 / 3002 loss=3.004, ppl=8.02, wps=5871.2, ups=0.09, wpb=64770, bsz=128, num_updates=2421, lr=9.99886e-05, gnorm=2.473, loss_scale=1, train_wall=11, gb_free=2.8, wall=27033 2021-06-19 02:09:30 | INFO | train_inner | epoch 001: 2442 / 3002 loss=2.833, ppl=7.13, wps=5834.2, ups=0.09, wpb=64843, bsz=128, num_updates=2422, lr=9.99886e-05, gnorm=2.901, loss_scale=1, train_wall=11, gb_free=2.8, wall=27044 2021-06-19 02:09:41 | INFO | train_inner | epoch 001: 2443 / 3002 loss=2.92, ppl=7.57, wps=5749.8, ups=0.09, wpb=64789, bsz=128, num_updates=2423, lr=9.99886e-05, gnorm=2.487, loss_scale=1, train_wall=11, gb_free=2.8, wall=27056 2021-06-19 02:09:52 | INFO | train_inner | epoch 001: 2444 / 3002 loss=2.957, ppl=7.77, wps=5871.7, ups=0.09, wpb=64808, bsz=128, num_updates=2424, lr=9.99886e-05, gnorm=2.547, loss_scale=1, train_wall=11, gb_free=2.8, wall=27067 2021-06-19 02:10:04 | INFO | train_inner | epoch 001: 2445 / 3002 loss=2.886, ppl=7.39, wps=5842.8, ups=0.09, wpb=64859, bsz=128, num_updates=2425, lr=9.99886e-05, gnorm=2.492, loss_scale=1, train_wall=11, gb_free=2.8, wall=27078 2021-06-19 02:10:15 | INFO | train_inner | epoch 001: 2446 / 3002 loss=2.924, ppl=7.59, wps=5906.7, ups=0.09, wpb=64806, bsz=128, num_updates=2426, lr=9.99886e-05, gnorm=2.562, loss_scale=1, train_wall=10, gb_free=2.8, wall=27089 2021-06-19 02:10:26 | INFO | train_inner | epoch 001: 2447 / 3002 loss=2.836, ppl=7.14, wps=5859.6, ups=0.09, wpb=64835, bsz=128, num_updates=2427, lr=9.99886e-05, gnorm=2.386, loss_scale=1, train_wall=11, gb_free=2.8, wall=27100 2021-06-19 02:10:37 | INFO | train_inner | epoch 001: 2448 / 3002 loss=2.955, ppl=7.75, wps=5847.7, ups=0.09, wpb=64760, bsz=128, num_updates=2428, lr=9.99886e-05, gnorm=2.539, loss_scale=1, train_wall=11, gb_free=2.8, wall=27111 2021-06-19 02:10:48 | INFO | train_inner | epoch 001: 2449 / 3002 loss=2.962, ppl=7.79, wps=5905.6, ups=0.09, wpb=64831, bsz=128, num_updates=2429, lr=9.99886e-05, gnorm=2.493, loss_scale=1, train_wall=11, gb_free=2.8, wall=27122 2021-06-19 02:10:59 | INFO | train_inner | epoch 001: 2450 / 3002 loss=2.913, ppl=7.53, wps=5966.1, ups=0.09, wpb=64873, bsz=128, num_updates=2430, lr=9.99886e-05, gnorm=2.453, loss_scale=1, train_wall=10, gb_free=2.8, wall=27133 2021-06-19 02:11:09 | INFO | train_inner | epoch 001: 2451 / 3002 loss=2.855, ppl=7.24, wps=5997.5, ups=0.09, wpb=64836, bsz=128, num_updates=2431, lr=9.99886e-05, gnorm=2.359, loss_scale=1, train_wall=10, gb_free=2.8, wall=27144 2021-06-19 02:11:20 | INFO | train_inner | epoch 001: 2452 / 3002 loss=2.952, ppl=7.74, wps=5904.1, ups=0.09, wpb=64846, bsz=128, num_updates=2432, lr=9.99885e-05, gnorm=2.579, loss_scale=1, train_wall=11, gb_free=2.8, wall=27155 2021-06-19 02:11:31 | INFO | train_inner | epoch 001: 2453 / 3002 loss=3.101, ppl=8.58, wps=5886.5, ups=0.09, wpb=64797, bsz=128, num_updates=2433, lr=9.99885e-05, gnorm=2.305, loss_scale=1, train_wall=11, gb_free=2.8, wall=27166 2021-06-19 02:11:43 | INFO | train_inner | epoch 001: 2454 / 3002 loss=2.917, ppl=7.55, wps=5786.5, ups=0.09, wpb=64793, bsz=128, num_updates=2434, lr=9.99885e-05, gnorm=2.373, loss_scale=1, train_wall=11, gb_free=2.8, wall=27177 2021-06-19 02:11:54 | INFO | train_inner | epoch 001: 2455 / 3002 loss=2.988, ppl=7.93, wps=5867.3, ups=0.09, wpb=64824, bsz=128, num_updates=2435, lr=9.99885e-05, gnorm=2.393, loss_scale=1, train_wall=11, gb_free=2.8, wall=27188 2021-06-19 02:12:05 | INFO | train_inner | epoch 001: 2456 / 3002 loss=2.933, ppl=7.64, wps=5893.3, ups=0.09, wpb=64760, bsz=128, num_updates=2436, lr=9.99885e-05, gnorm=2.354, loss_scale=1, train_wall=11, gb_free=2.8, wall=27199 2021-06-19 02:12:16 | INFO | train_inner | epoch 001: 2457 / 3002 loss=2.955, ppl=7.75, wps=5816.2, ups=0.09, wpb=64802, bsz=128, num_updates=2437, lr=9.99885e-05, gnorm=2.431, loss_scale=1, train_wall=11, gb_free=2.8, wall=27210 2021-06-19 02:12:26 | INFO | train_inner | epoch 001: 2458 / 3002 loss=3, ppl=8, wps=6048.1, ups=0.09, wpb=64832, bsz=128, num_updates=2438, lr=9.99885e-05, gnorm=2.504, loss_scale=1, train_wall=10, gb_free=2.8, wall=27221 2021-06-19 02:12:37 | INFO | train_inner | epoch 001: 2459 / 3002 loss=3.04, ppl=8.23, wps=5955, ups=0.09, wpb=64792, bsz=128, num_updates=2439, lr=9.99885e-05, gnorm=2.441, loss_scale=1, train_wall=10, gb_free=2.8, wall=27232 2021-06-19 02:12:48 | INFO | train_inner | epoch 001: 2460 / 3002 loss=2.955, ppl=7.76, wps=5921.2, ups=0.09, wpb=64811, bsz=128, num_updates=2440, lr=9.99885e-05, gnorm=2.455, loss_scale=1, train_wall=10, gb_free=2.8, wall=27243 2021-06-19 02:12:59 | INFO | train_inner | epoch 001: 2461 / 3002 loss=2.826, ppl=7.09, wps=5795.2, ups=0.09, wpb=64828, bsz=128, num_updates=2441, lr=9.99885e-05, gnorm=2.64, loss_scale=1, train_wall=11, gb_free=2.8, wall=27254 2021-06-19 02:13:10 | INFO | train_inner | epoch 001: 2462 / 3002 loss=2.926, ppl=7.6, wps=5890.9, ups=0.09, wpb=64836, bsz=128, num_updates=2442, lr=9.99885e-05, gnorm=2.504, loss_scale=1, train_wall=11, gb_free=2.8, wall=27265 2021-06-19 02:13:21 | INFO | train_inner | epoch 001: 2463 / 3002 loss=2.765, ppl=6.8, wps=5908.2, ups=0.09, wpb=64871, bsz=128, num_updates=2443, lr=9.99885e-05, gnorm=2.752, loss_scale=1, train_wall=11, gb_free=2.8, wall=27276 2021-06-19 02:13:33 | INFO | train_inner | epoch 001: 2464 / 3002 loss=2.99, ppl=7.94, wps=5800.5, ups=0.09, wpb=64825, bsz=128, num_updates=2444, lr=9.99884e-05, gnorm=2.468, loss_scale=1, train_wall=11, gb_free=2.8, wall=27287 2021-06-19 02:13:44 | INFO | train_inner | epoch 001: 2465 / 3002 loss=3.016, ppl=8.09, wps=5775.1, ups=0.09, wpb=64817, bsz=128, num_updates=2445, lr=9.99884e-05, gnorm=2.5, loss_scale=1, train_wall=11, gb_free=2.8, wall=27298 2021-06-19 02:13:55 | INFO | train_inner | epoch 001: 2466 / 3002 loss=2.9, ppl=7.46, wps=5872.4, ups=0.09, wpb=64796, bsz=128, num_updates=2446, lr=9.99884e-05, gnorm=2.437, loss_scale=1, train_wall=11, gb_free=2.8, wall=27309 2021-06-19 02:14:06 | INFO | train_inner | epoch 001: 2467 / 3002 loss=3.001, ppl=8, wps=5845.8, ups=0.09, wpb=64774, bsz=128, num_updates=2447, lr=9.99884e-05, gnorm=2.396, loss_scale=1, train_wall=11, gb_free=2.8, wall=27320 2021-06-19 02:14:17 | INFO | train_inner | epoch 001: 2468 / 3002 loss=3.063, ppl=8.35, wps=5886.9, ups=0.09, wpb=64882, bsz=128, num_updates=2448, lr=9.99884e-05, gnorm=2.506, loss_scale=1, train_wall=11, gb_free=2.8, wall=27331 2021-06-19 02:14:28 | INFO | train_inner | epoch 001: 2469 / 3002 loss=3.013, ppl=8.07, wps=5855.6, ups=0.09, wpb=64795, bsz=128, num_updates=2449, lr=9.99884e-05, gnorm=2.443, loss_scale=1, train_wall=11, gb_free=2.8, wall=27342 2021-06-19 02:14:39 | INFO | train_inner | epoch 001: 2470 / 3002 loss=2.911, ppl=7.52, wps=5868, ups=0.09, wpb=64795, bsz=128, num_updates=2450, lr=9.99884e-05, gnorm=2.295, loss_scale=1, train_wall=11, gb_free=2.8, wall=27353 2021-06-19 02:14:50 | INFO | train_inner | epoch 001: 2471 / 3002 loss=2.949, ppl=7.72, wps=5799.6, ups=0.09, wpb=64856, bsz=128, num_updates=2451, lr=9.99884e-05, gnorm=2.431, loss_scale=1, train_wall=11, gb_free=2.8, wall=27365 2021-06-19 02:15:01 | INFO | train_inner | epoch 001: 2472 / 3002 loss=3.043, ppl=8.24, wps=5872.2, ups=0.09, wpb=64816, bsz=128, num_updates=2452, lr=9.99884e-05, gnorm=2.383, loss_scale=1, train_wall=11, gb_free=2.8, wall=27376 2021-06-19 02:15:12 | INFO | train_inner | epoch 001: 2473 / 3002 loss=2.885, ppl=7.39, wps=5846.9, ups=0.09, wpb=64781, bsz=128, num_updates=2453, lr=9.99884e-05, gnorm=2.418, loss_scale=1, train_wall=11, gb_free=2.8, wall=27387 2021-06-19 02:15:23 | INFO | train_inner | epoch 001: 2474 / 3002 loss=2.946, ppl=7.71, wps=5861.2, ups=0.09, wpb=64817, bsz=128, num_updates=2454, lr=9.99884e-05, gnorm=2.339, loss_scale=1, train_wall=11, gb_free=2.8, wall=27398 2021-06-19 02:15:35 | INFO | train_inner | epoch 001: 2475 / 3002 loss=2.99, ppl=7.94, wps=5823.9, ups=0.09, wpb=64810, bsz=128, num_updates=2455, lr=9.99884e-05, gnorm=2.648, loss_scale=1, train_wall=11, gb_free=2.8, wall=27409 2021-06-19 02:15:45 | INFO | train_inner | epoch 001: 2476 / 3002 loss=2.891, ppl=7.42, wps=5942.7, ups=0.09, wpb=64880, bsz=128, num_updates=2456, lr=9.99884e-05, gnorm=3.059, loss_scale=1, train_wall=10, gb_free=2.8, wall=27420 2021-06-19 02:15:57 | INFO | train_inner | epoch 001: 2477 / 3002 loss=3.007, ppl=8.04, wps=5848.7, ups=0.09, wpb=64755, bsz=128, num_updates=2457, lr=9.99883e-05, gnorm=2.392, loss_scale=1, train_wall=11, gb_free=2.8, wall=27431 2021-06-19 02:16:08 | INFO | train_inner | epoch 001: 2478 / 3002 loss=2.94, ppl=7.67, wps=5782.8, ups=0.09, wpb=64758, bsz=128, num_updates=2458, lr=9.99883e-05, gnorm=2.73, loss_scale=1, train_wall=11, gb_free=2.8, wall=27442 2021-06-19 02:16:19 | INFO | train_inner | epoch 001: 2479 / 3002 loss=2.884, ppl=7.38, wps=5853.6, ups=0.09, wpb=64904, bsz=128, num_updates=2459, lr=9.99883e-05, gnorm=2.438, loss_scale=1, train_wall=11, gb_free=2.8, wall=27453 2021-06-19 02:16:30 | INFO | train_inner | epoch 001: 2480 / 3002 loss=2.848, ppl=7.2, wps=5804.3, ups=0.09, wpb=64823, bsz=128, num_updates=2460, lr=9.99883e-05, gnorm=2.422, loss_scale=1, train_wall=11, gb_free=2.8, wall=27464 2021-06-19 02:16:41 | INFO | train_inner | epoch 001: 2481 / 3002 loss=2.815, ppl=7.04, wps=5799.6, ups=0.09, wpb=64822, bsz=128, num_updates=2461, lr=9.99883e-05, gnorm=2.398, loss_scale=1, train_wall=11, gb_free=2.8, wall=27476 2021-06-19 02:16:52 | INFO | train_inner | epoch 001: 2482 / 3002 loss=2.863, ppl=7.28, wps=5843.4, ups=0.09, wpb=64540, bsz=128, num_updates=2462, lr=9.99883e-05, gnorm=2.348, loss_scale=1, train_wall=11, gb_free=2.8, wall=27487 2021-06-19 02:17:03 | INFO | train_inner | epoch 001: 2483 / 3002 loss=2.872, ppl=7.32, wps=5865.8, ups=0.09, wpb=64764, bsz=128, num_updates=2463, lr=9.99883e-05, gnorm=2.487, loss_scale=1, train_wall=11, gb_free=2.8, wall=27498 2021-06-19 02:17:14 | INFO | train_inner | epoch 001: 2484 / 3002 loss=2.889, ppl=7.41, wps=5792.7, ups=0.09, wpb=64887, bsz=128, num_updates=2464, lr=9.99883e-05, gnorm=2.366, loss_scale=1, train_wall=11, gb_free=2.8, wall=27509 2021-06-19 02:17:26 | INFO | train_inner | epoch 001: 2485 / 3002 loss=2.913, ppl=7.53, wps=5820.6, ups=0.09, wpb=64784, bsz=128, num_updates=2465, lr=9.99883e-05, gnorm=2.483, loss_scale=1, train_wall=11, gb_free=2.8, wall=27520 2021-06-19 02:17:37 | INFO | train_inner | epoch 001: 2486 / 3002 loss=2.942, ppl=7.68, wps=5812.9, ups=0.09, wpb=64827, bsz=128, num_updates=2466, lr=9.99883e-05, gnorm=2.345, loss_scale=1, train_wall=11, gb_free=2.8, wall=27531 2021-06-19 02:17:48 | INFO | train_inner | epoch 001: 2487 / 3002 loss=2.753, ppl=6.74, wps=5847.4, ups=0.09, wpb=64889, bsz=128, num_updates=2467, lr=9.99883e-05, gnorm=2.404, loss_scale=1, train_wall=11, gb_free=2.8, wall=27542 2021-06-19 02:17:59 | INFO | train_inner | epoch 001: 2488 / 3002 loss=2.995, ppl=7.97, wps=5865.7, ups=0.09, wpb=64790, bsz=128, num_updates=2468, lr=9.99883e-05, gnorm=2.414, loss_scale=1, train_wall=11, gb_free=2.8, wall=27553 2021-06-19 02:18:10 | INFO | train_inner | epoch 001: 2489 / 3002 loss=3.002, ppl=8.01, wps=5904.5, ups=0.09, wpb=64799, bsz=128, num_updates=2469, lr=9.99882e-05, gnorm=2.392, loss_scale=1, train_wall=11, gb_free=2.8, wall=27564 2021-06-19 02:18:21 | INFO | train_inner | epoch 001: 2490 / 3002 loss=2.778, ppl=6.86, wps=5748.4, ups=0.09, wpb=64873, bsz=128, num_updates=2470, lr=9.99882e-05, gnorm=2.333, loss_scale=1, train_wall=11, gb_free=2.8, wall=27575 2021-06-19 02:18:32 | INFO | train_inner | epoch 001: 2491 / 3002 loss=2.754, ppl=6.75, wps=5872.9, ups=0.09, wpb=64795, bsz=128, num_updates=2471, lr=9.99882e-05, gnorm=2.227, loss_scale=1, train_wall=11, gb_free=2.8, wall=27587 2021-06-19 02:18:43 | INFO | train_inner | epoch 001: 2492 / 3002 loss=3.009, ppl=8.05, wps=5841.7, ups=0.09, wpb=64796, bsz=128, num_updates=2472, lr=9.99882e-05, gnorm=2.356, loss_scale=1, train_wall=11, gb_free=2.8, wall=27598 2021-06-19 02:18:54 | INFO | train_inner | epoch 001: 2493 / 3002 loss=2.896, ppl=7.45, wps=5921.9, ups=0.09, wpb=64792, bsz=128, num_updates=2473, lr=9.99882e-05, gnorm=2.371, loss_scale=1, train_wall=10, gb_free=2.8, wall=27609 2021-06-19 02:19:05 | INFO | train_inner | epoch 001: 2494 / 3002 loss=3.106, ppl=8.61, wps=5748.3, ups=0.09, wpb=64763, bsz=128, num_updates=2474, lr=9.99882e-05, gnorm=2.398, loss_scale=1, train_wall=11, gb_free=2.8, wall=27620 2021-06-19 02:19:17 | INFO | train_inner | epoch 001: 2495 / 3002 loss=2.881, ppl=7.37, wps=5813.8, ups=0.09, wpb=64850, bsz=128, num_updates=2475, lr=9.99882e-05, gnorm=2.343, loss_scale=1, train_wall=11, gb_free=2.8, wall=27631 2021-06-19 02:19:28 | INFO | train_inner | epoch 001: 2496 / 3002 loss=2.845, ppl=7.18, wps=5878.2, ups=0.09, wpb=64876, bsz=128, num_updates=2476, lr=9.99882e-05, gnorm=2.292, loss_scale=1, train_wall=11, gb_free=2.8, wall=27642 2021-06-19 02:19:39 | INFO | train_inner | epoch 001: 2497 / 3002 loss=2.942, ppl=7.68, wps=5860.3, ups=0.09, wpb=64816, bsz=128, num_updates=2477, lr=9.99882e-05, gnorm=2.702, loss_scale=1, train_wall=11, gb_free=2.8, wall=27653 2021-06-19 02:19:50 | INFO | train_inner | epoch 001: 2498 / 3002 loss=2.935, ppl=7.65, wps=5818.4, ups=0.09, wpb=64779, bsz=128, num_updates=2478, lr=9.99882e-05, gnorm=2.512, loss_scale=1, train_wall=11, gb_free=2.8, wall=27664 2021-06-19 02:20:01 | INFO | train_inner | epoch 001: 2499 / 3002 loss=2.925, ppl=7.59, wps=5884.3, ups=0.09, wpb=64832, bsz=128, num_updates=2479, lr=9.99882e-05, gnorm=2.37, loss_scale=1, train_wall=11, gb_free=2.8, wall=27675 2021-06-19 02:20:12 | INFO | train_inner | epoch 001: 2500 / 3002 loss=2.959, ppl=7.77, wps=5962.1, ups=0.09, wpb=64759, bsz=128, num_updates=2480, lr=9.99882e-05, gnorm=2.351, loss_scale=1, train_wall=10, gb_free=2.8, wall=27686 2021-06-19 02:20:23 | INFO | train_inner | epoch 001: 2501 / 3002 loss=2.755, ppl=6.75, wps=5869.5, ups=0.09, wpb=64872, bsz=128, num_updates=2481, lr=9.99882e-05, gnorm=2.334, loss_scale=1, train_wall=11, gb_free=2.8, wall=27697 2021-06-19 02:20:34 | INFO | train_inner | epoch 001: 2502 / 3002 loss=2.898, ppl=7.46, wps=5786.6, ups=0.09, wpb=64831, bsz=128, num_updates=2482, lr=9.99881e-05, gnorm=2.468, loss_scale=1, train_wall=11, gb_free=2.8, wall=27708 2021-06-19 02:20:45 | INFO | train_inner | epoch 001: 2503 / 3002 loss=2.669, ppl=6.36, wps=5841.4, ups=0.09, wpb=64782, bsz=128, num_updates=2483, lr=9.99881e-05, gnorm=2.396, loss_scale=1, train_wall=11, gb_free=2.8, wall=27719 2021-06-19 02:20:56 | INFO | train_inner | epoch 001: 2504 / 3002 loss=2.869, ppl=7.31, wps=5873.2, ups=0.09, wpb=64841, bsz=128, num_updates=2484, lr=9.99881e-05, gnorm=2.314, loss_scale=1, train_wall=11, gb_free=2.8, wall=27730 2021-06-19 02:21:07 | INFO | train_inner | epoch 001: 2505 / 3002 loss=2.761, ppl=6.78, wps=5921.5, ups=0.09, wpb=64786, bsz=128, num_updates=2485, lr=9.99881e-05, gnorm=3.066, loss_scale=1, train_wall=10, gb_free=2.8, wall=27741 2021-06-19 02:21:18 | INFO | train_inner | epoch 001: 2506 / 3002 loss=2.893, ppl=7.43, wps=5856, ups=0.09, wpb=64864, bsz=128, num_updates=2486, lr=9.99881e-05, gnorm=2.509, loss_scale=1, train_wall=11, gb_free=2.8, wall=27752 2021-06-19 02:21:29 | INFO | train_inner | epoch 001: 2507 / 3002 loss=2.82, ppl=7.06, wps=5730.6, ups=0.09, wpb=64943, bsz=128, num_updates=2487, lr=9.99881e-05, gnorm=2.28, loss_scale=1, train_wall=11, gb_free=2.8, wall=27764 2021-06-19 02:21:41 | INFO | train_inner | epoch 001: 2508 / 3002 loss=2.854, ppl=7.23, wps=5865.3, ups=0.09, wpb=64916, bsz=128, num_updates=2488, lr=9.99881e-05, gnorm=2.373, loss_scale=1, train_wall=11, gb_free=2.8, wall=27775 2021-06-19 02:21:51 | INFO | train_inner | epoch 001: 2509 / 3002 loss=2.861, ppl=7.26, wps=6001.5, ups=0.09, wpb=64838, bsz=128, num_updates=2489, lr=9.99881e-05, gnorm=2.393, loss_scale=1, train_wall=10, gb_free=2.8, wall=27786 2021-06-19 02:22:02 | INFO | train_inner | epoch 001: 2510 / 3002 loss=2.842, ppl=7.17, wps=5854.4, ups=0.09, wpb=64886, bsz=128, num_updates=2490, lr=9.99881e-05, gnorm=2.687, loss_scale=1, train_wall=11, gb_free=2.8, wall=27797 2021-06-19 02:22:13 | INFO | train_inner | epoch 001: 2511 / 3002 loss=2.817, ppl=7.05, wps=5937, ups=0.09, wpb=64948, bsz=128, num_updates=2491, lr=9.99881e-05, gnorm=2.364, loss_scale=1, train_wall=10, gb_free=2.8, wall=27808 2021-06-19 02:22:24 | INFO | train_inner | epoch 001: 2512 / 3002 loss=2.964, ppl=7.8, wps=5853.3, ups=0.09, wpb=64862, bsz=128, num_updates=2492, lr=9.99881e-05, gnorm=2.455, loss_scale=1, train_wall=11, gb_free=2.8, wall=27819 2021-06-19 02:22:35 | INFO | train_inner | epoch 001: 2513 / 3002 loss=2.902, ppl=7.48, wps=5887.7, ups=0.09, wpb=64805, bsz=128, num_updates=2493, lr=9.99881e-05, gnorm=2.467, loss_scale=1, train_wall=11, gb_free=2.8, wall=27830 2021-06-19 02:22:47 | INFO | train_inner | epoch 001: 2514 / 3002 loss=2.803, ppl=6.98, wps=5779.3, ups=0.09, wpb=64757, bsz=128, num_updates=2494, lr=9.9988e-05, gnorm=2.471, loss_scale=1, train_wall=11, gb_free=2.8, wall=27841 2021-06-19 02:22:58 | INFO | train_inner | epoch 001: 2515 / 3002 loss=2.954, ppl=7.75, wps=5867.2, ups=0.09, wpb=64851, bsz=128, num_updates=2495, lr=9.9988e-05, gnorm=3.046, loss_scale=1, train_wall=11, gb_free=2.8, wall=27852 2021-06-19 02:23:09 | INFO | train_inner | epoch 001: 2516 / 3002 loss=2.837, ppl=7.14, wps=5828.1, ups=0.09, wpb=64809, bsz=128, num_updates=2496, lr=9.9988e-05, gnorm=2.381, loss_scale=1, train_wall=11, gb_free=2.8, wall=27863 2021-06-19 02:23:20 | INFO | train_inner | epoch 001: 2517 / 3002 loss=2.975, ppl=7.86, wps=5859.5, ups=0.09, wpb=64839, bsz=128, num_updates=2497, lr=9.9988e-05, gnorm=2.523, loss_scale=1, train_wall=11, gb_free=2.8, wall=27874 2021-06-19 02:23:31 | INFO | train_inner | epoch 001: 2518 / 3002 loss=2.723, ppl=6.6, wps=5840.6, ups=0.09, wpb=64800, bsz=128, num_updates=2498, lr=9.9988e-05, gnorm=2.362, loss_scale=1, train_wall=11, gb_free=2.8, wall=27885 2021-06-19 02:23:42 | INFO | train_inner | epoch 001: 2519 / 3002 loss=2.782, ppl=6.88, wps=5825.5, ups=0.09, wpb=64791, bsz=128, num_updates=2499, lr=9.9988e-05, gnorm=2.352, loss_scale=1, train_wall=11, gb_free=2.8, wall=27896 2021-06-19 02:23:53 | INFO | train_inner | epoch 001: 2520 / 3002 loss=2.827, ppl=7.1, wps=5837.1, ups=0.09, wpb=64833, bsz=128, num_updates=2500, lr=9.9988e-05, gnorm=2.491, loss_scale=1, train_wall=11, gb_free=2.8, wall=27908 2021-06-19 02:24:04 | INFO | train_inner | epoch 001: 2521 / 3002 loss=2.916, ppl=7.55, wps=5812, ups=0.09, wpb=64839, bsz=128, num_updates=2501, lr=9.9988e-05, gnorm=2.445, loss_scale=1, train_wall=11, gb_free=2.8, wall=27919 2021-06-19 02:24:15 | INFO | train_inner | epoch 001: 2522 / 3002 loss=2.977, ppl=7.87, wps=5887.6, ups=0.09, wpb=64821, bsz=128, num_updates=2502, lr=9.9988e-05, gnorm=2.536, loss_scale=1, train_wall=11, gb_free=2.8, wall=27930 2021-06-19 02:24:27 | INFO | train_inner | epoch 001: 2523 / 3002 loss=2.775, ppl=6.84, wps=5826.7, ups=0.09, wpb=64755, bsz=128, num_updates=2503, lr=9.9988e-05, gnorm=2.365, loss_scale=1, train_wall=11, gb_free=2.8, wall=27941 2021-06-19 02:24:37 | INFO | train_inner | epoch 001: 2524 / 3002 loss=3.171, ppl=9.01, wps=5980.8, ups=0.09, wpb=64939, bsz=128, num_updates=2504, lr=9.9988e-05, gnorm=2.418, loss_scale=1, train_wall=10, gb_free=2.8, wall=27952 2021-06-19 02:24:48 | INFO | train_inner | epoch 001: 2525 / 3002 loss=2.922, ppl=7.58, wps=5890.5, ups=0.09, wpb=64862, bsz=128, num_updates=2505, lr=9.9988e-05, gnorm=2.38, loss_scale=1, train_wall=11, gb_free=2.8, wall=27963 2021-06-19 02:24:59 | INFO | train_inner | epoch 001: 2526 / 3002 loss=3.033, ppl=8.18, wps=5917.4, ups=0.09, wpb=64868, bsz=128, num_updates=2506, lr=9.9988e-05, gnorm=103.216, loss_scale=1, train_wall=11, gb_free=2.8, wall=27974 2021-06-19 02:25:10 | INFO | train_inner | epoch 001: 2527 / 3002 loss=2.997, ppl=7.98, wps=5901.7, ups=0.09, wpb=64916, bsz=128, num_updates=2507, lr=9.99879e-05, gnorm=2.336, loss_scale=1, train_wall=11, gb_free=2.8, wall=27985 2021-06-19 02:25:21 | INFO | train_inner | epoch 001: 2528 / 3002 loss=2.86, ppl=7.26, wps=5847.5, ups=0.09, wpb=64856, bsz=128, num_updates=2508, lr=9.99879e-05, gnorm=2.373, loss_scale=1, train_wall=11, gb_free=2.8, wall=27996 2021-06-19 02:25:32 | INFO | train_inner | epoch 001: 2529 / 3002 loss=2.959, ppl=7.77, wps=5993.9, ups=0.09, wpb=64807, bsz=128, num_updates=2509, lr=9.99879e-05, gnorm=2.662, loss_scale=1, train_wall=10, gb_free=2.8, wall=28007 2021-06-19 02:25:44 | INFO | train_inner | epoch 001: 2530 / 3002 loss=3.005, ppl=8.03, wps=5748.5, ups=0.09, wpb=64866, bsz=128, num_updates=2510, lr=9.99879e-05, gnorm=2.949, loss_scale=1, train_wall=11, gb_free=2.8, wall=28018 2021-06-19 02:25:55 | INFO | train_inner | epoch 001: 2531 / 3002 loss=2.857, ppl=7.25, wps=5833.5, ups=0.09, wpb=64848, bsz=128, num_updates=2511, lr=9.99879e-05, gnorm=2.436, loss_scale=1, train_wall=11, gb_free=2.8, wall=28029 2021-06-19 02:26:06 | INFO | train_inner | epoch 001: 2532 / 3002 loss=3.014, ppl=8.08, wps=5808.3, ups=0.09, wpb=64910, bsz=128, num_updates=2512, lr=9.99879e-05, gnorm=2.419, loss_scale=1, train_wall=11, gb_free=2.8, wall=28040 2021-06-19 02:26:17 | INFO | train_inner | epoch 001: 2533 / 3002 loss=2.842, ppl=7.17, wps=5894, ups=0.09, wpb=64903, bsz=128, num_updates=2513, lr=9.99879e-05, gnorm=3.675, loss_scale=1, train_wall=11, gb_free=2.8, wall=28051 2021-06-19 02:26:28 | INFO | train_inner | epoch 001: 2534 / 3002 loss=2.842, ppl=7.17, wps=5878, ups=0.09, wpb=64864, bsz=128, num_updates=2514, lr=9.99879e-05, gnorm=2.298, loss_scale=1, train_wall=11, gb_free=2.8, wall=28062 2021-06-19 02:26:39 | INFO | train_inner | epoch 001: 2535 / 3002 loss=3.006, ppl=8.04, wps=5781, ups=0.09, wpb=64831, bsz=128, num_updates=2515, lr=9.99879e-05, gnorm=3.171, loss_scale=1, train_wall=11, gb_free=2.8, wall=28073 2021-06-19 02:26:50 | INFO | train_inner | epoch 001: 2536 / 3002 loss=2.988, ppl=7.93, wps=5943.9, ups=0.09, wpb=64864, bsz=128, num_updates=2516, lr=9.99879e-05, gnorm=2.398, loss_scale=1, train_wall=10, gb_free=2.8, wall=28084 2021-06-19 02:27:01 | INFO | train_inner | epoch 001: 2537 / 3002 loss=2.978, ppl=7.88, wps=5751.8, ups=0.09, wpb=64812, bsz=128, num_updates=2517, lr=9.99879e-05, gnorm=6.894, loss_scale=1, train_wall=11, gb_free=2.8, wall=28096 2021-06-19 02:27:12 | INFO | train_inner | epoch 001: 2538 / 3002 loss=2.927, ppl=7.61, wps=5877.3, ups=0.09, wpb=64882, bsz=128, num_updates=2518, lr=9.99879e-05, gnorm=12.744, loss_scale=1, train_wall=11, gb_free=2.8, wall=28107 2021-06-19 02:27:23 | INFO | train_inner | epoch 001: 2539 / 3002 loss=3.124, ppl=8.72, wps=5867.8, ups=0.09, wpb=64779, bsz=128, num_updates=2519, lr=9.99878e-05, gnorm=5.87, loss_scale=1, train_wall=11, gb_free=2.8, wall=28118 2021-06-19 02:27:34 | INFO | train_inner | epoch 001: 2540 / 3002 loss=2.942, ppl=7.69, wps=5896.8, ups=0.09, wpb=64750, bsz=128, num_updates=2520, lr=9.99878e-05, gnorm=11.817, loss_scale=1, train_wall=11, gb_free=2.8, wall=28129 2021-06-19 02:27:45 | INFO | train_inner | epoch 001: 2541 / 3002 loss=3.157, ppl=8.92, wps=5839.8, ups=0.09, wpb=64817, bsz=128, num_updates=2521, lr=9.99878e-05, gnorm=5.065, loss_scale=1, train_wall=11, gb_free=2.8, wall=28140 2021-06-19 02:27:56 | INFO | train_inner | epoch 001: 2542 / 3002 loss=2.96, ppl=7.78, wps=5970.7, ups=0.09, wpb=64841, bsz=128, num_updates=2522, lr=9.99878e-05, gnorm=5.508, loss_scale=1, train_wall=10, gb_free=2.8, wall=28151 2021-06-19 02:28:07 | INFO | train_inner | epoch 001: 2543 / 3002 loss=2.985, ppl=7.92, wps=5851.7, ups=0.09, wpb=64843, bsz=128, num_updates=2523, lr=9.99878e-05, gnorm=2.724, loss_scale=1, train_wall=11, gb_free=2.8, wall=28162 2021-06-19 02:28:19 | INFO | train_inner | epoch 001: 2544 / 3002 loss=3.086, ppl=8.49, wps=5783.6, ups=0.09, wpb=64804, bsz=128, num_updates=2524, lr=9.99878e-05, gnorm=2.638, loss_scale=1, train_wall=11, gb_free=2.8, wall=28173 2021-06-19 02:28:30 | INFO | train_inner | epoch 001: 2545 / 3002 loss=2.984, ppl=7.91, wps=5795.7, ups=0.09, wpb=64760, bsz=128, num_updates=2525, lr=9.99878e-05, gnorm=2.421, loss_scale=1, train_wall=11, gb_free=2.8, wall=28184 2021-06-19 02:28:41 | INFO | train_inner | epoch 001: 2546 / 3002 loss=3.067, ppl=8.38, wps=5969.2, ups=0.09, wpb=64824, bsz=128, num_updates=2526, lr=9.99878e-05, gnorm=2.564, loss_scale=1, train_wall=10, gb_free=2.8, wall=28195 2021-06-19 02:28:52 | INFO | train_inner | epoch 001: 2547 / 3002 loss=2.875, ppl=7.34, wps=5920.8, ups=0.09, wpb=64775, bsz=128, num_updates=2527, lr=9.99878e-05, gnorm=2.418, loss_scale=1, train_wall=10, gb_free=2.8, wall=28206 2021-06-19 02:29:02 | INFO | train_inner | epoch 001: 2548 / 3002 loss=2.849, ppl=7.2, wps=5985.6, ups=0.09, wpb=64856, bsz=128, num_updates=2528, lr=9.99878e-05, gnorm=2.356, loss_scale=1, train_wall=10, gb_free=2.8, wall=28217 2021-06-19 02:29:13 | INFO | train_inner | epoch 001: 2549 / 3002 loss=2.856, ppl=7.24, wps=5859.8, ups=0.09, wpb=64813, bsz=128, num_updates=2529, lr=9.99878e-05, gnorm=2.465, loss_scale=1, train_wall=11, gb_free=2.8, wall=28228 2021-06-19 02:29:25 | INFO | train_inner | epoch 001: 2550 / 3002 loss=3.124, ppl=8.72, wps=5808.1, ups=0.09, wpb=64838, bsz=128, num_updates=2530, lr=9.99878e-05, gnorm=2.546, loss_scale=1, train_wall=11, gb_free=2.8, wall=28239 2021-06-19 02:29:36 | INFO | train_inner | epoch 001: 2551 / 3002 loss=3.072, ppl=8.41, wps=5715.2, ups=0.09, wpb=64783, bsz=128, num_updates=2531, lr=9.99878e-05, gnorm=2.406, loss_scale=1, train_wall=11, gb_free=2.8, wall=28250 2021-06-19 02:29:47 | INFO | train_inner | epoch 001: 2552 / 3002 loss=2.71, ppl=6.54, wps=5911.3, ups=0.09, wpb=64927, bsz=128, num_updates=2532, lr=9.99877e-05, gnorm=2.424, loss_scale=1, train_wall=11, gb_free=2.8, wall=28261 2021-06-19 02:29:58 | INFO | train_inner | epoch 001: 2553 / 3002 loss=2.948, ppl=7.72, wps=5946.5, ups=0.09, wpb=64927, bsz=128, num_updates=2533, lr=9.99877e-05, gnorm=2.642, loss_scale=1, train_wall=10, gb_free=2.8, wall=28272 2021-06-19 02:30:09 | INFO | train_inner | epoch 001: 2554 / 3002 loss=2.773, ppl=6.83, wps=5814.7, ups=0.09, wpb=64861, bsz=128, num_updates=2534, lr=9.99877e-05, gnorm=2.321, loss_scale=1, train_wall=11, gb_free=2.8, wall=28283 2021-06-19 02:30:20 | INFO | train_inner | epoch 001: 2555 / 3002 loss=3.042, ppl=8.24, wps=5771.4, ups=0.09, wpb=64769, bsz=128, num_updates=2535, lr=9.99877e-05, gnorm=2.504, loss_scale=1, train_wall=11, gb_free=2.8, wall=28295 2021-06-19 02:30:31 | INFO | train_inner | epoch 001: 2556 / 3002 loss=2.945, ppl=7.7, wps=5864.6, ups=0.09, wpb=64816, bsz=128, num_updates=2536, lr=9.99877e-05, gnorm=4.323, loss_scale=1, train_wall=11, gb_free=2.8, wall=28306 2021-06-19 02:30:42 | INFO | train_inner | epoch 001: 2557 / 3002 loss=3.109, ppl=8.63, wps=5893.5, ups=0.09, wpb=64797, bsz=128, num_updates=2537, lr=9.99877e-05, gnorm=2.564, loss_scale=1, train_wall=11, gb_free=2.8, wall=28317 2021-06-19 02:30:53 | INFO | train_inner | epoch 001: 2558 / 3002 loss=2.995, ppl=7.97, wps=5838.9, ups=0.09, wpb=64899, bsz=128, num_updates=2538, lr=9.99877e-05, gnorm=2.509, loss_scale=1, train_wall=11, gb_free=2.8, wall=28328 2021-06-19 02:31:05 | INFO | train_inner | epoch 001: 2559 / 3002 loss=3.123, ppl=8.71, wps=5836, ups=0.09, wpb=64825, bsz=128, num_updates=2539, lr=9.99877e-05, gnorm=3.699, loss_scale=1, train_wall=11, gb_free=2.8, wall=28339 2021-06-19 02:31:16 | INFO | train_inner | epoch 001: 2560 / 3002 loss=2.917, ppl=7.55, wps=5783, ups=0.09, wpb=64786, bsz=128, num_updates=2540, lr=9.99877e-05, gnorm=2.679, loss_scale=1, train_wall=11, gb_free=2.8, wall=28350 2021-06-19 02:31:27 | INFO | train_inner | epoch 001: 2561 / 3002 loss=2.875, ppl=7.34, wps=5833.6, ups=0.09, wpb=64816, bsz=128, num_updates=2541, lr=9.99877e-05, gnorm=2.344, loss_scale=1, train_wall=11, gb_free=2.8, wall=28361 2021-06-19 02:31:38 | INFO | train_inner | epoch 001: 2562 / 3002 loss=2.868, ppl=7.3, wps=5942.9, ups=0.09, wpb=64866, bsz=128, num_updates=2542, lr=9.99877e-05, gnorm=2.277, loss_scale=1, train_wall=10, gb_free=2.8, wall=28372 2021-06-19 02:31:49 | INFO | train_inner | epoch 001: 2563 / 3002 loss=2.877, ppl=7.35, wps=5849.7, ups=0.09, wpb=64881, bsz=128, num_updates=2543, lr=9.99877e-05, gnorm=2.314, loss_scale=1, train_wall=11, gb_free=2.8, wall=28383 2021-06-19 02:32:00 | INFO | train_inner | epoch 001: 2564 / 3002 loss=3.08, ppl=8.46, wps=5824.8, ups=0.09, wpb=64765, bsz=128, num_updates=2544, lr=9.99876e-05, gnorm=72.555, loss_scale=2, train_wall=11, gb_free=2.8, wall=28394 2021-06-19 02:32:11 | INFO | train_inner | epoch 001: 2565 / 3002 loss=2.907, ppl=7.5, wps=5766.2, ups=0.09, wpb=64755, bsz=128, num_updates=2545, lr=9.99876e-05, gnorm=2.401, loss_scale=2, train_wall=11, gb_free=2.8, wall=28406 2021-06-19 02:32:22 | INFO | train_inner | epoch 001: 2566 / 3002 loss=2.807, ppl=7, wps=5750.8, ups=0.09, wpb=64821, bsz=128, num_updates=2546, lr=9.99876e-05, gnorm=2.855, loss_scale=2, train_wall=11, gb_free=2.8, wall=28417 2021-06-19 02:32:33 | INFO | train_inner | epoch 001: 2567 / 3002 loss=2.939, ppl=7.67, wps=5929.6, ups=0.09, wpb=64843, bsz=128, num_updates=2547, lr=9.99876e-05, gnorm=2.632, loss_scale=2, train_wall=10, gb_free=2.8, wall=28428 2021-06-19 02:32:45 | INFO | train_inner | epoch 001: 2568 / 3002 loss=2.887, ppl=7.4, wps=5823, ups=0.09, wpb=64811, bsz=128, num_updates=2548, lr=9.99876e-05, gnorm=2.544, loss_scale=2, train_wall=11, gb_free=2.8, wall=28439 2021-06-19 02:32:55 | INFO | train_inner | epoch 001: 2569 / 3002 loss=2.805, ppl=6.99, wps=5930, ups=0.09, wpb=64919, bsz=128, num_updates=2549, lr=9.99876e-05, gnorm=4.183, loss_scale=2, train_wall=10, gb_free=2.8, wall=28450 2021-06-19 02:33:06 | INFO | train_inner | epoch 001: 2570 / 3002 loss=2.88, ppl=7.36, wps=5904.9, ups=0.09, wpb=64798, bsz=128, num_updates=2550, lr=9.99876e-05, gnorm=3.199, loss_scale=2, train_wall=11, gb_free=2.8, wall=28461 2021-06-19 02:33:17 | INFO | train_inner | epoch 001: 2571 / 3002 loss=2.948, ppl=7.72, wps=5918.2, ups=0.09, wpb=64869, bsz=128, num_updates=2551, lr=9.99876e-05, gnorm=2.716, loss_scale=2, train_wall=10, gb_free=2.8, wall=28472 2021-06-19 02:33:28 | INFO | train_inner | epoch 001: 2572 / 3002 loss=2.855, ppl=7.24, wps=5861, ups=0.09, wpb=64833, bsz=128, num_updates=2552, lr=9.99876e-05, gnorm=2.311, loss_scale=2, train_wall=11, gb_free=2.8, wall=28483 2021-06-19 02:33:40 | INFO | train_inner | epoch 001: 2573 / 3002 loss=3.122, ppl=8.71, wps=5736.4, ups=0.09, wpb=64780, bsz=128, num_updates=2553, lr=9.99876e-05, gnorm=3.072, loss_scale=2, train_wall=11, gb_free=2.8, wall=28494 2021-06-19 02:33:51 | INFO | train_inner | epoch 001: 2574 / 3002 loss=2.902, ppl=7.47, wps=5855.9, ups=0.09, wpb=64813, bsz=128, num_updates=2554, lr=9.99876e-05, gnorm=3.108, loss_scale=2, train_wall=11, gb_free=2.8, wall=28505 2021-06-19 02:34:02 | INFO | train_inner | epoch 001: 2575 / 3002 loss=3.034, ppl=8.19, wps=5856, ups=0.09, wpb=64826, bsz=128, num_updates=2555, lr=9.99876e-05, gnorm=2.575, loss_scale=2, train_wall=11, gb_free=2.8, wall=28516 2021-06-19 02:34:13 | INFO | train_inner | epoch 001: 2576 / 3002 loss=2.77, ppl=6.82, wps=5900.3, ups=0.09, wpb=64812, bsz=128, num_updates=2556, lr=9.99876e-05, gnorm=3.463, loss_scale=2, train_wall=10, gb_free=2.8, wall=28527 2021-06-19 02:34:24 | INFO | train_inner | epoch 001: 2577 / 3002 loss=2.828, ppl=7.1, wps=5894.4, ups=0.09, wpb=64827, bsz=128, num_updates=2557, lr=9.99875e-05, gnorm=2.493, loss_scale=2, train_wall=11, gb_free=2.8, wall=28538 2021-06-19 02:34:35 | INFO | train_inner | epoch 001: 2578 / 3002 loss=2.88, ppl=7.36, wps=5779, ups=0.09, wpb=64797, bsz=128, num_updates=2558, lr=9.99875e-05, gnorm=2.471, loss_scale=2, train_wall=11, gb_free=2.8, wall=28549 2021-06-19 02:34:46 | INFO | train_inner | epoch 001: 2579 / 3002 loss=2.859, ppl=7.25, wps=5907.5, ups=0.09, wpb=64868, bsz=128, num_updates=2559, lr=9.99875e-05, gnorm=2.764, loss_scale=2, train_wall=11, gb_free=2.8, wall=28560 2021-06-19 02:34:57 | INFO | train_inner | epoch 001: 2580 / 3002 loss=2.954, ppl=7.75, wps=5917.9, ups=0.09, wpb=64828, bsz=128, num_updates=2560, lr=9.99875e-05, gnorm=2.576, loss_scale=2, train_wall=10, gb_free=2.8, wall=28571 2021-06-19 02:35:08 | INFO | train_inner | epoch 001: 2581 / 3002 loss=2.98, ppl=7.89, wps=5844.4, ups=0.09, wpb=64831, bsz=128, num_updates=2561, lr=9.99875e-05, gnorm=2.642, loss_scale=2, train_wall=11, gb_free=2.8, wall=28582 2021-06-19 02:35:19 | INFO | train_inner | epoch 001: 2582 / 3002 loss=2.934, ppl=7.64, wps=5907.7, ups=0.09, wpb=64867, bsz=128, num_updates=2562, lr=9.99875e-05, gnorm=9.593, loss_scale=2, train_wall=11, gb_free=2.8, wall=28593 2021-06-19 02:35:30 | INFO | train_inner | epoch 001: 2583 / 3002 loss=2.873, ppl=7.32, wps=5924.6, ups=0.09, wpb=64832, bsz=128, num_updates=2563, lr=9.99875e-05, gnorm=6.625, loss_scale=2, train_wall=10, gb_free=2.8, wall=28604 2021-06-19 02:35:41 | INFO | train_inner | epoch 001: 2584 / 3002 loss=2.763, ppl=6.79, wps=5757.8, ups=0.09, wpb=64840, bsz=128, num_updates=2564, lr=9.99875e-05, gnorm=19.318, loss_scale=2, train_wall=11, gb_free=2.8, wall=28616 2021-06-19 02:35:52 | INFO | train_inner | epoch 001: 2585 / 3002 loss=2.984, ppl=7.91, wps=5890.6, ups=0.09, wpb=64853, bsz=128, num_updates=2565, lr=9.99875e-05, gnorm=2.738, loss_scale=2, train_wall=11, gb_free=2.8, wall=28627 2021-06-19 02:36:03 | INFO | train_inner | epoch 001: 2586 / 3002 loss=3.057, ppl=8.32, wps=5805.2, ups=0.09, wpb=64807, bsz=128, num_updates=2566, lr=9.99875e-05, gnorm=4.539, loss_scale=2, train_wall=11, gb_free=2.8, wall=28638 2021-06-19 02:36:15 | INFO | train_inner | epoch 001: 2587 / 3002 loss=3.068, ppl=8.39, wps=5826.6, ups=0.09, wpb=64804, bsz=128, num_updates=2567, lr=9.99875e-05, gnorm=3.037, loss_scale=2, train_wall=11, gb_free=2.8, wall=28649 2021-06-19 02:36:26 | INFO | train_inner | epoch 001: 2588 / 3002 loss=2.887, ppl=7.4, wps=5889.3, ups=0.09, wpb=64843, bsz=128, num_updates=2568, lr=9.99875e-05, gnorm=4.999, loss_scale=2, train_wall=11, gb_free=2.8, wall=28660 2021-06-19 02:36:37 | INFO | train_inner | epoch 001: 2589 / 3002 loss=3.089, ppl=8.51, wps=5832.7, ups=0.09, wpb=64844, bsz=128, num_updates=2569, lr=9.99874e-05, gnorm=2.943, loss_scale=2, train_wall=11, gb_free=2.8, wall=28671 2021-06-19 02:36:48 | INFO | train_inner | epoch 001: 2590 / 3002 loss=2.954, ppl=7.75, wps=5981.2, ups=0.09, wpb=64822, bsz=128, num_updates=2570, lr=9.99874e-05, gnorm=2.924, loss_scale=2, train_wall=10, gb_free=2.8, wall=28682 2021-06-19 02:36:59 | INFO | train_inner | epoch 001: 2591 / 3002 loss=3.007, ppl=8.04, wps=5897.9, ups=0.09, wpb=64853, bsz=128, num_updates=2571, lr=9.99874e-05, gnorm=3.005, loss_scale=2, train_wall=11, gb_free=2.8, wall=28693 2021-06-19 02:37:10 | INFO | train_inner | epoch 001: 2592 / 3002 loss=2.999, ppl=7.99, wps=5779.2, ups=0.09, wpb=64774, bsz=128, num_updates=2572, lr=9.99874e-05, gnorm=2.889, loss_scale=2, train_wall=11, gb_free=2.8, wall=28704 2021-06-19 02:37:21 | INFO | train_inner | epoch 001: 2593 / 3002 loss=2.905, ppl=7.49, wps=5901.8, ups=0.09, wpb=64867, bsz=128, num_updates=2573, lr=9.99874e-05, gnorm=2.742, loss_scale=2, train_wall=11, gb_free=2.8, wall=28715 2021-06-19 02:37:32 | INFO | train_inner | epoch 001: 2594 / 3002 loss=2.818, ppl=7.05, wps=5853.1, ups=0.09, wpb=64824, bsz=128, num_updates=2574, lr=9.99874e-05, gnorm=2.867, loss_scale=2, train_wall=11, gb_free=2.8, wall=28726 2021-06-19 02:37:43 | INFO | train_inner | epoch 001: 2595 / 3002 loss=2.979, ppl=7.88, wps=5904, ups=0.09, wpb=64840, bsz=128, num_updates=2575, lr=9.99874e-05, gnorm=2.68, loss_scale=2, train_wall=11, gb_free=2.8, wall=28737 2021-06-19 02:37:54 | INFO | train_inner | epoch 001: 2596 / 3002 loss=3.049, ppl=8.27, wps=5935.9, ups=0.09, wpb=64862, bsz=128, num_updates=2576, lr=9.99874e-05, gnorm=5.816, loss_scale=2, train_wall=10, gb_free=2.8, wall=28748 2021-06-19 02:38:05 | INFO | train_inner | epoch 001: 2597 / 3002 loss=2.898, ppl=7.45, wps=5834.1, ups=0.09, wpb=64758, bsz=128, num_updates=2577, lr=9.99874e-05, gnorm=3.221, loss_scale=2, train_wall=11, gb_free=2.8, wall=28759 2021-06-19 02:38:16 | INFO | train_inner | epoch 001: 2598 / 3002 loss=2.905, ppl=7.49, wps=5794.8, ups=0.09, wpb=64791, bsz=128, num_updates=2578, lr=9.99874e-05, gnorm=3.106, loss_scale=2, train_wall=11, gb_free=2.8, wall=28770 2021-06-19 02:38:27 | INFO | train_inner | epoch 001: 2599 / 3002 loss=2.894, ppl=7.43, wps=5911.3, ups=0.09, wpb=64796, bsz=128, num_updates=2579, lr=9.99874e-05, gnorm=3.018, loss_scale=2, train_wall=10, gb_free=2.8, wall=28781 2021-06-19 02:38:38 | INFO | train_inner | epoch 001: 2600 / 3002 loss=3.036, ppl=8.2, wps=5866, ups=0.09, wpb=64836, bsz=128, num_updates=2580, lr=9.99874e-05, gnorm=2.726, loss_scale=2, train_wall=11, gb_free=2.8, wall=28792 2021-06-19 02:38:49 | INFO | train_inner | epoch 001: 2601 / 3002 loss=3.002, ppl=8.01, wps=6005.8, ups=0.09, wpb=64901, bsz=128, num_updates=2581, lr=9.99874e-05, gnorm=2.668, loss_scale=2, train_wall=10, gb_free=2.8, wall=28803 2021-06-19 02:39:00 | INFO | train_inner | epoch 001: 2602 / 3002 loss=3.058, ppl=8.33, wps=5873.6, ups=0.09, wpb=64817, bsz=128, num_updates=2582, lr=9.99873e-05, gnorm=2.829, loss_scale=2, train_wall=11, gb_free=2.8, wall=28814 2021-06-19 02:39:11 | INFO | train_inner | epoch 001: 2603 / 3002 loss=2.731, ppl=6.64, wps=5819.7, ups=0.09, wpb=64791, bsz=128, num_updates=2583, lr=9.99873e-05, gnorm=2.375, loss_scale=2, train_wall=11, gb_free=2.8, wall=28825 2021-06-19 02:39:22 | INFO | train_inner | epoch 001: 2604 / 3002 loss=2.976, ppl=7.87, wps=5937.5, ups=0.09, wpb=64863, bsz=128, num_updates=2584, lr=9.99873e-05, gnorm=5.467, loss_scale=2, train_wall=10, gb_free=2.8, wall=28836 2021-06-19 02:39:33 | INFO | train_inner | epoch 001: 2605 / 3002 loss=2.962, ppl=7.79, wps=5796, ups=0.09, wpb=64878, bsz=128, num_updates=2585, lr=9.99873e-05, gnorm=3.09, loss_scale=2, train_wall=11, gb_free=2.8, wall=28847 2021-06-19 02:39:44 | INFO | train_inner | epoch 001: 2606 / 3002 loss=2.928, ppl=7.61, wps=5868.2, ups=0.09, wpb=64804, bsz=128, num_updates=2586, lr=9.99873e-05, gnorm=2.739, loss_scale=2, train_wall=11, gb_free=2.8, wall=28859 2021-06-19 02:39:55 | INFO | train_inner | epoch 001: 2607 / 3002 loss=2.9, ppl=7.46, wps=5822.3, ups=0.09, wpb=64741, bsz=128, num_updates=2587, lr=9.99873e-05, gnorm=2.623, loss_scale=2, train_wall=11, gb_free=2.8, wall=28870 2021-06-19 02:40:06 | INFO | train_inner | epoch 001: 2608 / 3002 loss=2.876, ppl=7.34, wps=5800.5, ups=0.09, wpb=64798, bsz=128, num_updates=2588, lr=9.99873e-05, gnorm=2.411, loss_scale=2, train_wall=11, gb_free=2.8, wall=28881 2021-06-19 02:40:18 | INFO | train_inner | epoch 001: 2609 / 3002 loss=2.969, ppl=7.83, wps=5818.5, ups=0.09, wpb=64831, bsz=128, num_updates=2589, lr=9.99873e-05, gnorm=3.398, loss_scale=2, train_wall=11, gb_free=2.8, wall=28892 2021-06-19 02:40:29 | INFO | train_inner | epoch 001: 2610 / 3002 loss=3.008, ppl=8.05, wps=5958.3, ups=0.09, wpb=64889, bsz=128, num_updates=2590, lr=9.99873e-05, gnorm=2.834, loss_scale=2, train_wall=10, gb_free=2.8, wall=28903 2021-06-19 02:40:39 | INFO | train_inner | epoch 001: 2611 / 3002 loss=2.893, ppl=7.43, wps=6002, ups=0.09, wpb=64808, bsz=128, num_updates=2591, lr=9.99873e-05, gnorm=4.431, loss_scale=2, train_wall=10, gb_free=2.8, wall=28914 2021-06-19 02:40:50 | INFO | train_inner | epoch 001: 2612 / 3002 loss=2.739, ppl=6.68, wps=6020, ups=0.09, wpb=64903, bsz=128, num_updates=2592, lr=9.99873e-05, gnorm=2.485, loss_scale=2, train_wall=10, gb_free=2.8, wall=28924 2021-06-19 02:41:01 | INFO | train_inner | epoch 001: 2613 / 3002 loss=2.846, ppl=7.19, wps=5834, ups=0.09, wpb=64863, bsz=128, num_updates=2593, lr=9.99873e-05, gnorm=2.459, loss_scale=2, train_wall=11, gb_free=2.8, wall=28936 2021-06-19 02:41:12 | INFO | train_inner | epoch 001: 2614 / 3002 loss=2.938, ppl=7.66, wps=5835.4, ups=0.09, wpb=64847, bsz=128, num_updates=2594, lr=9.99872e-05, gnorm=3.098, loss_scale=2, train_wall=11, gb_free=2.8, wall=28947 2021-06-19 02:41:23 | INFO | train_inner | epoch 001: 2615 / 3002 loss=2.806, ppl=6.99, wps=5895.4, ups=0.09, wpb=64872, bsz=128, num_updates=2595, lr=9.99872e-05, gnorm=2.44, loss_scale=2, train_wall=11, gb_free=2.8, wall=28958 2021-06-19 02:41:34 | INFO | train_inner | epoch 001: 2616 / 3002 loss=3.054, ppl=8.3, wps=5851.9, ups=0.09, wpb=64761, bsz=128, num_updates=2596, lr=9.99872e-05, gnorm=2.655, loss_scale=2, train_wall=11, gb_free=2.8, wall=28969 2021-06-19 02:41:45 | INFO | train_inner | epoch 001: 2617 / 3002 loss=2.941, ppl=7.68, wps=5848.6, ups=0.09, wpb=64852, bsz=128, num_updates=2597, lr=9.99872e-05, gnorm=2.408, loss_scale=2, train_wall=11, gb_free=2.8, wall=28980 2021-06-19 02:41:56 | INFO | train_inner | epoch 001: 2618 / 3002 loss=2.847, ppl=7.2, wps=5946.2, ups=0.09, wpb=64951, bsz=128, num_updates=2598, lr=9.99872e-05, gnorm=2.443, loss_scale=2, train_wall=10, gb_free=2.8, wall=28991 2021-06-19 02:42:08 | INFO | train_inner | epoch 001: 2619 / 3002 loss=2.827, ppl=7.1, wps=5780, ups=0.09, wpb=64824, bsz=128, num_updates=2599, lr=9.99872e-05, gnorm=7.084, loss_scale=2, train_wall=11, gb_free=2.8, wall=29002 2021-06-19 02:42:19 | INFO | train_inner | epoch 001: 2620 / 3002 loss=2.844, ppl=7.18, wps=5820, ups=0.09, wpb=64796, bsz=128, num_updates=2600, lr=9.99872e-05, gnorm=2.564, loss_scale=2, train_wall=11, gb_free=2.8, wall=29013 2021-06-19 02:42:30 | INFO | train_inner | epoch 001: 2621 / 3002 loss=3.009, ppl=8.05, wps=5872.6, ups=0.09, wpb=64788, bsz=128, num_updates=2601, lr=9.99872e-05, gnorm=2.762, loss_scale=2, train_wall=11, gb_free=2.8, wall=29024 2021-06-19 02:42:41 | INFO | train_inner | epoch 001: 2622 / 3002 loss=3.011, ppl=8.06, wps=5886.6, ups=0.09, wpb=64825, bsz=128, num_updates=2602, lr=9.99872e-05, gnorm=2.451, loss_scale=2, train_wall=11, gb_free=2.8, wall=29035 2021-06-19 02:42:52 | INFO | train_inner | epoch 001: 2623 / 3002 loss=3.038, ppl=8.22, wps=5944.4, ups=0.09, wpb=64922, bsz=128, num_updates=2603, lr=9.99872e-05, gnorm=2.436, loss_scale=2, train_wall=10, gb_free=2.8, wall=29046 2021-06-19 02:43:03 | INFO | train_inner | epoch 001: 2624 / 3002 loss=2.883, ppl=7.38, wps=5865.3, ups=0.09, wpb=64771, bsz=128, num_updates=2604, lr=9.99872e-05, gnorm=2.419, loss_scale=2, train_wall=11, gb_free=2.8, wall=29057 2021-06-19 02:43:14 | INFO | train_inner | epoch 001: 2625 / 3002 loss=2.945, ppl=7.7, wps=5985.5, ups=0.09, wpb=64856, bsz=128, num_updates=2605, lr=9.99872e-05, gnorm=2.645, loss_scale=2, train_wall=10, gb_free=2.8, wall=29068 2021-06-19 02:43:25 | INFO | train_inner | epoch 001: 2626 / 3002 loss=2.984, ppl=7.91, wps=5925.5, ups=0.09, wpb=64938, bsz=128, num_updates=2606, lr=9.99872e-05, gnorm=3.794, loss_scale=2, train_wall=11, gb_free=2.8, wall=29079 2021-06-19 02:43:36 | INFO | train_inner | epoch 001: 2627 / 3002 loss=3.073, ppl=8.42, wps=5791.9, ups=0.09, wpb=64828, bsz=128, num_updates=2607, lr=9.99871e-05, gnorm=2.517, loss_scale=2, train_wall=11, gb_free=2.8, wall=29090 2021-06-19 02:43:47 | INFO | train_inner | epoch 001: 2628 / 3002 loss=2.769, ppl=6.81, wps=5808.7, ups=0.09, wpb=64807, bsz=128, num_updates=2608, lr=9.99871e-05, gnorm=2.385, loss_scale=2, train_wall=11, gb_free=2.8, wall=29101 2021-06-19 02:43:58 | INFO | train_inner | epoch 001: 2629 / 3002 loss=2.81, ppl=7.01, wps=5948.9, ups=0.09, wpb=64902, bsz=128, num_updates=2609, lr=9.99871e-05, gnorm=2.413, loss_scale=2, train_wall=10, gb_free=2.8, wall=29112 2021-06-19 02:44:09 | INFO | train_inner | epoch 001: 2630 / 3002 loss=2.832, ppl=7.12, wps=5760.2, ups=0.09, wpb=64801, bsz=128, num_updates=2610, lr=9.99871e-05, gnorm=2.433, loss_scale=2, train_wall=11, gb_free=2.8, wall=29123 2021-06-19 02:44:20 | INFO | train_inner | epoch 001: 2631 / 3002 loss=2.782, ppl=6.88, wps=5838.8, ups=0.09, wpb=64917, bsz=128, num_updates=2611, lr=9.99871e-05, gnorm=2.598, loss_scale=2, train_wall=11, gb_free=2.8, wall=29135 2021-06-19 02:44:31 | INFO | train_inner | epoch 001: 2632 / 3002 loss=2.806, ppl=6.99, wps=5855.4, ups=0.09, wpb=64958, bsz=128, num_updates=2612, lr=9.99871e-05, gnorm=2.45, loss_scale=2, train_wall=11, gb_free=2.8, wall=29146 2021-06-19 02:44:42 | INFO | train_inner | epoch 001: 2633 / 3002 loss=2.841, ppl=7.17, wps=5854.5, ups=0.09, wpb=64831, bsz=128, num_updates=2613, lr=9.99871e-05, gnorm=2.377, loss_scale=2, train_wall=11, gb_free=2.8, wall=29157 2021-06-19 02:44:53 | INFO | train_inner | epoch 001: 2634 / 3002 loss=2.814, ppl=7.03, wps=5851, ups=0.09, wpb=64807, bsz=128, num_updates=2614, lr=9.99871e-05, gnorm=2.449, loss_scale=2, train_wall=11, gb_free=2.8, wall=29168 2021-06-19 02:45:05 | INFO | train_inner | epoch 001: 2635 / 3002 loss=2.945, ppl=7.7, wps=5803.3, ups=0.09, wpb=64764, bsz=128, num_updates=2615, lr=9.99871e-05, gnorm=2.438, loss_scale=2, train_wall=11, gb_free=2.8, wall=29179 2021-06-19 02:45:16 | INFO | train_inner | epoch 001: 2636 / 3002 loss=3.058, ppl=8.33, wps=5859.7, ups=0.09, wpb=64835, bsz=128, num_updates=2616, lr=9.99871e-05, gnorm=2.46, loss_scale=2, train_wall=11, gb_free=2.8, wall=29190 2021-06-19 02:45:27 | INFO | train_inner | epoch 001: 2637 / 3002 loss=2.893, ppl=7.43, wps=5819.7, ups=0.09, wpb=64812, bsz=128, num_updates=2617, lr=9.99871e-05, gnorm=2.533, loss_scale=2, train_wall=11, gb_free=2.8, wall=29201 2021-06-19 02:45:38 | INFO | train_inner | epoch 001: 2638 / 3002 loss=3.038, ppl=8.22, wps=5933.6, ups=0.09, wpb=64809, bsz=128, num_updates=2618, lr=9.99871e-05, gnorm=2.318, loss_scale=2, train_wall=10, gb_free=2.8, wall=29212 2021-06-19 02:45:49 | INFO | train_inner | epoch 001: 2639 / 3002 loss=2.854, ppl=7.23, wps=5986.5, ups=0.09, wpb=64907, bsz=128, num_updates=2619, lr=9.9987e-05, gnorm=2.405, loss_scale=2, train_wall=10, gb_free=2.8, wall=29223 2021-06-19 02:46:00 | INFO | train_inner | epoch 001: 2640 / 3002 loss=2.807, ppl=7, wps=5896.1, ups=0.09, wpb=64833, bsz=128, num_updates=2620, lr=9.9987e-05, gnorm=2.371, loss_scale=2, train_wall=11, gb_free=2.8, wall=29234 2021-06-19 02:46:11 | INFO | train_inner | epoch 001: 2641 / 3002 loss=2.902, ppl=7.47, wps=5821.5, ups=0.09, wpb=64812, bsz=128, num_updates=2621, lr=9.9987e-05, gnorm=2.406, loss_scale=2, train_wall=11, gb_free=2.8, wall=29245 2021-06-19 02:46:21 | INFO | train_inner | epoch 001: 2642 / 3002 loss=3.054, ppl=8.31, wps=6009.7, ups=0.09, wpb=64888, bsz=128, num_updates=2622, lr=9.9987e-05, gnorm=2.422, loss_scale=2, train_wall=10, gb_free=2.8, wall=29256 2021-06-19 02:46:32 | INFO | train_inner | epoch 001: 2643 / 3002 loss=2.916, ppl=7.55, wps=5892.6, ups=0.09, wpb=64804, bsz=128, num_updates=2623, lr=9.9987e-05, gnorm=2.495, loss_scale=2, train_wall=11, gb_free=2.8, wall=29267 2021-06-19 02:46:44 | INFO | train_inner | epoch 001: 2644 / 3002 loss=3.161, ppl=8.95, wps=5793.4, ups=0.09, wpb=64829, bsz=128, num_updates=2624, lr=9.9987e-05, gnorm=2.495, loss_scale=2, train_wall=11, gb_free=2.8, wall=29278 2021-06-19 02:46:55 | INFO | train_inner | epoch 001: 2645 / 3002 loss=3.124, ppl=8.72, wps=5888.7, ups=0.09, wpb=64780, bsz=128, num_updates=2625, lr=9.9987e-05, gnorm=2.483, loss_scale=2, train_wall=11, gb_free=2.8, wall=29289 2021-06-19 02:47:06 | INFO | train_inner | epoch 001: 2646 / 3002 loss=2.92, ppl=7.57, wps=5774.6, ups=0.09, wpb=64721, bsz=128, num_updates=2626, lr=9.9987e-05, gnorm=2.748, loss_scale=2, train_wall=11, gb_free=2.8, wall=29300 2021-06-19 02:47:17 | INFO | train_inner | epoch 001: 2647 / 3002 loss=2.775, ppl=6.85, wps=5898.8, ups=0.09, wpb=64903, bsz=128, num_updates=2627, lr=9.9987e-05, gnorm=2.348, loss_scale=2, train_wall=11, gb_free=2.8, wall=29311 2021-06-19 02:47:28 | INFO | train_inner | epoch 001: 2648 / 3002 loss=2.847, ppl=7.19, wps=6029.8, ups=0.09, wpb=64826, bsz=128, num_updates=2628, lr=9.9987e-05, gnorm=2.398, loss_scale=2, train_wall=10, gb_free=2.8, wall=29322 2021-06-19 02:47:39 | INFO | train_inner | epoch 001: 2649 / 3002 loss=2.885, ppl=7.39, wps=5888.1, ups=0.09, wpb=64830, bsz=128, num_updates=2629, lr=9.9987e-05, gnorm=2.709, loss_scale=2, train_wall=11, gb_free=2.8, wall=29333 2021-06-19 02:47:50 | INFO | train_inner | epoch 001: 2650 / 3002 loss=3.018, ppl=8.1, wps=5847.4, ups=0.09, wpb=64791, bsz=128, num_updates=2630, lr=9.9987e-05, gnorm=2.399, loss_scale=2, train_wall=11, gb_free=2.8, wall=29344 2021-06-19 02:48:01 | INFO | train_inner | epoch 001: 2651 / 3002 loss=2.853, ppl=7.23, wps=5925.1, ups=0.09, wpb=64825, bsz=128, num_updates=2631, lr=9.9987e-05, gnorm=2.468, loss_scale=2, train_wall=10, gb_free=2.8, wall=29355 2021-06-19 02:48:12 | INFO | train_inner | epoch 001: 2652 / 3002 loss=2.977, ppl=7.87, wps=5903.2, ups=0.09, wpb=64830, bsz=128, num_updates=2632, lr=9.99869e-05, gnorm=2.625, loss_scale=2, train_wall=11, gb_free=2.8, wall=29366 2021-06-19 02:48:23 | INFO | train_inner | epoch 001: 2653 / 3002 loss=2.911, ppl=7.52, wps=5805, ups=0.09, wpb=64850, bsz=128, num_updates=2633, lr=9.99869e-05, gnorm=2.76, loss_scale=2, train_wall=11, gb_free=2.8, wall=29377 2021-06-19 02:48:34 | INFO | train_inner | epoch 001: 2654 / 3002 loss=3.016, ppl=8.09, wps=5934.2, ups=0.09, wpb=64863, bsz=128, num_updates=2634, lr=9.99869e-05, gnorm=2.377, loss_scale=2, train_wall=10, gb_free=2.8, wall=29388 2021-06-19 02:48:45 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 2021-06-19 02:48:56 | INFO | train_inner | epoch 001: 2656 / 3002 loss=2.949, ppl=7.72, wps=2944.6, ups=0.05, wpb=64860, bsz=128, num_updates=2635, lr=9.99869e-05, gnorm=2.36, loss_scale=1, train_wall=21, gb_free=2.8, wall=29410 2021-06-19 02:49:07 | INFO | train_inner | epoch 001: 2657 / 3002 loss=2.742, ppl=6.69, wps=5867, ups=0.09, wpb=64893, bsz=128, num_updates=2636, lr=9.99869e-05, gnorm=2.404, loss_scale=1, train_wall=11, gb_free=2.8, wall=29421 2021-06-19 02:49:18 | INFO | train_inner | epoch 001: 2658 / 3002 loss=2.903, ppl=7.48, wps=5741.5, ups=0.09, wpb=64868, bsz=128, num_updates=2637, lr=9.99869e-05, gnorm=2.871, loss_scale=1, train_wall=11, gb_free=2.8, wall=29432 2021-06-19 02:49:29 | INFO | train_inner | epoch 001: 2659 / 3002 loss=2.903, ppl=7.48, wps=5766.9, ups=0.09, wpb=64814, bsz=128, num_updates=2638, lr=9.99869e-05, gnorm=6.687, loss_scale=1, train_wall=11, gb_free=2.8, wall=29444 2021-06-19 02:49:40 | INFO | train_inner | epoch 001: 2660 / 3002 loss=2.896, ppl=7.44, wps=5893.2, ups=0.09, wpb=64906, bsz=128, num_updates=2639, lr=9.99869e-05, gnorm=2.519, loss_scale=1, train_wall=11, gb_free=2.8, wall=29455 2021-06-19 02:49:51 | INFO | train_inner | epoch 001: 2661 / 3002 loss=2.717, ppl=6.57, wps=5963.9, ups=0.09, wpb=64894, bsz=128, num_updates=2640, lr=9.99869e-05, gnorm=2.388, loss_scale=1, train_wall=10, gb_free=2.8, wall=29466 2021-06-19 02:50:02 | INFO | train_inner | epoch 001: 2662 / 3002 loss=2.909, ppl=7.51, wps=5889.4, ups=0.09, wpb=64836, bsz=128, num_updates=2641, lr=9.99869e-05, gnorm=2.646, loss_scale=1, train_wall=11, gb_free=2.8, wall=29477 2021-06-19 02:50:13 | INFO | train_inner | epoch 001: 2663 / 3002 loss=2.748, ppl=6.72, wps=5839.7, ups=0.09, wpb=64868, bsz=128, num_updates=2642, lr=9.99869e-05, gnorm=2.329, loss_scale=1, train_wall=11, gb_free=2.8, wall=29488 2021-06-19 02:50:25 | INFO | train_inner | epoch 001: 2664 / 3002 loss=2.764, ppl=6.79, wps=5825.8, ups=0.09, wpb=64787, bsz=128, num_updates=2643, lr=9.99869e-05, gnorm=2.434, loss_scale=1, train_wall=11, gb_free=2.8, wall=29499 2021-06-19 02:50:36 | INFO | train_inner | epoch 001: 2665 / 3002 loss=2.989, ppl=7.94, wps=5767.3, ups=0.09, wpb=64813, bsz=128, num_updates=2644, lr=9.99868e-05, gnorm=2.533, loss_scale=1, train_wall=11, gb_free=2.8, wall=29510 2021-06-19 02:50:47 | INFO | train_inner | epoch 001: 2666 / 3002 loss=3.142, ppl=8.83, wps=5826.4, ups=0.09, wpb=64876, bsz=128, num_updates=2645, lr=9.99868e-05, gnorm=2.373, loss_scale=1, train_wall=11, gb_free=2.8, wall=29521 2021-06-19 02:50:58 | INFO | train_inner | epoch 001: 2667 / 3002 loss=2.813, ppl=7.03, wps=5884.3, ups=0.09, wpb=64855, bsz=128, num_updates=2646, lr=9.99868e-05, gnorm=2.456, loss_scale=1, train_wall=11, gb_free=2.8, wall=29532 2021-06-19 02:51:09 | INFO | train_inner | epoch 001: 2668 / 3002 loss=3.074, ppl=8.42, wps=5806.5, ups=0.09, wpb=64807, bsz=128, num_updates=2647, lr=9.99868e-05, gnorm=2.389, loss_scale=1, train_wall=11, gb_free=2.8, wall=29543 2021-06-19 02:51:20 | INFO | train_inner | epoch 001: 2669 / 3002 loss=2.923, ppl=7.59, wps=6038.8, ups=0.09, wpb=64832, bsz=128, num_updates=2648, lr=9.99868e-05, gnorm=2.718, loss_scale=1, train_wall=10, gb_free=2.8, wall=29554 2021-06-19 02:51:31 | INFO | train_inner | epoch 001: 2670 / 3002 loss=2.858, ppl=7.25, wps=5951.1, ups=0.09, wpb=64810, bsz=128, num_updates=2649, lr=9.99868e-05, gnorm=4.241, loss_scale=1, train_wall=10, gb_free=2.8, wall=29565 2021-06-19 02:51:42 | INFO | train_inner | epoch 001: 2671 / 3002 loss=2.94, ppl=7.67, wps=5840.1, ups=0.09, wpb=64765, bsz=128, num_updates=2650, lr=9.99868e-05, gnorm=2.36, loss_scale=1, train_wall=11, gb_free=2.8, wall=29576 2021-06-19 02:51:53 | INFO | train_inner | epoch 001: 2672 / 3002 loss=2.849, ppl=7.2, wps=5814.2, ups=0.09, wpb=64882, bsz=128, num_updates=2651, lr=9.99868e-05, gnorm=2.423, loss_scale=1, train_wall=11, gb_free=2.8, wall=29587 2021-06-19 02:52:04 | INFO | train_inner | epoch 001: 2673 / 3002 loss=2.716, ppl=6.57, wps=5930.5, ups=0.09, wpb=64927, bsz=128, num_updates=2652, lr=9.99868e-05, gnorm=2.26, loss_scale=1, train_wall=11, gb_free=2.8, wall=29598 2021-06-19 02:52:15 | INFO | train_inner | epoch 001: 2674 / 3002 loss=2.954, ppl=7.75, wps=5895.6, ups=0.09, wpb=64857, bsz=128, num_updates=2653, lr=9.99868e-05, gnorm=2.378, loss_scale=1, train_wall=11, gb_free=2.8, wall=29609 2021-06-19 02:52:26 | INFO | train_inner | epoch 001: 2675 / 3002 loss=3.003, ppl=8.02, wps=5912.6, ups=0.09, wpb=64792, bsz=128, num_updates=2654, lr=9.99868e-05, gnorm=3.632, loss_scale=1, train_wall=11, gb_free=2.8, wall=29620 2021-06-19 02:52:37 | INFO | train_inner | epoch 001: 2676 / 3002 loss=2.886, ppl=7.39, wps=5916.6, ups=0.09, wpb=64850, bsz=128, num_updates=2655, lr=9.99868e-05, gnorm=2.359, loss_scale=1, train_wall=11, gb_free=2.8, wall=29631 2021-06-19 02:52:48 | INFO | train_inner | epoch 001: 2677 / 3002 loss=3.069, ppl=8.39, wps=5788.5, ups=0.09, wpb=64736, bsz=128, num_updates=2656, lr=9.99868e-05, gnorm=2.319, loss_scale=1, train_wall=11, gb_free=2.8, wall=29642 2021-06-19 02:52:59 | INFO | train_inner | epoch 001: 2678 / 3002 loss=2.939, ppl=7.67, wps=6112.7, ups=0.09, wpb=64904, bsz=128, num_updates=2657, lr=9.99867e-05, gnorm=2.464, loss_scale=1, train_wall=10, gb_free=2.8, wall=29653 2021-06-19 02:53:10 | INFO | train_inner | epoch 001: 2679 / 3002 loss=2.788, ppl=6.91, wps=5826.2, ups=0.09, wpb=64829, bsz=128, num_updates=2658, lr=9.99867e-05, gnorm=2.382, loss_scale=1, train_wall=11, gb_free=2.8, wall=29664 2021-06-19 02:53:21 | INFO | train_inner | epoch 001: 2680 / 3002 loss=2.875, ppl=7.33, wps=5834, ups=0.09, wpb=64884, bsz=128, num_updates=2659, lr=9.99867e-05, gnorm=2.502, loss_scale=1, train_wall=11, gb_free=2.8, wall=29675 2021-06-19 02:53:32 | INFO | train_inner | epoch 001: 2681 / 3002 loss=2.812, ppl=7.02, wps=5883.7, ups=0.09, wpb=64879, bsz=128, num_updates=2660, lr=9.99867e-05, gnorm=2.339, loss_scale=1, train_wall=11, gb_free=2.8, wall=29686 2021-06-19 02:53:43 | INFO | train_inner | epoch 001: 2682 / 3002 loss=2.945, ppl=7.7, wps=5901.1, ups=0.09, wpb=64821, bsz=128, num_updates=2661, lr=9.99867e-05, gnorm=2.525, loss_scale=1, train_wall=11, gb_free=2.8, wall=29697 2021-06-19 02:53:54 | INFO | train_inner | epoch 001: 2683 / 3002 loss=2.98, ppl=7.89, wps=5842.5, ups=0.09, wpb=64900, bsz=128, num_updates=2662, lr=9.99867e-05, gnorm=2.36, loss_scale=1, train_wall=11, gb_free=2.8, wall=29708 2021-06-19 02:54:05 | INFO | train_inner | epoch 001: 2684 / 3002 loss=2.833, ppl=7.12, wps=5878.5, ups=0.09, wpb=64820, bsz=128, num_updates=2663, lr=9.99867e-05, gnorm=2.569, loss_scale=1, train_wall=11, gb_free=2.8, wall=29719 2021-06-19 02:54:16 | INFO | train_inner | epoch 001: 2685 / 3002 loss=2.873, ppl=7.32, wps=5884.1, ups=0.09, wpb=64828, bsz=128, num_updates=2664, lr=9.99867e-05, gnorm=2.341, loss_scale=1, train_wall=11, gb_free=2.8, wall=29730 2021-06-19 02:54:27 | INFO | train_inner | epoch 001: 2686 / 3002 loss=3.003, ppl=8.01, wps=5753.7, ups=0.09, wpb=64733, bsz=128, num_updates=2665, lr=9.99867e-05, gnorm=2.336, loss_scale=1, train_wall=11, gb_free=2.8, wall=29742 2021-06-19 02:54:38 | INFO | train_inner | epoch 001: 2687 / 3002 loss=2.848, ppl=7.2, wps=5857.9, ups=0.09, wpb=64852, bsz=128, num_updates=2666, lr=9.99867e-05, gnorm=2.403, loss_scale=1, train_wall=11, gb_free=2.8, wall=29753 2021-06-19 02:54:49 | INFO | train_inner | epoch 001: 2688 / 3002 loss=2.899, ppl=7.46, wps=5832.8, ups=0.09, wpb=64909, bsz=128, num_updates=2667, lr=9.99867e-05, gnorm=2.331, loss_scale=1, train_wall=11, gb_free=2.8, wall=29764 2021-06-19 02:55:01 | INFO | train_inner | epoch 001: 2689 / 3002 loss=2.984, ppl=7.91, wps=5841.4, ups=0.09, wpb=64851, bsz=128, num_updates=2668, lr=9.99867e-05, gnorm=2.687, loss_scale=1, train_wall=11, gb_free=2.8, wall=29775 2021-06-19 02:55:12 | INFO | train_inner | epoch 001: 2690 / 3002 loss=2.699, ppl=6.49, wps=5826.7, ups=0.09, wpb=64900, bsz=128, num_updates=2669, lr=9.99866e-05, gnorm=2.253, loss_scale=1, train_wall=11, gb_free=2.8, wall=29786 2021-06-19 02:55:23 | INFO | train_inner | epoch 001: 2691 / 3002 loss=2.806, ppl=6.99, wps=5866.6, ups=0.09, wpb=64851, bsz=128, num_updates=2670, lr=9.99866e-05, gnorm=2.375, loss_scale=1, train_wall=11, gb_free=2.8, wall=29797 2021-06-19 02:55:34 | INFO | train_inner | epoch 001: 2692 / 3002 loss=2.907, ppl=7.5, wps=5878.6, ups=0.09, wpb=64778, bsz=128, num_updates=2671, lr=9.99866e-05, gnorm=2.36, loss_scale=1, train_wall=11, gb_free=2.8, wall=29808 2021-06-19 02:55:45 | INFO | train_inner | epoch 001: 2693 / 3002 loss=2.809, ppl=7.01, wps=5926.4, ups=0.09, wpb=64806, bsz=128, num_updates=2672, lr=9.99866e-05, gnorm=2.428, loss_scale=1, train_wall=11, gb_free=2.8, wall=29819 2021-06-19 02:55:56 | INFO | train_inner | epoch 001: 2694 / 3002 loss=3.041, ppl=8.23, wps=5979.6, ups=0.09, wpb=64872, bsz=128, num_updates=2673, lr=9.99866e-05, gnorm=2.511, loss_scale=1, train_wall=10, gb_free=2.8, wall=29830 2021-06-19 02:56:07 | INFO | train_inner | epoch 001: 2695 / 3002 loss=2.698, ppl=6.49, wps=5709.7, ups=0.09, wpb=64843, bsz=128, num_updates=2674, lr=9.99866e-05, gnorm=2.437, loss_scale=1, train_wall=11, gb_free=2.8, wall=29841 2021-06-19 02:56:18 | INFO | train_inner | epoch 001: 2696 / 3002 loss=2.892, ppl=7.42, wps=5948, ups=0.09, wpb=64842, bsz=128, num_updates=2675, lr=9.99866e-05, gnorm=2.418, loss_scale=1, train_wall=10, gb_free=2.8, wall=29852 2021-06-19 02:56:29 | INFO | train_inner | epoch 001: 2697 / 3002 loss=2.976, ppl=7.87, wps=5805, ups=0.09, wpb=64841, bsz=128, num_updates=2676, lr=9.99866e-05, gnorm=2.438, loss_scale=1, train_wall=11, gb_free=2.8, wall=29863 2021-06-19 02:56:40 | INFO | train_inner | epoch 001: 2698 / 3002 loss=3.005, ppl=8.03, wps=5869.8, ups=0.09, wpb=64923, bsz=128, num_updates=2677, lr=9.99866e-05, gnorm=2.539, loss_scale=1, train_wall=11, gb_free=2.8, wall=29874 2021-06-19 02:56:51 | INFO | train_inner | epoch 001: 2699 / 3002 loss=2.866, ppl=7.29, wps=5891.5, ups=0.09, wpb=64819, bsz=128, num_updates=2678, lr=9.99866e-05, gnorm=5.749, loss_scale=1, train_wall=11, gb_free=2.8, wall=29885 2021-06-19 02:57:02 | INFO | train_inner | epoch 001: 2700 / 3002 loss=2.933, ppl=7.64, wps=5877.5, ups=0.09, wpb=64813, bsz=128, num_updates=2679, lr=9.99866e-05, gnorm=2.353, loss_scale=1, train_wall=11, gb_free=2.8, wall=29896 2021-06-19 02:57:13 | INFO | train_inner | epoch 001: 2701 / 3002 loss=2.899, ppl=7.46, wps=5843.1, ups=0.09, wpb=64844, bsz=128, num_updates=2680, lr=9.99866e-05, gnorm=2.341, loss_scale=1, train_wall=11, gb_free=2.8, wall=29908 2021-06-19 02:57:24 | INFO | train_inner | epoch 001: 2702 / 3002 loss=3.027, ppl=8.15, wps=5751.6, ups=0.09, wpb=64858, bsz=128, num_updates=2681, lr=9.99866e-05, gnorm=2.388, loss_scale=1, train_wall=11, gb_free=2.8, wall=29919 2021-06-19 02:57:36 | INFO | train_inner | epoch 001: 2703 / 3002 loss=2.794, ppl=6.93, wps=5824.7, ups=0.09, wpb=64856, bsz=128, num_updates=2682, lr=9.99865e-05, gnorm=2.415, loss_scale=1, train_wall=11, gb_free=2.8, wall=29930 2021-06-19 02:57:47 | INFO | train_inner | epoch 001: 2704 / 3002 loss=2.734, ppl=6.65, wps=5857.8, ups=0.09, wpb=64765, bsz=128, num_updates=2683, lr=9.99865e-05, gnorm=2.379, loss_scale=1, train_wall=11, gb_free=2.8, wall=29941 2021-06-19 02:57:58 | INFO | train_inner | epoch 001: 2705 / 3002 loss=2.917, ppl=7.55, wps=5883.7, ups=0.09, wpb=64912, bsz=128, num_updates=2684, lr=9.99865e-05, gnorm=2.54, loss_scale=1, train_wall=11, gb_free=2.8, wall=29952 2021-06-19 02:58:09 | INFO | train_inner | epoch 001: 2706 / 3002 loss=2.832, ppl=7.12, wps=5808.4, ups=0.09, wpb=64745, bsz=128, num_updates=2685, lr=9.99865e-05, gnorm=2.351, loss_scale=1, train_wall=11, gb_free=2.8, wall=29963 2021-06-19 02:58:20 | INFO | train_inner | epoch 001: 2707 / 3002 loss=2.929, ppl=7.62, wps=5900.7, ups=0.09, wpb=64870, bsz=128, num_updates=2686, lr=9.99865e-05, gnorm=2.337, loss_scale=1, train_wall=11, gb_free=2.8, wall=29974 2021-06-19 02:58:31 | INFO | train_inner | epoch 001: 2708 / 3002 loss=2.989, ppl=7.94, wps=5885.5, ups=0.09, wpb=64867, bsz=128, num_updates=2687, lr=9.99865e-05, gnorm=2.396, loss_scale=1, train_wall=11, gb_free=2.8, wall=29985 2021-06-19 02:58:42 | INFO | train_inner | epoch 001: 2709 / 3002 loss=2.772, ppl=6.83, wps=5797.9, ups=0.09, wpb=64776, bsz=128, num_updates=2688, lr=9.99865e-05, gnorm=2.684, loss_scale=1, train_wall=11, gb_free=2.8, wall=29996 2021-06-19 02:58:53 | INFO | train_inner | epoch 001: 2710 / 3002 loss=2.652, ppl=6.29, wps=5837.7, ups=0.09, wpb=64863, bsz=128, num_updates=2689, lr=9.99865e-05, gnorm=7.233, loss_scale=1, train_wall=11, gb_free=2.8, wall=30007 2021-06-19 02:59:04 | INFO | train_inner | epoch 001: 2711 / 3002 loss=2.733, ppl=6.65, wps=5876.7, ups=0.09, wpb=64845, bsz=128, num_updates=2690, lr=9.99865e-05, gnorm=2.355, loss_scale=1, train_wall=11, gb_free=2.8, wall=30019 2021-06-19 02:59:15 | INFO | train_inner | epoch 001: 2712 / 3002 loss=3.009, ppl=8.05, wps=5892.2, ups=0.09, wpb=64820, bsz=128, num_updates=2691, lr=9.99865e-05, gnorm=2.61, loss_scale=1, train_wall=11, gb_free=2.8, wall=30030 2021-06-19 02:59:26 | INFO | train_inner | epoch 001: 2713 / 3002 loss=2.786, ppl=6.9, wps=5895.4, ups=0.09, wpb=64814, bsz=128, num_updates=2692, lr=9.99865e-05, gnorm=7.02, loss_scale=1, train_wall=11, gb_free=2.8, wall=30041 2021-06-19 02:59:37 | INFO | train_inner | epoch 001: 2714 / 3002 loss=2.635, ppl=6.21, wps=5897.9, ups=0.09, wpb=64884, bsz=128, num_updates=2693, lr=9.99865e-05, gnorm=2.778, loss_scale=1, train_wall=11, gb_free=2.8, wall=30052 2021-06-19 02:59:48 | INFO | train_inner | epoch 001: 2715 / 3002 loss=2.917, ppl=7.55, wps=5795.4, ups=0.09, wpb=64845, bsz=128, num_updates=2694, lr=9.99864e-05, gnorm=5.651, loss_scale=1, train_wall=11, gb_free=2.8, wall=30063 2021-06-19 03:00:00 | INFO | train_inner | epoch 001: 2716 / 3002 loss=2.88, ppl=7.36, wps=5796.6, ups=0.09, wpb=64822, bsz=128, num_updates=2695, lr=9.99864e-05, gnorm=2.78, loss_scale=1, train_wall=11, gb_free=2.8, wall=30074 2021-06-19 03:00:11 | INFO | train_inner | epoch 001: 2717 / 3002 loss=2.806, ppl=6.99, wps=5732.8, ups=0.09, wpb=64753, bsz=128, num_updates=2696, lr=9.99864e-05, gnorm=2.317, loss_scale=1, train_wall=11, gb_free=2.8, wall=30085 2021-06-19 03:00:22 | INFO | train_inner | epoch 001: 2718 / 3002 loss=2.773, ppl=6.84, wps=5868.8, ups=0.09, wpb=64821, bsz=128, num_updates=2697, lr=9.99864e-05, gnorm=2.424, loss_scale=1, train_wall=11, gb_free=2.8, wall=30096 2021-06-19 03:00:33 | INFO | train_inner | epoch 001: 2719 / 3002 loss=3.072, ppl=8.41, wps=5923.8, ups=0.09, wpb=64771, bsz=128, num_updates=2698, lr=9.99864e-05, gnorm=2.723, loss_scale=1, train_wall=11, gb_free=2.8, wall=30107 2021-06-19 03:00:44 | INFO | train_inner | epoch 001: 2720 / 3002 loss=2.982, ppl=7.9, wps=5830.4, ups=0.09, wpb=64735, bsz=128, num_updates=2699, lr=9.99864e-05, gnorm=2.319, loss_scale=1, train_wall=11, gb_free=2.8, wall=30118 2021-06-19 03:00:55 | INFO | train_inner | epoch 001: 2721 / 3002 loss=2.858, ppl=7.25, wps=5806.7, ups=0.09, wpb=64904, bsz=128, num_updates=2700, lr=9.99864e-05, gnorm=2.429, loss_scale=1, train_wall=11, gb_free=2.8, wall=30129 2021-06-19 03:01:06 | INFO | train_inner | epoch 001: 2722 / 3002 loss=2.943, ppl=7.69, wps=5822.6, ups=0.09, wpb=64806, bsz=128, num_updates=2701, lr=9.99864e-05, gnorm=2.331, loss_scale=1, train_wall=11, gb_free=2.8, wall=30141 2021-06-19 03:01:17 | INFO | train_inner | epoch 001: 2723 / 3002 loss=2.865, ppl=7.29, wps=5898.3, ups=0.09, wpb=64841, bsz=128, num_updates=2702, lr=9.99864e-05, gnorm=2.729, loss_scale=1, train_wall=11, gb_free=2.8, wall=30152 2021-06-19 03:01:28 | INFO | train_inner | epoch 001: 2724 / 3002 loss=2.871, ppl=7.32, wps=5913.3, ups=0.09, wpb=64846, bsz=128, num_updates=2703, lr=9.99864e-05, gnorm=2.544, loss_scale=1, train_wall=11, gb_free=2.8, wall=30163 2021-06-19 03:01:39 | INFO | train_inner | epoch 001: 2725 / 3002 loss=2.884, ppl=7.38, wps=5893.3, ups=0.09, wpb=64868, bsz=128, num_updates=2704, lr=9.99864e-05, gnorm=2.564, loss_scale=1, train_wall=11, gb_free=2.8, wall=30174 2021-06-19 03:01:50 | INFO | train_inner | epoch 001: 2726 / 3002 loss=2.827, ppl=7.1, wps=5780.6, ups=0.09, wpb=64829, bsz=128, num_updates=2705, lr=9.99864e-05, gnorm=2.416, loss_scale=1, train_wall=11, gb_free=2.8, wall=30185 2021-06-19 03:02:01 | INFO | train_inner | epoch 001: 2727 / 3002 loss=2.932, ppl=7.63, wps=5851.7, ups=0.09, wpb=64824, bsz=128, num_updates=2706, lr=9.99864e-05, gnorm=2.475, loss_scale=1, train_wall=11, gb_free=2.8, wall=30196 2021-06-19 03:02:13 | INFO | train_inner | epoch 001: 2728 / 3002 loss=3.027, ppl=8.15, wps=5778.7, ups=0.09, wpb=64756, bsz=128, num_updates=2707, lr=9.99863e-05, gnorm=2.459, loss_scale=1, train_wall=11, gb_free=2.8, wall=30207 2021-06-19 03:02:24 | INFO | train_inner | epoch 001: 2729 / 3002 loss=2.816, ppl=7.04, wps=5907.8, ups=0.09, wpb=64882, bsz=128, num_updates=2708, lr=9.99863e-05, gnorm=2.477, loss_scale=1, train_wall=11, gb_free=2.8, wall=30218 2021-06-19 03:02:35 | INFO | train_inner | epoch 001: 2730 / 3002 loss=2.854, ppl=7.23, wps=5844.9, ups=0.09, wpb=64813, bsz=128, num_updates=2709, lr=9.99863e-05, gnorm=2.542, loss_scale=1, train_wall=11, gb_free=2.8, wall=30229 2021-06-19 03:02:46 | INFO | train_inner | epoch 001: 2731 / 3002 loss=2.859, ppl=7.25, wps=5867.8, ups=0.09, wpb=64831, bsz=128, num_updates=2710, lr=9.99863e-05, gnorm=2.623, loss_scale=1, train_wall=11, gb_free=2.8, wall=30240 2021-06-19 03:02:57 | INFO | train_inner | epoch 001: 2732 / 3002 loss=2.985, ppl=7.92, wps=5919.5, ups=0.09, wpb=64827, bsz=128, num_updates=2711, lr=9.99863e-05, gnorm=3.929, loss_scale=1, train_wall=10, gb_free=2.8, wall=30251 2021-06-19 03:03:08 | INFO | train_inner | epoch 001: 2733 / 3002 loss=2.922, ppl=7.58, wps=5825.5, ups=0.09, wpb=64814, bsz=128, num_updates=2712, lr=9.99863e-05, gnorm=2.682, loss_scale=1, train_wall=11, gb_free=2.8, wall=30262 2021-06-19 03:03:19 | INFO | train_inner | epoch 001: 2734 / 3002 loss=2.87, ppl=7.31, wps=5820.6, ups=0.09, wpb=64839, bsz=128, num_updates=2713, lr=9.99863e-05, gnorm=2.345, loss_scale=1, train_wall=11, gb_free=2.8, wall=30273 2021-06-19 03:03:30 | INFO | train_inner | epoch 001: 2735 / 3002 loss=2.874, ppl=7.33, wps=5779.6, ups=0.09, wpb=64874, bsz=128, num_updates=2714, lr=9.99863e-05, gnorm=2.408, loss_scale=1, train_wall=11, gb_free=2.8, wall=30285 2021-06-19 03:03:41 | INFO | train_inner | epoch 001: 2736 / 3002 loss=2.702, ppl=6.51, wps=5894.1, ups=0.09, wpb=64787, bsz=128, num_updates=2715, lr=9.99863e-05, gnorm=2.393, loss_scale=1, train_wall=11, gb_free=2.8, wall=30296 2021-06-19 03:03:52 | INFO | train_inner | epoch 001: 2737 / 3002 loss=2.758, ppl=6.76, wps=5929.9, ups=0.09, wpb=64922, bsz=128, num_updates=2716, lr=9.99863e-05, gnorm=2.322, loss_scale=1, train_wall=10, gb_free=2.8, wall=30307 2021-06-19 03:04:03 | INFO | train_inner | epoch 001: 2738 / 3002 loss=2.924, ppl=7.59, wps=5770.7, ups=0.09, wpb=64755, bsz=128, num_updates=2717, lr=9.99863e-05, gnorm=2.337, loss_scale=1, train_wall=11, gb_free=2.8, wall=30318 2021-06-19 03:04:15 | INFO | train_inner | epoch 001: 2739 / 3002 loss=2.832, ppl=7.12, wps=5739.7, ups=0.09, wpb=64803, bsz=128, num_updates=2718, lr=9.99863e-05, gnorm=2.48, loss_scale=1, train_wall=11, gb_free=2.8, wall=30329 2021-06-19 03:04:26 | INFO | train_inner | epoch 001: 2740 / 3002 loss=2.929, ppl=7.62, wps=5844.7, ups=0.09, wpb=64778, bsz=128, num_updates=2719, lr=9.99862e-05, gnorm=2.342, loss_scale=1, train_wall=11, gb_free=2.8, wall=30340 2021-06-19 03:04:37 | INFO | train_inner | epoch 001: 2741 / 3002 loss=2.929, ppl=7.62, wps=5930.8, ups=0.09, wpb=64845, bsz=128, num_updates=2720, lr=9.99862e-05, gnorm=2.453, loss_scale=1, train_wall=11, gb_free=2.8, wall=30351 2021-06-19 03:04:48 | INFO | train_inner | epoch 001: 2742 / 3002 loss=2.805, ppl=6.99, wps=5851.7, ups=0.09, wpb=64928, bsz=128, num_updates=2721, lr=9.99862e-05, gnorm=2.31, loss_scale=1, train_wall=11, gb_free=2.8, wall=30362 2021-06-19 03:04:59 | INFO | train_inner | epoch 001: 2743 / 3002 loss=2.732, ppl=6.64, wps=5871.2, ups=0.09, wpb=64798, bsz=128, num_updates=2722, lr=9.99862e-05, gnorm=2.453, loss_scale=1, train_wall=11, gb_free=2.8, wall=30373 2021-06-19 03:05:10 | INFO | train_inner | epoch 001: 2744 / 3002 loss=2.833, ppl=7.13, wps=5768.7, ups=0.09, wpb=64777, bsz=128, num_updates=2723, lr=9.99862e-05, gnorm=2.411, loss_scale=1, train_wall=11, gb_free=2.8, wall=30384 2021-06-19 03:05:21 | INFO | train_inner | epoch 001: 2745 / 3002 loss=3.021, ppl=8.12, wps=5935.7, ups=0.09, wpb=64752, bsz=128, num_updates=2724, lr=9.99862e-05, gnorm=2.38, loss_scale=1, train_wall=10, gb_free=2.8, wall=30395 2021-06-19 03:05:32 | INFO | train_inner | epoch 001: 2746 / 3002 loss=2.805, ppl=6.99, wps=5816.5, ups=0.09, wpb=64788, bsz=128, num_updates=2725, lr=9.99862e-05, gnorm=3.495, loss_scale=1, train_wall=11, gb_free=2.8, wall=30406 2021-06-19 03:05:43 | INFO | train_inner | epoch 001: 2747 / 3002 loss=2.965, ppl=7.81, wps=5781.8, ups=0.09, wpb=64821, bsz=128, num_updates=2726, lr=9.99862e-05, gnorm=10.288, loss_scale=1, train_wall=11, gb_free=2.8, wall=30418 2021-06-19 03:05:54 | INFO | train_inner | epoch 001: 2748 / 3002 loss=2.744, ppl=6.7, wps=5857.1, ups=0.09, wpb=64897, bsz=128, num_updates=2727, lr=9.99862e-05, gnorm=2.26, loss_scale=1, train_wall=11, gb_free=2.8, wall=30429 2021-06-19 03:06:05 | INFO | train_inner | epoch 001: 2749 / 3002 loss=2.953, ppl=7.74, wps=5932.7, ups=0.09, wpb=64799, bsz=128, num_updates=2728, lr=9.99862e-05, gnorm=2.393, loss_scale=1, train_wall=10, gb_free=2.8, wall=30440 2021-06-19 03:06:16 | INFO | train_inner | epoch 001: 2750 / 3002 loss=2.83, ppl=7.11, wps=5853.5, ups=0.09, wpb=64835, bsz=128, num_updates=2729, lr=9.99862e-05, gnorm=2.886, loss_scale=1, train_wall=11, gb_free=2.8, wall=30451 2021-06-19 03:06:27 | INFO | train_inner | epoch 001: 2751 / 3002 loss=2.937, ppl=7.66, wps=5880.4, ups=0.09, wpb=64719, bsz=128, num_updates=2730, lr=9.99862e-05, gnorm=2.303, loss_scale=1, train_wall=11, gb_free=2.8, wall=30462 2021-06-19 03:06:38 | INFO | train_inner | epoch 001: 2752 / 3002 loss=3.078, ppl=8.44, wps=5860.5, ups=0.09, wpb=64775, bsz=128, num_updates=2731, lr=9.99862e-05, gnorm=2.379, loss_scale=1, train_wall=11, gb_free=2.8, wall=30473 2021-06-19 03:06:49 | INFO | train_inner | epoch 001: 2753 / 3002 loss=2.89, ppl=7.41, wps=5981.7, ups=0.09, wpb=64865, bsz=128, num_updates=2732, lr=9.99861e-05, gnorm=4.006, loss_scale=1, train_wall=10, gb_free=2.8, wall=30484 2021-06-19 03:07:00 | INFO | train_inner | epoch 001: 2754 / 3002 loss=2.827, ppl=7.1, wps=5807.1, ups=0.09, wpb=64771, bsz=128, num_updates=2733, lr=9.99861e-05, gnorm=2.408, loss_scale=1, train_wall=11, gb_free=2.8, wall=30495 2021-06-19 03:07:12 | INFO | train_inner | epoch 001: 2755 / 3002 loss=2.88, ppl=7.36, wps=5747.8, ups=0.09, wpb=64790, bsz=128, num_updates=2734, lr=9.99861e-05, gnorm=2.34, loss_scale=1, train_wall=11, gb_free=2.8, wall=30506 2021-06-19 03:07:23 | INFO | train_inner | epoch 001: 2756 / 3002 loss=2.822, ppl=7.07, wps=5949.1, ups=0.09, wpb=64860, bsz=128, num_updates=2735, lr=9.99861e-05, gnorm=2.405, loss_scale=1, train_wall=10, gb_free=2.8, wall=30517 2021-06-19 03:07:34 | INFO | train_inner | epoch 001: 2757 / 3002 loss=2.959, ppl=7.77, wps=5745.9, ups=0.09, wpb=64771, bsz=128, num_updates=2736, lr=9.99861e-05, gnorm=2.61, loss_scale=1, train_wall=11, gb_free=2.8, wall=30528 2021-06-19 03:07:45 | INFO | train_inner | epoch 001: 2758 / 3002 loss=2.891, ppl=7.42, wps=5858.6, ups=0.09, wpb=64848, bsz=128, num_updates=2737, lr=9.99861e-05, gnorm=2.347, loss_scale=1, train_wall=11, gb_free=2.8, wall=30539 2021-06-19 03:07:56 | INFO | train_inner | epoch 001: 2759 / 3002 loss=2.831, ppl=7.12, wps=5884.3, ups=0.09, wpb=64837, bsz=128, num_updates=2738, lr=9.99861e-05, gnorm=2.436, loss_scale=1, train_wall=11, gb_free=2.8, wall=30550 2021-06-19 03:08:07 | INFO | train_inner | epoch 001: 2760 / 3002 loss=2.908, ppl=7.51, wps=5881, ups=0.09, wpb=64818, bsz=128, num_updates=2739, lr=9.99861e-05, gnorm=2.648, loss_scale=1, train_wall=11, gb_free=2.8, wall=30561 2021-06-19 03:08:18 | INFO | train_inner | epoch 001: 2761 / 3002 loss=2.918, ppl=7.56, wps=5836.4, ups=0.09, wpb=64786, bsz=128, num_updates=2740, lr=9.99861e-05, gnorm=2.363, loss_scale=1, train_wall=11, gb_free=2.8, wall=30572 2021-06-19 03:08:29 | INFO | train_inner | epoch 001: 2762 / 3002 loss=2.86, ppl=7.26, wps=5909.6, ups=0.09, wpb=64869, bsz=128, num_updates=2741, lr=9.99861e-05, gnorm=2.409, loss_scale=1, train_wall=11, gb_free=2.8, wall=30583 2021-06-19 03:08:40 | INFO | train_inner | epoch 001: 2763 / 3002 loss=2.998, ppl=7.99, wps=5810.1, ups=0.09, wpb=64874, bsz=128, num_updates=2742, lr=9.99861e-05, gnorm=2.437, loss_scale=1, train_wall=11, gb_free=2.8, wall=30595 2021-06-19 03:08:51 | INFO | train_inner | epoch 001: 2764 / 3002 loss=2.857, ppl=7.25, wps=5974.7, ups=0.09, wpb=64915, bsz=128, num_updates=2743, lr=9.99861e-05, gnorm=2.442, loss_scale=1, train_wall=10, gb_free=2.8, wall=30605 2021-06-19 03:09:02 | INFO | train_inner | epoch 001: 2765 / 3002 loss=2.785, ppl=6.89, wps=5954.2, ups=0.09, wpb=64880, bsz=128, num_updates=2744, lr=9.9986e-05, gnorm=2.379, loss_scale=1, train_wall=10, gb_free=2.8, wall=30616 2021-06-19 03:09:13 | INFO | train_inner | epoch 001: 2766 / 3002 loss=2.896, ppl=7.44, wps=5823.5, ups=0.09, wpb=64707, bsz=128, num_updates=2745, lr=9.9986e-05, gnorm=2.382, loss_scale=1, train_wall=11, gb_free=2.8, wall=30628 2021-06-19 03:09:24 | INFO | train_inner | epoch 001: 2767 / 3002 loss=2.904, ppl=7.48, wps=5904.8, ups=0.09, wpb=64797, bsz=128, num_updates=2746, lr=9.9986e-05, gnorm=2.446, loss_scale=1, train_wall=11, gb_free=2.8, wall=30638 2021-06-19 03:09:35 | INFO | train_inner | epoch 001: 2768 / 3002 loss=2.788, ppl=6.91, wps=5790.2, ups=0.09, wpb=64735, bsz=128, num_updates=2747, lr=9.9986e-05, gnorm=2.343, loss_scale=1, train_wall=11, gb_free=2.8, wall=30650 2021-06-19 03:09:47 | INFO | train_inner | epoch 001: 2769 / 3002 loss=3.09, ppl=8.51, wps=5781.1, ups=0.09, wpb=64825, bsz=128, num_updates=2748, lr=9.9986e-05, gnorm=2.386, loss_scale=1, train_wall=11, gb_free=2.8, wall=30661 2021-06-19 03:09:58 | INFO | train_inner | epoch 001: 2770 / 3002 loss=2.817, ppl=7.05, wps=5812.8, ups=0.09, wpb=64745, bsz=128, num_updates=2749, lr=9.9986e-05, gnorm=2.725, loss_scale=1, train_wall=11, gb_free=2.8, wall=30672 2021-06-19 03:10:09 | INFO | train_inner | epoch 001: 2771 / 3002 loss=2.782, ppl=6.88, wps=5724.1, ups=0.09, wpb=64840, bsz=128, num_updates=2750, lr=9.9986e-05, gnorm=2.387, loss_scale=1, train_wall=11, gb_free=2.8, wall=30683 2021-06-19 03:10:20 | INFO | train_inner | epoch 001: 2772 / 3002 loss=2.825, ppl=7.09, wps=5821.9, ups=0.09, wpb=64822, bsz=128, num_updates=2751, lr=9.9986e-05, gnorm=2.358, loss_scale=1, train_wall=11, gb_free=2.8, wall=30694 2021-06-19 03:10:31 | INFO | train_inner | epoch 001: 2773 / 3002 loss=2.926, ppl=7.6, wps=5843.2, ups=0.09, wpb=64819, bsz=128, num_updates=2752, lr=9.9986e-05, gnorm=2.602, loss_scale=1, train_wall=11, gb_free=2.8, wall=30706 2021-06-19 03:10:42 | INFO | train_inner | epoch 001: 2774 / 3002 loss=2.768, ppl=6.81, wps=5883.7, ups=0.09, wpb=64832, bsz=128, num_updates=2753, lr=9.9986e-05, gnorm=2.384, loss_scale=1, train_wall=11, gb_free=2.8, wall=30717 2021-06-19 03:10:53 | INFO | train_inner | epoch 001: 2775 / 3002 loss=2.788, ppl=6.91, wps=5836.4, ups=0.09, wpb=64877, bsz=128, num_updates=2754, lr=9.9986e-05, gnorm=2.39, loss_scale=1, train_wall=11, gb_free=2.8, wall=30728 2021-06-19 03:11:04 | INFO | train_inner | epoch 001: 2776 / 3002 loss=3.032, ppl=8.18, wps=5894.9, ups=0.09, wpb=64804, bsz=128, num_updates=2755, lr=9.9986e-05, gnorm=3.253, loss_scale=1, train_wall=11, gb_free=2.8, wall=30739 2021-06-19 03:11:16 | INFO | train_inner | epoch 001: 2777 / 3002 loss=2.962, ppl=7.79, wps=5731.2, ups=0.09, wpb=64786, bsz=128, num_updates=2756, lr=9.9986e-05, gnorm=2.475, loss_scale=1, train_wall=11, gb_free=2.8, wall=30750 2021-06-19 03:11:27 | INFO | train_inner | epoch 001: 2778 / 3002 loss=2.936, ppl=7.65, wps=5813.1, ups=0.09, wpb=64797, bsz=128, num_updates=2757, lr=9.99859e-05, gnorm=2.453, loss_scale=1, train_wall=11, gb_free=2.8, wall=30761 2021-06-19 03:11:38 | INFO | train_inner | epoch 001: 2779 / 3002 loss=2.882, ppl=7.37, wps=6018.3, ups=0.09, wpb=64866, bsz=128, num_updates=2758, lr=9.99859e-05, gnorm=3.048, loss_scale=1, train_wall=10, gb_free=2.8, wall=30772 2021-06-19 03:11:49 | INFO | train_inner | epoch 001: 2780 / 3002 loss=2.928, ppl=7.61, wps=5785.7, ups=0.09, wpb=64812, bsz=128, num_updates=2759, lr=9.99859e-05, gnorm=2.541, loss_scale=1, train_wall=11, gb_free=2.8, wall=30783 2021-06-19 03:12:00 | INFO | train_inner | epoch 001: 2781 / 3002 loss=2.944, ppl=7.7, wps=5989.1, ups=0.09, wpb=64879, bsz=128, num_updates=2760, lr=9.99859e-05, gnorm=2.29, loss_scale=1, train_wall=10, gb_free=2.8, wall=30794 2021-06-19 03:12:11 | INFO | train_inner | epoch 001: 2782 / 3002 loss=2.899, ppl=7.46, wps=5871.7, ups=0.09, wpb=64876, bsz=128, num_updates=2761, lr=9.99859e-05, gnorm=2.235, loss_scale=1, train_wall=11, gb_free=2.8, wall=30805 2021-06-19 03:12:22 | INFO | train_inner | epoch 001: 2783 / 3002 loss=2.874, ppl=7.33, wps=5835.3, ups=0.09, wpb=64835, bsz=128, num_updates=2762, lr=9.99859e-05, gnorm=2.319, loss_scale=2, train_wall=11, gb_free=2.8, wall=30816 2021-06-19 03:12:33 | INFO | train_inner | epoch 001: 2784 / 3002 loss=2.97, ppl=7.84, wps=5819.3, ups=0.09, wpb=64837, bsz=128, num_updates=2763, lr=9.99859e-05, gnorm=2.581, loss_scale=2, train_wall=11, gb_free=2.8, wall=30827 2021-06-19 03:12:44 | INFO | train_inner | epoch 001: 2785 / 3002 loss=3.036, ppl=8.2, wps=5886.1, ups=0.09, wpb=64939, bsz=128, num_updates=2764, lr=9.99859e-05, gnorm=2.449, loss_scale=2, train_wall=11, gb_free=2.8, wall=30838 2021-06-19 03:12:55 | INFO | train_inner | epoch 001: 2786 / 3002 loss=2.774, ppl=6.84, wps=5945, ups=0.09, wpb=64860, bsz=128, num_updates=2765, lr=9.99859e-05, gnorm=2.296, loss_scale=2, train_wall=10, gb_free=2.8, wall=30849 2021-06-19 03:13:06 | INFO | train_inner | epoch 001: 2787 / 3002 loss=2.8, ppl=6.97, wps=5802.1, ups=0.09, wpb=64860, bsz=128, num_updates=2766, lr=9.99859e-05, gnorm=2.276, loss_scale=2, train_wall=11, gb_free=2.8, wall=30860 2021-06-19 03:13:17 | INFO | train_inner | epoch 001: 2788 / 3002 loss=2.724, ppl=6.61, wps=5788.5, ups=0.09, wpb=64843, bsz=128, num_updates=2767, lr=9.99859e-05, gnorm=3.501, loss_scale=2, train_wall=11, gb_free=2.8, wall=30872 2021-06-19 03:13:28 | INFO | train_inner | epoch 001: 2789 / 3002 loss=2.832, ppl=7.12, wps=5984.8, ups=0.09, wpb=64879, bsz=128, num_updates=2768, lr=9.99859e-05, gnorm=2.317, loss_scale=2, train_wall=10, gb_free=2.8, wall=30882 2021-06-19 03:13:39 | INFO | train_inner | epoch 001: 2790 / 3002 loss=2.749, ppl=6.72, wps=5898.7, ups=0.09, wpb=64852, bsz=128, num_updates=2769, lr=9.99858e-05, gnorm=2.371, loss_scale=2, train_wall=11, gb_free=2.8, wall=30893 2021-06-19 03:13:50 | INFO | train_inner | epoch 001: 2791 / 3002 loss=3.067, ppl=8.38, wps=5927.1, ups=0.09, wpb=64868, bsz=128, num_updates=2770, lr=9.99858e-05, gnorm=2.454, loss_scale=2, train_wall=11, gb_free=2.8, wall=30904 2021-06-19 03:14:01 | INFO | train_inner | epoch 001: 2792 / 3002 loss=2.86, ppl=7.26, wps=5905.1, ups=0.09, wpb=64845, bsz=128, num_updates=2771, lr=9.99858e-05, gnorm=2.239, loss_scale=2, train_wall=11, gb_free=2.8, wall=30915 2021-06-19 03:14:12 | INFO | train_inner | epoch 001: 2793 / 3002 loss=2.824, ppl=7.08, wps=5899.3, ups=0.09, wpb=64827, bsz=128, num_updates=2772, lr=9.99858e-05, gnorm=2.384, loss_scale=2, train_wall=11, gb_free=2.8, wall=30926 2021-06-19 03:14:23 | INFO | train_inner | epoch 001: 2794 / 3002 loss=2.817, ppl=7.05, wps=5881, ups=0.09, wpb=64826, bsz=128, num_updates=2773, lr=9.99858e-05, gnorm=2.283, loss_scale=2, train_wall=11, gb_free=2.8, wall=30937 2021-06-19 03:14:34 | INFO | train_inner | epoch 001: 2795 / 3002 loss=2.863, ppl=7.28, wps=5962.9, ups=0.09, wpb=64874, bsz=128, num_updates=2774, lr=9.99858e-05, gnorm=2.252, loss_scale=2, train_wall=10, gb_free=2.8, wall=30948 2021-06-19 03:14:45 | INFO | train_inner | epoch 001: 2796 / 3002 loss=2.874, ppl=7.33, wps=5791.7, ups=0.09, wpb=64834, bsz=128, num_updates=2775, lr=9.99858e-05, gnorm=2.817, loss_scale=2, train_wall=11, gb_free=2.8, wall=30959 2021-06-19 03:14:56 | INFO | train_inner | epoch 001: 2797 / 3002 loss=2.928, ppl=7.61, wps=5903.9, ups=0.09, wpb=64830, bsz=128, num_updates=2776, lr=9.99858e-05, gnorm=3.619, loss_scale=2, train_wall=11, gb_free=2.8, wall=30970 2021-06-19 03:15:07 | INFO | train_inner | epoch 001: 2798 / 3002 loss=2.711, ppl=6.55, wps=5820, ups=0.09, wpb=64872, bsz=128, num_updates=2777, lr=9.99858e-05, gnorm=7.553, loss_scale=2, train_wall=11, gb_free=2.8, wall=30982 2021-06-19 03:15:18 | INFO | train_inner | epoch 001: 2799 / 3002 loss=2.863, ppl=7.27, wps=5924.6, ups=0.09, wpb=64729, bsz=128, num_updates=2778, lr=9.99858e-05, gnorm=2.299, loss_scale=2, train_wall=10, gb_free=2.8, wall=30992 2021-06-19 03:15:29 | INFO | train_inner | epoch 001: 2800 / 3002 loss=3.063, ppl=8.36, wps=5860.8, ups=0.09, wpb=64807, bsz=128, num_updates=2779, lr=9.99858e-05, gnorm=2.434, loss_scale=2, train_wall=11, gb_free=2.8, wall=31004 2021-06-19 03:15:40 | INFO | train_inner | epoch 001: 2801 / 3002 loss=2.844, ppl=7.18, wps=5808.6, ups=0.09, wpb=64669, bsz=128, num_updates=2780, lr=9.99858e-05, gnorm=2.287, loss_scale=2, train_wall=11, gb_free=2.8, wall=31015 2021-06-19 03:15:51 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 2021-06-19 03:16:03 | INFO | train_inner | epoch 001: 2803 / 3002 loss=2.881, ppl=7.37, wps=2914.7, ups=0.04, wpb=64832, bsz=128, num_updates=2781, lr=9.99858e-05, gnorm=2.316, loss_scale=1, train_wall=21, gb_free=2.8, wall=31037 2021-06-19 03:16:14 | INFO | train_inner | epoch 001: 2804 / 3002 loss=2.768, ppl=6.81, wps=5931.1, ups=0.09, wpb=64863, bsz=128, num_updates=2782, lr=9.99857e-05, gnorm=2.374, loss_scale=1, train_wall=10, gb_free=2.8, wall=31048 2021-06-19 03:16:25 | INFO | train_inner | epoch 001: 2805 / 3002 loss=2.874, ppl=7.33, wps=5808.6, ups=0.09, wpb=64914, bsz=128, num_updates=2783, lr=9.99857e-05, gnorm=4.365, loss_scale=1, train_wall=11, gb_free=2.8, wall=31059 2021-06-19 03:16:36 | INFO | train_inner | epoch 001: 2806 / 3002 loss=2.963, ppl=7.8, wps=5841.5, ups=0.09, wpb=64787, bsz=128, num_updates=2784, lr=9.99857e-05, gnorm=2.556, loss_scale=1, train_wall=11, gb_free=2.8, wall=31070 2021-06-19 03:16:47 | INFO | train_inner | epoch 001: 2807 / 3002 loss=2.802, ppl=6.98, wps=5921.2, ups=0.09, wpb=64801, bsz=128, num_updates=2785, lr=9.99857e-05, gnorm=2.409, loss_scale=1, train_wall=10, gb_free=2.8, wall=31081 2021-06-19 03:16:58 | INFO | train_inner | epoch 001: 2808 / 3002 loss=2.751, ppl=6.73, wps=5849.1, ups=0.09, wpb=64893, bsz=128, num_updates=2786, lr=9.99857e-05, gnorm=2.383, loss_scale=1, train_wall=11, gb_free=2.8, wall=31092 2021-06-19 03:17:09 | INFO | train_inner | epoch 001: 2809 / 3002 loss=2.89, ppl=7.41, wps=5900.6, ups=0.09, wpb=64895, bsz=128, num_updates=2787, lr=9.99857e-05, gnorm=2.446, loss_scale=1, train_wall=11, gb_free=2.8, wall=31103 2021-06-19 03:17:20 | INFO | train_inner | epoch 001: 2810 / 3002 loss=2.966, ppl=7.82, wps=5950.6, ups=0.09, wpb=64875, bsz=128, num_updates=2788, lr=9.99857e-05, gnorm=2.778, loss_scale=1, train_wall=10, gb_free=2.8, wall=31114 2021-06-19 03:17:31 | INFO | train_inner | epoch 001: 2811 / 3002 loss=2.789, ppl=6.91, wps=5873.4, ups=0.09, wpb=64798, bsz=128, num_updates=2789, lr=9.99857e-05, gnorm=2.391, loss_scale=1, train_wall=11, gb_free=2.8, wall=31125 2021-06-19 03:17:42 | INFO | train_inner | epoch 001: 2812 / 3002 loss=2.753, ppl=6.74, wps=5826.1, ups=0.09, wpb=64843, bsz=128, num_updates=2790, lr=9.99857e-05, gnorm=2.279, loss_scale=1, train_wall=11, gb_free=2.8, wall=31136 2021-06-19 03:17:53 | INFO | train_inner | epoch 001: 2813 / 3002 loss=3.016, ppl=8.09, wps=5723.8, ups=0.09, wpb=64777, bsz=128, num_updates=2791, lr=9.99857e-05, gnorm=7.411, loss_scale=1, train_wall=11, gb_free=2.8, wall=31148 2021-06-19 03:18:04 | INFO | train_inner | epoch 001: 2814 / 3002 loss=2.929, ppl=7.62, wps=5855.2, ups=0.09, wpb=64827, bsz=128, num_updates=2792, lr=9.99857e-05, gnorm=2.551, loss_scale=1, train_wall=11, gb_free=2.8, wall=31159 2021-06-19 03:18:15 | INFO | train_inner | epoch 001: 2815 / 3002 loss=2.98, ppl=7.89, wps=5885.7, ups=0.09, wpb=64843, bsz=128, num_updates=2793, lr=9.99857e-05, gnorm=6.066, loss_scale=1, train_wall=11, gb_free=2.8, wall=31170 2021-06-19 03:18:26 | INFO | train_inner | epoch 001: 2816 / 3002 loss=2.817, ppl=7.05, wps=5927.3, ups=0.09, wpb=64837, bsz=128, num_updates=2794, lr=9.99856e-05, gnorm=2.499, loss_scale=1, train_wall=11, gb_free=2.8, wall=31181 2021-06-19 03:18:37 | INFO | train_inner | epoch 001: 2817 / 3002 loss=2.715, ppl=6.57, wps=5840.5, ups=0.09, wpb=64867, bsz=128, num_updates=2795, lr=9.99856e-05, gnorm=3.149, loss_scale=1, train_wall=11, gb_free=2.8, wall=31192 2021-06-19 03:18:49 | INFO | train_inner | epoch 001: 2818 / 3002 loss=2.866, ppl=7.29, wps=5807.7, ups=0.09, wpb=64814, bsz=128, num_updates=2796, lr=9.99856e-05, gnorm=2.385, loss_scale=1, train_wall=11, gb_free=2.8, wall=31203 2021-06-19 03:19:00 | INFO | train_inner | epoch 001: 2819 / 3002 loss=2.905, ppl=7.49, wps=5883, ups=0.09, wpb=64810, bsz=128, num_updates=2797, lr=9.99856e-05, gnorm=4.789, loss_scale=1, train_wall=11, gb_free=2.8, wall=31214 2021-06-19 03:19:11 | INFO | train_inner | epoch 001: 2820 / 3002 loss=2.916, ppl=7.55, wps=5852.8, ups=0.09, wpb=64704, bsz=128, num_updates=2798, lr=9.99856e-05, gnorm=2.539, loss_scale=1, train_wall=11, gb_free=2.8, wall=31225 2021-06-19 03:19:22 | INFO | train_inner | epoch 001: 2821 / 3002 loss=2.853, ppl=7.23, wps=5897.1, ups=0.09, wpb=64889, bsz=128, num_updates=2799, lr=9.99856e-05, gnorm=2.401, loss_scale=1, train_wall=11, gb_free=2.8, wall=31236 2021-06-19 03:19:33 | INFO | train_inner | epoch 001: 2822 / 3002 loss=2.79, ppl=6.92, wps=5847.6, ups=0.09, wpb=64806, bsz=128, num_updates=2800, lr=9.99856e-05, gnorm=2.302, loss_scale=1, train_wall=11, gb_free=2.8, wall=31247 2021-06-19 03:19:44 | INFO | train_inner | epoch 001: 2823 / 3002 loss=2.91, ppl=7.52, wps=5930.9, ups=0.09, wpb=64866, bsz=128, num_updates=2801, lr=9.99856e-05, gnorm=2.438, loss_scale=1, train_wall=10, gb_free=2.8, wall=31258 2021-06-19 03:19:55 | INFO | train_inner | epoch 001: 2824 / 3002 loss=2.805, ppl=6.99, wps=5898, ups=0.09, wpb=64855, bsz=128, num_updates=2802, lr=9.99856e-05, gnorm=9.287, loss_scale=1, train_wall=11, gb_free=2.8, wall=31269 2021-06-19 03:20:06 | INFO | train_inner | epoch 001: 2825 / 3002 loss=2.863, ppl=7.28, wps=5836.8, ups=0.09, wpb=64779, bsz=128, num_updates=2803, lr=9.99856e-05, gnorm=2.579, loss_scale=1, train_wall=11, gb_free=2.8, wall=31280 2021-06-19 03:20:17 | INFO | train_inner | epoch 001: 2826 / 3002 loss=2.886, ppl=7.39, wps=5842.5, ups=0.09, wpb=64848, bsz=128, num_updates=2804, lr=9.99856e-05, gnorm=2.399, loss_scale=1, train_wall=11, gb_free=2.8, wall=31291 2021-06-19 03:20:28 | INFO | train_inner | epoch 001: 2827 / 3002 loss=2.646, ppl=6.26, wps=5908.2, ups=0.09, wpb=64814, bsz=128, num_updates=2805, lr=9.99856e-05, gnorm=2.626, loss_scale=1, train_wall=11, gb_free=2.8, wall=31302 2021-06-19 03:20:39 | INFO | train_inner | epoch 001: 2828 / 3002 loss=2.986, ppl=7.92, wps=5877.2, ups=0.09, wpb=64790, bsz=128, num_updates=2806, lr=9.99856e-05, gnorm=2.457, loss_scale=1, train_wall=11, gb_free=2.8, wall=31313 2021-06-19 03:20:50 | INFO | train_inner | epoch 001: 2829 / 3002 loss=2.84, ppl=7.16, wps=5874.7, ups=0.09, wpb=64804, bsz=128, num_updates=2807, lr=9.99855e-05, gnorm=2.656, loss_scale=1, train_wall=11, gb_free=2.8, wall=31324 2021-06-19 03:21:01 | INFO | train_inner | epoch 001: 2830 / 3002 loss=2.841, ppl=7.17, wps=5872.9, ups=0.09, wpb=64862, bsz=128, num_updates=2808, lr=9.99855e-05, gnorm=2.486, loss_scale=1, train_wall=11, gb_free=2.8, wall=31335 2021-06-19 03:21:12 | INFO | train_inner | epoch 001: 2831 / 3002 loss=2.992, ppl=7.95, wps=5861.6, ups=0.09, wpb=64764, bsz=128, num_updates=2809, lr=9.99855e-05, gnorm=3.391, loss_scale=1, train_wall=11, gb_free=2.8, wall=31346 2021-06-19 03:21:23 | INFO | train_inner | epoch 001: 2832 / 3002 loss=3.055, ppl=8.31, wps=5767.3, ups=0.09, wpb=64875, bsz=128, num_updates=2810, lr=9.99855e-05, gnorm=9.738, loss_scale=1, train_wall=11, gb_free=2.8, wall=31358 2021-06-19 03:21:34 | INFO | train_inner | epoch 001: 2833 / 3002 loss=2.787, ppl=6.9, wps=5840.9, ups=0.09, wpb=64869, bsz=128, num_updates=2811, lr=9.99855e-05, gnorm=17.12, loss_scale=1, train_wall=11, gb_free=2.8, wall=31369 2021-06-19 03:21:45 | INFO | train_inner | epoch 001: 2834 / 3002 loss=3.018, ppl=8.1, wps=5827.8, ups=0.09, wpb=64841, bsz=128, num_updates=2812, lr=9.99855e-05, gnorm=10.946, loss_scale=1, train_wall=11, gb_free=2.8, wall=31380 2021-06-19 03:21:56 | INFO | train_inner | epoch 001: 2835 / 3002 loss=2.883, ppl=7.38, wps=5903.5, ups=0.09, wpb=64837, bsz=128, num_updates=2813, lr=9.99855e-05, gnorm=4.1, loss_scale=1, train_wall=11, gb_free=2.8, wall=31391 2021-06-19 03:22:07 | INFO | train_inner | epoch 001: 2836 / 3002 loss=2.792, ppl=6.93, wps=5839.9, ups=0.09, wpb=64833, bsz=128, num_updates=2814, lr=9.99855e-05, gnorm=9.498, loss_scale=1, train_wall=11, gb_free=2.8, wall=31402 2021-06-19 03:22:19 | INFO | train_inner | epoch 001: 2837 / 3002 loss=2.774, ppl=6.84, wps=5831.1, ups=0.09, wpb=64967, bsz=128, num_updates=2815, lr=9.99855e-05, gnorm=2.905, loss_scale=1, train_wall=11, gb_free=2.8, wall=31413 2021-06-19 03:22:30 | INFO | train_inner | epoch 001: 2838 / 3002 loss=3.016, ppl=8.09, wps=5842.8, ups=0.09, wpb=64794, bsz=128, num_updates=2816, lr=9.99855e-05, gnorm=4.564, loss_scale=1, train_wall=11, gb_free=2.8, wall=31424 2021-06-19 03:22:41 | INFO | train_inner | epoch 001: 2839 / 3002 loss=3.004, ppl=8.02, wps=5796, ups=0.09, wpb=64822, bsz=128, num_updates=2817, lr=9.99855e-05, gnorm=3.534, loss_scale=1, train_wall=11, gb_free=2.8, wall=31435 2021-06-19 03:22:52 | INFO | train_inner | epoch 001: 2840 / 3002 loss=2.86, ppl=7.26, wps=5838.6, ups=0.09, wpb=64859, bsz=128, num_updates=2818, lr=9.99855e-05, gnorm=2.5, loss_scale=1, train_wall=11, gb_free=2.8, wall=31446 2021-06-19 03:23:03 | INFO | train_inner | epoch 001: 2841 / 3002 loss=2.795, ppl=6.94, wps=5866.4, ups=0.09, wpb=64766, bsz=128, num_updates=2819, lr=9.99854e-05, gnorm=2.52, loss_scale=1, train_wall=11, gb_free=2.8, wall=31457 2021-06-19 03:23:14 | INFO | train_inner | epoch 001: 2842 / 3002 loss=2.764, ppl=6.79, wps=5811.3, ups=0.09, wpb=64813, bsz=128, num_updates=2820, lr=9.99854e-05, gnorm=2.573, loss_scale=1, train_wall=11, gb_free=2.8, wall=31469 2021-06-19 03:23:25 | INFO | train_inner | epoch 001: 2843 / 3002 loss=2.875, ppl=7.34, wps=5898.3, ups=0.09, wpb=64887, bsz=128, num_updates=2821, lr=9.99854e-05, gnorm=2.756, loss_scale=1, train_wall=11, gb_free=2.8, wall=31480 2021-06-19 03:23:36 | INFO | train_inner | epoch 001: 2844 / 3002 loss=2.802, ppl=6.97, wps=5983, ups=0.09, wpb=64774, bsz=128, num_updates=2822, lr=9.99854e-05, gnorm=2.397, loss_scale=1, train_wall=10, gb_free=2.8, wall=31490 2021-06-19 03:23:47 | INFO | train_inner | epoch 001: 2845 / 3002 loss=2.908, ppl=7.51, wps=5858.9, ups=0.09, wpb=64765, bsz=128, num_updates=2823, lr=9.99854e-05, gnorm=2.456, loss_scale=1, train_wall=11, gb_free=2.8, wall=31501 2021-06-19 03:23:58 | INFO | train_inner | epoch 001: 2846 / 3002 loss=2.988, ppl=7.93, wps=5876, ups=0.09, wpb=64850, bsz=128, num_updates=2824, lr=9.99854e-05, gnorm=2.536, loss_scale=1, train_wall=11, gb_free=2.8, wall=31512 2021-06-19 03:24:09 | INFO | train_inner | epoch 001: 2847 / 3002 loss=2.963, ppl=7.8, wps=5895.5, ups=0.09, wpb=64827, bsz=128, num_updates=2825, lr=9.99854e-05, gnorm=2.463, loss_scale=1, train_wall=11, gb_free=2.8, wall=31523 2021-06-19 03:24:20 | INFO | train_inner | epoch 001: 2848 / 3002 loss=3.048, ppl=8.27, wps=5910.3, ups=0.09, wpb=64890, bsz=128, num_updates=2826, lr=9.99854e-05, gnorm=3.473, loss_scale=1, train_wall=10, gb_free=2.8, wall=31534 2021-06-19 03:24:31 | INFO | train_inner | epoch 001: 2849 / 3002 loss=2.733, ppl=6.65, wps=5868.5, ups=0.09, wpb=64874, bsz=128, num_updates=2827, lr=9.99854e-05, gnorm=2.323, loss_scale=1, train_wall=11, gb_free=2.8, wall=31545 2021-06-19 03:24:42 | INFO | train_inner | epoch 001: 2850 / 3002 loss=2.985, ppl=7.92, wps=5919.5, ups=0.09, wpb=64879, bsz=128, num_updates=2828, lr=9.99854e-05, gnorm=2.557, loss_scale=1, train_wall=11, gb_free=2.8, wall=31556 2021-06-19 03:24:53 | INFO | train_inner | epoch 001: 2851 / 3002 loss=3.176, ppl=9.04, wps=5850.6, ups=0.09, wpb=64791, bsz=128, num_updates=2829, lr=9.99854e-05, gnorm=2.536, loss_scale=1, train_wall=11, gb_free=2.8, wall=31568 2021-06-19 03:25:04 | INFO | train_inner | epoch 001: 2852 / 3002 loss=2.918, ppl=7.56, wps=5848.7, ups=0.09, wpb=64817, bsz=128, num_updates=2830, lr=9.99854e-05, gnorm=2.567, loss_scale=1, train_wall=11, gb_free=2.8, wall=31579 2021-06-19 03:25:16 | INFO | train_inner | epoch 001: 2853 / 3002 loss=2.956, ppl=7.76, wps=5767.5, ups=0.09, wpb=64817, bsz=128, num_updates=2831, lr=9.99854e-05, gnorm=2.482, loss_scale=1, train_wall=11, gb_free=2.8, wall=31590 2021-06-19 03:25:26 | INFO | train_inner | epoch 001: 2854 / 3002 loss=2.844, ppl=7.18, wps=5978, ups=0.09, wpb=64917, bsz=128, num_updates=2832, lr=9.99853e-05, gnorm=2.466, loss_scale=1, train_wall=10, gb_free=2.8, wall=31601 2021-06-19 03:25:37 | INFO | train_inner | epoch 001: 2855 / 3002 loss=2.912, ppl=7.52, wps=5880.2, ups=0.09, wpb=64758, bsz=128, num_updates=2833, lr=9.99853e-05, gnorm=2.359, loss_scale=1, train_wall=11, gb_free=2.8, wall=31612 2021-06-19 03:25:49 | INFO | train_inner | epoch 001: 2856 / 3002 loss=2.913, ppl=7.53, wps=5737.3, ups=0.09, wpb=64832, bsz=128, num_updates=2834, lr=9.99853e-05, gnorm=2.408, loss_scale=1, train_wall=11, gb_free=2.8, wall=31623 2021-06-19 03:26:00 | INFO | train_inner | epoch 001: 2857 / 3002 loss=2.938, ppl=7.67, wps=5824, ups=0.09, wpb=64778, bsz=128, num_updates=2835, lr=9.99853e-05, gnorm=2.448, loss_scale=1, train_wall=11, gb_free=2.8, wall=31634 2021-06-19 03:26:11 | INFO | train_inner | epoch 001: 2858 / 3002 loss=2.869, ppl=7.3, wps=5808.9, ups=0.09, wpb=64810, bsz=128, num_updates=2836, lr=9.99853e-05, gnorm=2.343, loss_scale=1, train_wall=11, gb_free=2.8, wall=31645 2021-06-19 03:26:22 | INFO | train_inner | epoch 001: 2859 / 3002 loss=2.986, ppl=7.92, wps=5877.5, ups=0.09, wpb=64827, bsz=128, num_updates=2837, lr=9.99853e-05, gnorm=2.298, loss_scale=1, train_wall=11, gb_free=2.8, wall=31656 2021-06-19 03:26:33 | INFO | train_inner | epoch 001: 2860 / 3002 loss=2.867, ppl=7.3, wps=5833.9, ups=0.09, wpb=64840, bsz=128, num_updates=2838, lr=9.99853e-05, gnorm=2.438, loss_scale=1, train_wall=11, gb_free=2.8, wall=31667 2021-06-19 03:26:44 | INFO | train_inner | epoch 001: 2861 / 3002 loss=2.838, ppl=7.15, wps=5817.1, ups=0.09, wpb=64783, bsz=128, num_updates=2839, lr=9.99853e-05, gnorm=2.366, loss_scale=1, train_wall=11, gb_free=2.8, wall=31679 2021-06-19 03:26:55 | INFO | train_inner | epoch 001: 2862 / 3002 loss=3.039, ppl=8.22, wps=5863.4, ups=0.09, wpb=64768, bsz=128, num_updates=2840, lr=9.99853e-05, gnorm=3.058, loss_scale=1, train_wall=11, gb_free=2.8, wall=31690 2021-06-19 03:27:07 | INFO | train_inner | epoch 001: 2863 / 3002 loss=2.799, ppl=6.96, wps=5708.8, ups=0.09, wpb=64825, bsz=128, num_updates=2841, lr=9.99853e-05, gnorm=2.554, loss_scale=1, train_wall=11, gb_free=2.8, wall=31701 2021-06-19 03:27:18 | INFO | train_inner | epoch 001: 2864 / 3002 loss=2.957, ppl=7.77, wps=5890.2, ups=0.09, wpb=64837, bsz=128, num_updates=2842, lr=9.99853e-05, gnorm=3.069, loss_scale=1, train_wall=11, gb_free=2.8, wall=31712 2021-06-19 03:27:29 | INFO | train_inner | epoch 001: 2865 / 3002 loss=2.831, ppl=7.12, wps=5882.6, ups=0.09, wpb=64801, bsz=128, num_updates=2843, lr=9.99853e-05, gnorm=2.493, loss_scale=1, train_wall=11, gb_free=2.8, wall=31723 2021-06-19 03:27:40 | INFO | train_inner | epoch 001: 2866 / 3002 loss=2.884, ppl=7.38, wps=5901.8, ups=0.09, wpb=64786, bsz=128, num_updates=2844, lr=9.99852e-05, gnorm=2.326, loss_scale=1, train_wall=11, gb_free=2.8, wall=31734 2021-06-19 03:27:51 | INFO | train_inner | epoch 001: 2867 / 3002 loss=2.914, ppl=7.54, wps=5908.5, ups=0.09, wpb=64887, bsz=128, num_updates=2845, lr=9.99852e-05, gnorm=2.333, loss_scale=1, train_wall=11, gb_free=2.8, wall=31745 2021-06-19 03:28:02 | INFO | train_inner | epoch 001: 2868 / 3002 loss=2.926, ppl=7.6, wps=5826.4, ups=0.09, wpb=64884, bsz=128, num_updates=2846, lr=9.99852e-05, gnorm=2.393, loss_scale=1, train_wall=11, gb_free=2.8, wall=31756 2021-06-19 03:28:13 | INFO | train_inner | epoch 001: 2869 / 3002 loss=2.961, ppl=7.79, wps=5829.7, ups=0.09, wpb=64815, bsz=128, num_updates=2847, lr=9.99852e-05, gnorm=2.773, loss_scale=1, train_wall=11, gb_free=2.8, wall=31767 2021-06-19 03:28:24 | INFO | train_inner | epoch 001: 2870 / 3002 loss=2.896, ppl=7.44, wps=5711.8, ups=0.09, wpb=64875, bsz=128, num_updates=2848, lr=9.99852e-05, gnorm=2.611, loss_scale=1, train_wall=11, gb_free=2.8, wall=31779 2021-06-19 03:28:35 | INFO | train_inner | epoch 001: 2871 / 3002 loss=2.952, ppl=7.74, wps=5911.8, ups=0.09, wpb=64874, bsz=128, num_updates=2849, lr=9.99852e-05, gnorm=2.401, loss_scale=1, train_wall=10, gb_free=2.8, wall=31790 2021-06-19 03:28:46 | INFO | train_inner | epoch 001: 2872 / 3002 loss=2.908, ppl=7.51, wps=5901.8, ups=0.09, wpb=64930, bsz=128, num_updates=2850, lr=9.99852e-05, gnorm=2.333, loss_scale=1, train_wall=11, gb_free=2.8, wall=31801 2021-06-19 03:28:57 | INFO | train_inner | epoch 001: 2873 / 3002 loss=2.845, ppl=7.19, wps=5855.7, ups=0.09, wpb=64854, bsz=128, num_updates=2851, lr=9.99852e-05, gnorm=2.257, loss_scale=1, train_wall=11, gb_free=2.8, wall=31812 2021-06-19 03:29:08 | INFO | train_inner | epoch 001: 2874 / 3002 loss=3.032, ppl=8.18, wps=5809, ups=0.09, wpb=64856, bsz=128, num_updates=2852, lr=9.99852e-05, gnorm=3.107, loss_scale=1, train_wall=11, gb_free=2.8, wall=31823 2021-06-19 03:29:20 | INFO | train_inner | epoch 001: 2875 / 3002 loss=2.813, ppl=7.03, wps=5743.1, ups=0.09, wpb=64820, bsz=128, num_updates=2853, lr=9.99852e-05, gnorm=2.859, loss_scale=1, train_wall=11, gb_free=2.8, wall=31834 2021-06-19 03:29:31 | INFO | train_inner | epoch 001: 2876 / 3002 loss=3.02, ppl=8.11, wps=5745.2, ups=0.09, wpb=64720, bsz=128, num_updates=2854, lr=9.99852e-05, gnorm=2.513, loss_scale=1, train_wall=11, gb_free=2.8, wall=31845 2021-06-19 03:29:42 | INFO | train_inner | epoch 001: 2877 / 3002 loss=2.792, ppl=6.92, wps=5848.3, ups=0.09, wpb=64819, bsz=128, num_updates=2855, lr=9.99852e-05, gnorm=2.33, loss_scale=1, train_wall=11, gb_free=2.8, wall=31856 2021-06-19 03:29:53 | INFO | train_inner | epoch 001: 2878 / 3002 loss=2.809, ppl=7.01, wps=5907.1, ups=0.09, wpb=64847, bsz=128, num_updates=2856, lr=9.99852e-05, gnorm=2.365, loss_scale=1, train_wall=11, gb_free=2.8, wall=31867 2021-06-19 03:30:04 | INFO | train_inner | epoch 001: 2879 / 3002 loss=2.669, ppl=6.36, wps=5847.3, ups=0.09, wpb=64861, bsz=128, num_updates=2857, lr=9.99851e-05, gnorm=2.447, loss_scale=1, train_wall=11, gb_free=2.8, wall=31879 2021-06-19 03:30:15 | INFO | train_inner | epoch 001: 2880 / 3002 loss=2.864, ppl=7.28, wps=5775.6, ups=0.09, wpb=64856, bsz=128, num_updates=2858, lr=9.99851e-05, gnorm=2.322, loss_scale=1, train_wall=11, gb_free=2.8, wall=31890 2021-06-19 03:30:27 | INFO | train_inner | epoch 001: 2881 / 3002 loss=2.791, ppl=6.92, wps=5745.4, ups=0.09, wpb=64839, bsz=128, num_updates=2859, lr=9.99851e-05, gnorm=4.43, loss_scale=1, train_wall=11, gb_free=2.8, wall=31901 2021-06-19 03:30:38 | INFO | train_inner | epoch 001: 2882 / 3002 loss=2.922, ppl=7.58, wps=5829.9, ups=0.09, wpb=64731, bsz=128, num_updates=2860, lr=9.99851e-05, gnorm=2.334, loss_scale=1, train_wall=11, gb_free=2.8, wall=31912 2021-06-19 03:30:49 | INFO | train_inner | epoch 001: 2883 / 3002 loss=2.797, ppl=6.95, wps=5980.4, ups=0.09, wpb=64837, bsz=128, num_updates=2861, lr=9.99851e-05, gnorm=2.351, loss_scale=1, train_wall=10, gb_free=2.8, wall=31923 2021-06-19 03:31:00 | INFO | train_inner | epoch 001: 2884 / 3002 loss=2.852, ppl=7.22, wps=5846.1, ups=0.09, wpb=64795, bsz=128, num_updates=2862, lr=9.99851e-05, gnorm=2.869, loss_scale=1, train_wall=11, gb_free=2.8, wall=31934 2021-06-19 03:31:11 | INFO | train_inner | epoch 001: 2885 / 3002 loss=2.907, ppl=7.5, wps=5886.4, ups=0.09, wpb=64799, bsz=128, num_updates=2863, lr=9.99851e-05, gnorm=2.38, loss_scale=1, train_wall=11, gb_free=2.8, wall=31945 2021-06-19 03:31:22 | INFO | train_inner | epoch 001: 2886 / 3002 loss=2.84, ppl=7.16, wps=5884.8, ups=0.09, wpb=64823, bsz=128, num_updates=2864, lr=9.99851e-05, gnorm=2.338, loss_scale=1, train_wall=11, gb_free=2.8, wall=31956 2021-06-19 03:31:33 | INFO | train_inner | epoch 001: 2887 / 3002 loss=2.879, ppl=7.36, wps=5906.9, ups=0.09, wpb=64779, bsz=128, num_updates=2865, lr=9.99851e-05, gnorm=6.771, loss_scale=1, train_wall=11, gb_free=2.8, wall=31967 2021-06-19 03:31:44 | INFO | train_inner | epoch 001: 2888 / 3002 loss=2.926, ppl=7.6, wps=5893.6, ups=0.09, wpb=64819, bsz=128, num_updates=2866, lr=9.99851e-05, gnorm=2.376, loss_scale=1, train_wall=11, gb_free=2.8, wall=31978 2021-06-19 03:31:55 | INFO | train_inner | epoch 001: 2889 / 3002 loss=2.944, ppl=7.7, wps=5896.3, ups=0.09, wpb=64750, bsz=128, num_updates=2867, lr=9.99851e-05, gnorm=2.352, loss_scale=1, train_wall=11, gb_free=2.8, wall=31989 2021-06-19 03:32:06 | INFO | train_inner | epoch 001: 2890 / 3002 loss=3.051, ppl=8.29, wps=5849.7, ups=0.09, wpb=64783, bsz=128, num_updates=2868, lr=9.99851e-05, gnorm=2.469, loss_scale=1, train_wall=11, gb_free=2.8, wall=32000 2021-06-19 03:32:17 | INFO | train_inner | epoch 001: 2891 / 3002 loss=2.845, ppl=7.18, wps=5840.8, ups=0.09, wpb=64901, bsz=128, num_updates=2869, lr=9.9985e-05, gnorm=2.177, loss_scale=1, train_wall=11, gb_free=2.8, wall=32011 2021-06-19 03:32:28 | INFO | train_inner | epoch 001: 2892 / 3002 loss=2.928, ppl=7.61, wps=5783.3, ups=0.09, wpb=64782, bsz=128, num_updates=2870, lr=9.9985e-05, gnorm=4.007, loss_scale=1, train_wall=11, gb_free=2.8, wall=32022 2021-06-19 03:32:39 | INFO | train_inner | epoch 001: 2893 / 3002 loss=2.907, ppl=7.5, wps=5919.9, ups=0.09, wpb=64912, bsz=128, num_updates=2871, lr=9.9985e-05, gnorm=2.456, loss_scale=1, train_wall=11, gb_free=2.8, wall=32033 2021-06-19 03:32:50 | INFO | train_inner | epoch 001: 2894 / 3002 loss=2.906, ppl=7.5, wps=5764.3, ups=0.09, wpb=64885, bsz=128, num_updates=2872, lr=9.9985e-05, gnorm=2.346, loss_scale=1, train_wall=11, gb_free=2.8, wall=32045 2021-06-19 03:33:01 | INFO | train_inner | epoch 001: 2895 / 3002 loss=2.839, ppl=7.15, wps=5965.3, ups=0.09, wpb=64832, bsz=128, num_updates=2873, lr=9.9985e-05, gnorm=2.671, loss_scale=1, train_wall=10, gb_free=2.8, wall=32056 2021-06-19 03:33:12 | INFO | train_inner | epoch 001: 2896 / 3002 loss=3.044, ppl=8.24, wps=5798.4, ups=0.09, wpb=64737, bsz=128, num_updates=2874, lr=9.9985e-05, gnorm=6.355, loss_scale=1, train_wall=11, gb_free=2.8, wall=32067 2021-06-19 03:33:24 | INFO | train_inner | epoch 001: 2897 / 3002 loss=2.711, ppl=6.55, wps=5723, ups=0.09, wpb=64820, bsz=128, num_updates=2875, lr=9.9985e-05, gnorm=2.596, loss_scale=1, train_wall=11, gb_free=2.8, wall=32078 2021-06-19 03:33:35 | INFO | train_inner | epoch 001: 2898 / 3002 loss=2.74, ppl=6.68, wps=5764.6, ups=0.09, wpb=64829, bsz=128, num_updates=2876, lr=9.9985e-05, gnorm=2.559, loss_scale=1, train_wall=11, gb_free=2.8, wall=32089 2021-06-19 03:33:46 | INFO | train_inner | epoch 001: 2899 / 3002 loss=2.817, ppl=7.05, wps=5884.8, ups=0.09, wpb=64730, bsz=128, num_updates=2877, lr=9.9985e-05, gnorm=2.696, loss_scale=1, train_wall=11, gb_free=2.8, wall=32100 2021-06-19 03:33:57 | INFO | train_inner | epoch 001: 2900 / 3002 loss=2.96, ppl=7.78, wps=5818.3, ups=0.09, wpb=64733, bsz=128, num_updates=2878, lr=9.9985e-05, gnorm=2.221, loss_scale=1, train_wall=11, gb_free=2.8, wall=32111 2021-06-19 03:34:08 | INFO | train_inner | epoch 001: 2901 / 3002 loss=2.803, ppl=6.98, wps=5743.3, ups=0.09, wpb=64806, bsz=128, num_updates=2879, lr=9.9985e-05, gnorm=2.324, loss_scale=1, train_wall=11, gb_free=2.8, wall=32123 2021-06-19 03:34:19 | INFO | train_inner | epoch 001: 2902 / 3002 loss=2.862, ppl=7.27, wps=5911.5, ups=0.09, wpb=64856, bsz=128, num_updates=2880, lr=9.9985e-05, gnorm=2.456, loss_scale=1, train_wall=11, gb_free=2.8, wall=32134 2021-06-19 03:34:31 | INFO | train_inner | epoch 001: 2903 / 3002 loss=2.934, ppl=7.64, wps=5777.8, ups=0.09, wpb=64824, bsz=128, num_updates=2881, lr=9.9985e-05, gnorm=2.415, loss_scale=1, train_wall=11, gb_free=2.8, wall=32145 2021-06-19 03:34:42 | INFO | train_inner | epoch 001: 2904 / 3002 loss=2.807, ppl=7, wps=5817.8, ups=0.09, wpb=64833, bsz=128, num_updates=2882, lr=9.99849e-05, gnorm=3.174, loss_scale=1, train_wall=11, gb_free=2.8, wall=32156 2021-06-19 03:34:53 | INFO | train_inner | epoch 001: 2905 / 3002 loss=2.744, ppl=6.7, wps=5893.6, ups=0.09, wpb=64868, bsz=128, num_updates=2883, lr=9.99849e-05, gnorm=2.315, loss_scale=1, train_wall=11, gb_free=2.8, wall=32167 2021-06-19 03:35:04 | INFO | train_inner | epoch 001: 2906 / 3002 loss=2.998, ppl=7.99, wps=5919, ups=0.09, wpb=64812, bsz=128, num_updates=2884, lr=9.99849e-05, gnorm=2.368, loss_scale=1, train_wall=10, gb_free=2.8, wall=32178 2021-06-19 03:35:15 | INFO | train_inner | epoch 001: 2907 / 3002 loss=2.941, ppl=7.68, wps=5678.1, ups=0.09, wpb=64874, bsz=128, num_updates=2885, lr=9.99849e-05, gnorm=2.346, loss_scale=1, train_wall=11, gb_free=2.8, wall=32189 2021-06-19 03:35:26 | INFO | train_inner | epoch 001: 2908 / 3002 loss=2.851, ppl=7.22, wps=5884.7, ups=0.09, wpb=64861, bsz=128, num_updates=2886, lr=9.99849e-05, gnorm=2.482, loss_scale=1, train_wall=11, gb_free=2.8, wall=32200 2021-06-19 03:35:37 | INFO | train_inner | epoch 001: 2909 / 3002 loss=2.88, ppl=7.36, wps=5970.5, ups=0.09, wpb=64827, bsz=128, num_updates=2887, lr=9.99849e-05, gnorm=2.411, loss_scale=1, train_wall=10, gb_free=2.8, wall=32211 2021-06-19 03:35:48 | INFO | train_inner | epoch 001: 2910 / 3002 loss=2.941, ppl=7.68, wps=5886.2, ups=0.09, wpb=64759, bsz=128, num_updates=2888, lr=9.99849e-05, gnorm=2.505, loss_scale=1, train_wall=11, gb_free=2.8, wall=32222 2021-06-19 03:35:59 | INFO | train_inner | epoch 001: 2911 / 3002 loss=2.924, ppl=7.59, wps=5927.9, ups=0.09, wpb=64810, bsz=128, num_updates=2889, lr=9.99849e-05, gnorm=2.326, loss_scale=1, train_wall=10, gb_free=2.8, wall=32233 2021-06-19 03:36:10 | INFO | train_inner | epoch 001: 2912 / 3002 loss=2.773, ppl=6.84, wps=5950.1, ups=0.09, wpb=64788, bsz=128, num_updates=2890, lr=9.99849e-05, gnorm=2.323, loss_scale=1, train_wall=10, gb_free=2.8, wall=32244 2021-06-19 03:36:21 | INFO | train_inner | epoch 001: 2913 / 3002 loss=2.84, ppl=7.16, wps=5933.5, ups=0.09, wpb=64894, bsz=128, num_updates=2891, lr=9.99849e-05, gnorm=2.414, loss_scale=1, train_wall=10, gb_free=2.8, wall=32255 2021-06-19 03:36:31 | INFO | train_inner | epoch 001: 2914 / 3002 loss=2.774, ppl=6.84, wps=6011.8, ups=0.09, wpb=64862, bsz=128, num_updates=2892, lr=9.99849e-05, gnorm=2.268, loss_scale=1, train_wall=10, gb_free=2.8, wall=32266 2021-06-19 03:36:43 | INFO | train_inner | epoch 001: 2915 / 3002 loss=3.009, ppl=8.05, wps=5833, ups=0.09, wpb=64859, bsz=128, num_updates=2893, lr=9.99849e-05, gnorm=2.347, loss_scale=1, train_wall=11, gb_free=2.8, wall=32277 2021-06-19 03:36:54 | INFO | train_inner | epoch 001: 2916 / 3002 loss=2.94, ppl=7.68, wps=5751, ups=0.09, wpb=64951, bsz=128, num_updates=2894, lr=9.99848e-05, gnorm=2.373, loss_scale=1, train_wall=11, gb_free=2.8, wall=32288 2021-06-19 03:37:05 | INFO | train_inner | epoch 001: 2917 / 3002 loss=2.721, ppl=6.59, wps=5902.2, ups=0.09, wpb=64875, bsz=128, num_updates=2895, lr=9.99848e-05, gnorm=2.395, loss_scale=1, train_wall=11, gb_free=2.8, wall=32299 2021-06-19 03:37:16 | INFO | train_inner | epoch 001: 2918 / 3002 loss=2.979, ppl=7.88, wps=5711.7, ups=0.09, wpb=64873, bsz=128, num_updates=2896, lr=9.99848e-05, gnorm=2.409, loss_scale=1, train_wall=11, gb_free=2.8, wall=32311 2021-06-19 03:37:28 | INFO | train_inner | epoch 001: 2919 / 3002 loss=2.968, ppl=7.83, wps=5644.5, ups=0.09, wpb=64809, bsz=128, num_updates=2897, lr=9.99848e-05, gnorm=2.402, loss_scale=1, train_wall=11, gb_free=2.8, wall=32322 2021-06-19 03:37:39 | INFO | train_inner | epoch 001: 2920 / 3002 loss=2.657, ppl=6.31, wps=5891.5, ups=0.09, wpb=64783, bsz=128, num_updates=2898, lr=9.99848e-05, gnorm=72.113, loss_scale=1, train_wall=11, gb_free=2.8, wall=32333 2021-06-19 03:37:50 | INFO | train_inner | epoch 001: 2921 / 3002 loss=2.816, ppl=7.04, wps=5830.2, ups=0.09, wpb=64836, bsz=128, num_updates=2899, lr=9.99848e-05, gnorm=2.413, loss_scale=1, train_wall=11, gb_free=2.8, wall=32344 2021-06-19 03:38:01 | INFO | train_inner | epoch 001: 2922 / 3002 loss=2.917, ppl=7.55, wps=5860.7, ups=0.09, wpb=64794, bsz=128, num_updates=2900, lr=9.99848e-05, gnorm=2.673, loss_scale=1, train_wall=11, gb_free=2.8, wall=32355 2021-06-19 03:38:12 | INFO | train_inner | epoch 001: 2923 / 3002 loss=2.873, ppl=7.32, wps=5895.6, ups=0.09, wpb=64807, bsz=128, num_updates=2901, lr=9.99848e-05, gnorm=2.363, loss_scale=1, train_wall=11, gb_free=2.8, wall=32366 2021-06-19 03:38:23 | INFO | train_inner | epoch 001: 2924 / 3002 loss=3.037, ppl=8.21, wps=5924.1, ups=0.09, wpb=64820, bsz=128, num_updates=2902, lr=9.99848e-05, gnorm=2.54, loss_scale=1, train_wall=10, gb_free=2.8, wall=32377 2021-06-19 03:38:34 | INFO | train_inner | epoch 001: 2925 / 3002 loss=2.869, ppl=7.31, wps=5824.9, ups=0.09, wpb=64825, bsz=128, num_updates=2903, lr=9.99848e-05, gnorm=2.508, loss_scale=1, train_wall=11, gb_free=2.8, wall=32388 2021-06-19 03:38:45 | INFO | train_inner | epoch 001: 2926 / 3002 loss=2.797, ppl=6.95, wps=5884.1, ups=0.09, wpb=64745, bsz=128, num_updates=2904, lr=9.99848e-05, gnorm=2.569, loss_scale=1, train_wall=11, gb_free=2.8, wall=32399 2021-06-19 03:38:56 | INFO | train_inner | epoch 001: 2927 / 3002 loss=2.919, ppl=7.57, wps=5793.3, ups=0.09, wpb=64822, bsz=128, num_updates=2905, lr=9.99848e-05, gnorm=2.478, loss_scale=1, train_wall=11, gb_free=2.8, wall=32410 2021-06-19 03:39:07 | INFO | train_inner | epoch 001: 2928 / 3002 loss=2.799, ppl=6.96, wps=5723.2, ups=0.09, wpb=64826, bsz=128, num_updates=2906, lr=9.99848e-05, gnorm=2.918, loss_scale=1, train_wall=11, gb_free=2.8, wall=32422 2021-06-19 03:39:18 | INFO | train_inner | epoch 001: 2929 / 3002 loss=2.784, ppl=6.89, wps=5895, ups=0.09, wpb=64829, bsz=128, num_updates=2907, lr=9.99847e-05, gnorm=2.442, loss_scale=1, train_wall=11, gb_free=2.8, wall=32433 2021-06-19 03:39:30 | INFO | train_inner | epoch 001: 2930 / 3002 loss=2.778, ppl=6.86, wps=5721.2, ups=0.09, wpb=64868, bsz=128, num_updates=2908, lr=9.99847e-05, gnorm=2.571, loss_scale=2, train_wall=11, gb_free=2.8, wall=32444 2021-06-19 03:39:41 | INFO | train_inner | epoch 001: 2931 / 3002 loss=2.934, ppl=7.64, wps=5863.4, ups=0.09, wpb=64800, bsz=128, num_updates=2909, lr=9.99847e-05, gnorm=2.625, loss_scale=2, train_wall=11, gb_free=2.8, wall=32455 2021-06-19 03:39:52 | INFO | train_inner | epoch 001: 2932 / 3002 loss=2.674, ppl=6.38, wps=5891.5, ups=0.09, wpb=64864, bsz=128, num_updates=2910, lr=9.99847e-05, gnorm=2.698, loss_scale=2, train_wall=11, gb_free=2.8, wall=32466 2021-06-19 03:40:03 | INFO | train_inner | epoch 001: 2933 / 3002 loss=2.9, ppl=7.47, wps=5768.8, ups=0.09, wpb=64890, bsz=128, num_updates=2911, lr=9.99847e-05, gnorm=2.477, loss_scale=2, train_wall=11, gb_free=2.8, wall=32477 2021-06-19 03:40:14 | INFO | train_inner | epoch 001: 2934 / 3002 loss=2.779, ppl=6.87, wps=5784, ups=0.09, wpb=64758, bsz=128, num_updates=2912, lr=9.99847e-05, gnorm=2.477, loss_scale=2, train_wall=11, gb_free=2.8, wall=32489 2021-06-19 03:40:25 | INFO | train_inner | epoch 001: 2935 / 3002 loss=2.934, ppl=7.64, wps=5988.3, ups=0.09, wpb=64896, bsz=128, num_updates=2913, lr=9.99847e-05, gnorm=2.536, loss_scale=2, train_wall=10, gb_free=2.8, wall=32499 2021-06-19 03:40:36 | INFO | train_inner | epoch 001: 2936 / 3002 loss=2.929, ppl=7.62, wps=5774, ups=0.09, wpb=64814, bsz=128, num_updates=2914, lr=9.99847e-05, gnorm=2.544, loss_scale=2, train_wall=11, gb_free=2.8, wall=32511 2021-06-19 03:40:48 | INFO | train_inner | epoch 001: 2937 / 3002 loss=2.843, ppl=7.17, wps=5792.6, ups=0.09, wpb=64804, bsz=128, num_updates=2915, lr=9.99847e-05, gnorm=16.926, loss_scale=2, train_wall=11, gb_free=2.8, wall=32522 2021-06-19 03:40:59 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 2021-06-19 03:41:10 | INFO | train_inner | epoch 001: 2939 / 3002 loss=2.918, ppl=7.56, wps=2941.8, ups=0.05, wpb=64782, bsz=128, num_updates=2916, lr=9.99847e-05, gnorm=2.486, loss_scale=1, train_wall=21, gb_free=2.8, wall=32544 2021-06-19 03:41:20 | INFO | train_inner | epoch 001: 2940 / 3002 loss=2.853, ppl=7.22, wps=5931.8, ups=0.09, wpb=64739, bsz=128, num_updates=2917, lr=9.99847e-05, gnorm=2.538, loss_scale=1, train_wall=10, gb_free=2.8, wall=32555 2021-06-19 03:41:32 | INFO | train_inner | epoch 001: 2941 / 3002 loss=2.982, ppl=7.9, wps=5804, ups=0.09, wpb=64812, bsz=128, num_updates=2918, lr=9.99847e-05, gnorm=2.475, loss_scale=1, train_wall=11, gb_free=2.8, wall=32566 2021-06-19 03:41:43 | INFO | train_inner | epoch 001: 2942 / 3002 loss=2.902, ppl=7.47, wps=5828.6, ups=0.09, wpb=64826, bsz=128, num_updates=2919, lr=9.99846e-05, gnorm=2.618, loss_scale=1, train_wall=11, gb_free=2.8, wall=32577 2021-06-19 03:41:54 | INFO | train_inner | epoch 001: 2943 / 3002 loss=2.954, ppl=7.75, wps=5943.7, ups=0.09, wpb=64781, bsz=128, num_updates=2920, lr=9.99846e-05, gnorm=2.444, loss_scale=1, train_wall=10, gb_free=2.8, wall=32588 2021-06-19 03:42:04 | INFO | train_inner | epoch 001: 2944 / 3002 loss=2.673, ppl=6.38, wps=6022.1, ups=0.09, wpb=64821, bsz=128, num_updates=2921, lr=9.99846e-05, gnorm=2.834, loss_scale=1, train_wall=10, gb_free=2.8, wall=32599 2021-06-19 03:42:16 | INFO | train_inner | epoch 001: 2945 / 3002 loss=2.967, ppl=7.82, wps=5793.1, ups=0.09, wpb=64837, bsz=128, num_updates=2922, lr=9.99846e-05, gnorm=2.562, loss_scale=1, train_wall=11, gb_free=2.8, wall=32610 2021-06-19 03:42:27 | INFO | train_inner | epoch 001: 2946 / 3002 loss=2.997, ppl=7.98, wps=5822.1, ups=0.09, wpb=64761, bsz=128, num_updates=2923, lr=9.99846e-05, gnorm=2.345, loss_scale=1, train_wall=11, gb_free=2.8, wall=32621 2021-06-19 03:42:38 | INFO | train_inner | epoch 001: 2947 / 3002 loss=2.851, ppl=7.21, wps=5914.2, ups=0.09, wpb=64825, bsz=128, num_updates=2924, lr=9.99846e-05, gnorm=2.524, loss_scale=1, train_wall=10, gb_free=2.8, wall=32632 2021-06-19 03:42:49 | INFO | train_inner | epoch 001: 2948 / 3002 loss=2.897, ppl=7.45, wps=5865.1, ups=0.09, wpb=64834, bsz=128, num_updates=2925, lr=9.99846e-05, gnorm=2.363, loss_scale=1, train_wall=11, gb_free=2.8, wall=32643 2021-06-19 03:43:00 | INFO | train_inner | epoch 001: 2949 / 3002 loss=2.893, ppl=7.43, wps=5980.5, ups=0.09, wpb=64822, bsz=128, num_updates=2926, lr=9.99846e-05, gnorm=2.503, loss_scale=1, train_wall=10, gb_free=2.8, wall=32654 2021-06-19 03:43:11 | INFO | train_inner | epoch 001: 2950 / 3002 loss=2.838, ppl=7.15, wps=5902.7, ups=0.09, wpb=64887, bsz=128, num_updates=2927, lr=9.99846e-05, gnorm=2.59, loss_scale=1, train_wall=11, gb_free=2.8, wall=32665 2021-06-19 03:43:22 | INFO | train_inner | epoch 001: 2951 / 3002 loss=2.857, ppl=7.25, wps=5809.1, ups=0.09, wpb=64863, bsz=128, num_updates=2928, lr=9.99846e-05, gnorm=2.463, loss_scale=1, train_wall=11, gb_free=2.8, wall=32676 2021-06-19 03:43:33 | INFO | train_inner | epoch 001: 2952 / 3002 loss=3.059, ppl=8.33, wps=5953, ups=0.09, wpb=64887, bsz=128, num_updates=2929, lr=9.99846e-05, gnorm=2.456, loss_scale=1, train_wall=10, gb_free=2.8, wall=32687 2021-06-19 03:43:44 | INFO | train_inner | epoch 001: 2953 / 3002 loss=2.827, ppl=7.09, wps=5723.8, ups=0.09, wpb=64748, bsz=128, num_updates=2930, lr=9.99846e-05, gnorm=2.378, loss_scale=1, train_wall=11, gb_free=2.8, wall=32698 2021-06-19 03:43:55 | INFO | train_inner | epoch 001: 2954 / 3002 loss=2.786, ppl=6.89, wps=5856.8, ups=0.09, wpb=64872, bsz=128, num_updates=2931, lr=9.99846e-05, gnorm=2.285, loss_scale=1, train_wall=11, gb_free=2.8, wall=32709 2021-06-19 03:44:06 | INFO | train_inner | epoch 001: 2955 / 3002 loss=2.974, ppl=7.86, wps=5802.2, ups=0.09, wpb=64827, bsz=128, num_updates=2932, lr=9.99845e-05, gnorm=2.412, loss_scale=1, train_wall=11, gb_free=2.8, wall=32721 2021-06-19 03:44:17 | INFO | train_inner | epoch 001: 2956 / 3002 loss=2.845, ppl=7.19, wps=5935, ups=0.09, wpb=64907, bsz=128, num_updates=2933, lr=9.99845e-05, gnorm=2.268, loss_scale=1, train_wall=10, gb_free=2.8, wall=32732 2021-06-19 03:44:28 | INFO | train_inner | epoch 001: 2957 / 3002 loss=2.817, ppl=7.05, wps=5869.2, ups=0.09, wpb=64844, bsz=128, num_updates=2934, lr=9.99845e-05, gnorm=2.403, loss_scale=1, train_wall=11, gb_free=2.8, wall=32743 2021-06-19 03:44:39 | INFO | train_inner | epoch 001: 2958 / 3002 loss=2.937, ppl=7.66, wps=5797.3, ups=0.09, wpb=64880, bsz=128, num_updates=2935, lr=9.99845e-05, gnorm=2.382, loss_scale=1, train_wall=11, gb_free=2.8, wall=32754 2021-06-19 03:44:50 | INFO | train_inner | epoch 001: 2959 / 3002 loss=2.991, ppl=7.95, wps=5876.2, ups=0.09, wpb=64822, bsz=128, num_updates=2936, lr=9.99845e-05, gnorm=2.426, loss_scale=1, train_wall=11, gb_free=2.8, wall=32765 2021-06-19 03:45:01 | INFO | train_inner | epoch 001: 2960 / 3002 loss=2.864, ppl=7.28, wps=6013.8, ups=0.09, wpb=64902, bsz=128, num_updates=2937, lr=9.99845e-05, gnorm=2.593, loss_scale=1, train_wall=10, gb_free=2.8, wall=32776 2021-06-19 03:45:12 | INFO | train_inner | epoch 001: 2961 / 3002 loss=2.729, ppl=6.63, wps=5929.6, ups=0.09, wpb=64764, bsz=128, num_updates=2938, lr=9.99845e-05, gnorm=2.336, loss_scale=1, train_wall=10, gb_free=2.8, wall=32787 2021-06-19 03:45:23 | INFO | train_inner | epoch 001: 2962 / 3002 loss=2.901, ppl=7.47, wps=5808.9, ups=0.09, wpb=64792, bsz=128, num_updates=2939, lr=9.99845e-05, gnorm=2.264, loss_scale=1, train_wall=11, gb_free=2.8, wall=32798 2021-06-19 03:45:34 | INFO | train_inner | epoch 001: 2963 / 3002 loss=2.887, ppl=7.4, wps=5931.4, ups=0.09, wpb=64800, bsz=128, num_updates=2940, lr=9.99845e-05, gnorm=2.421, loss_scale=1, train_wall=10, gb_free=2.8, wall=32809 2021-06-19 03:45:45 | INFO | train_inner | epoch 001: 2964 / 3002 loss=2.92, ppl=7.57, wps=5786.2, ups=0.09, wpb=64795, bsz=128, num_updates=2941, lr=9.99845e-05, gnorm=2.702, loss_scale=1, train_wall=11, gb_free=2.8, wall=32820 2021-06-19 03:45:57 | INFO | train_inner | epoch 001: 2965 / 3002 loss=2.846, ppl=7.19, wps=5817.3, ups=0.09, wpb=64875, bsz=128, num_updates=2942, lr=9.99845e-05, gnorm=2.408, loss_scale=1, train_wall=11, gb_free=2.8, wall=32831 2021-06-19 03:46:08 | INFO | train_inner | epoch 001: 2966 / 3002 loss=2.634, ppl=6.21, wps=5790.8, ups=0.09, wpb=64871, bsz=128, num_updates=2943, lr=9.99845e-05, gnorm=2.305, loss_scale=1, train_wall=11, gb_free=2.8, wall=32842 2021-06-19 03:46:19 | INFO | train_inner | epoch 001: 2967 / 3002 loss=2.798, ppl=6.96, wps=5885.4, ups=0.09, wpb=64799, bsz=128, num_updates=2944, lr=9.99844e-05, gnorm=2.48, loss_scale=1, train_wall=11, gb_free=2.8, wall=32853 2021-06-19 03:46:30 | INFO | train_inner | epoch 001: 2968 / 3002 loss=2.776, ppl=6.85, wps=5739.3, ups=0.09, wpb=64815, bsz=128, num_updates=2945, lr=9.99844e-05, gnorm=2.453, loss_scale=1, train_wall=11, gb_free=2.8, wall=32864 2021-06-19 03:46:41 | INFO | train_inner | epoch 001: 2969 / 3002 loss=2.838, ppl=7.15, wps=5894.2, ups=0.09, wpb=64818, bsz=128, num_updates=2946, lr=9.99844e-05, gnorm=2.449, loss_scale=1, train_wall=11, gb_free=2.8, wall=32875 2021-06-19 03:46:52 | INFO | train_inner | epoch 001: 2970 / 3002 loss=2.83, ppl=7.11, wps=5842.5, ups=0.09, wpb=64803, bsz=128, num_updates=2947, lr=9.99844e-05, gnorm=2.364, loss_scale=1, train_wall=11, gb_free=2.8, wall=32887 2021-06-19 03:47:03 | INFO | train_inner | epoch 001: 2971 / 3002 loss=2.869, ppl=7.31, wps=5767.9, ups=0.09, wpb=64787, bsz=128, num_updates=2948, lr=9.99844e-05, gnorm=2.441, loss_scale=1, train_wall=11, gb_free=2.8, wall=32898 2021-06-19 03:47:15 | INFO | train_inner | epoch 001: 2972 / 3002 loss=2.831, ppl=7.11, wps=5758.5, ups=0.09, wpb=64841, bsz=128, num_updates=2949, lr=9.99844e-05, gnorm=2.464, loss_scale=1, train_wall=11, gb_free=2.8, wall=32909 2021-06-19 03:47:26 | INFO | train_inner | epoch 001: 2973 / 3002 loss=2.779, ppl=6.86, wps=5887.9, ups=0.09, wpb=64805, bsz=128, num_updates=2950, lr=9.99844e-05, gnorm=2.622, loss_scale=1, train_wall=11, gb_free=2.8, wall=32920 2021-06-19 03:47:37 | INFO | train_inner | epoch 001: 2974 / 3002 loss=2.779, ppl=6.86, wps=5941.9, ups=0.09, wpb=64809, bsz=128, num_updates=2951, lr=9.99844e-05, gnorm=3.213, loss_scale=1, train_wall=10, gb_free=2.8, wall=32931 2021-06-19 03:47:48 | INFO | train_inner | epoch 001: 2975 / 3002 loss=2.923, ppl=7.59, wps=5787.2, ups=0.09, wpb=64767, bsz=128, num_updates=2952, lr=9.99844e-05, gnorm=2.386, loss_scale=1, train_wall=11, gb_free=2.8, wall=32942 2021-06-19 03:47:59 | INFO | train_inner | epoch 001: 2976 / 3002 loss=2.874, ppl=7.33, wps=5746.1, ups=0.09, wpb=64807, bsz=128, num_updates=2953, lr=9.99844e-05, gnorm=2.457, loss_scale=1, train_wall=11, gb_free=2.8, wall=32953 2021-06-19 03:48:10 | INFO | train_inner | epoch 001: 2977 / 3002 loss=2.891, ppl=7.42, wps=5778.3, ups=0.09, wpb=64783, bsz=128, num_updates=2954, lr=9.99844e-05, gnorm=2.336, loss_scale=1, train_wall=11, gb_free=2.8, wall=32965 2021-06-19 03:48:21 | INFO | train_inner | epoch 001: 2978 / 3002 loss=2.82, ppl=7.06, wps=5878.4, ups=0.09, wpb=64909, bsz=128, num_updates=2955, lr=9.99844e-05, gnorm=2.261, loss_scale=1, train_wall=11, gb_free=2.8, wall=32976 2021-06-19 03:48:32 | INFO | train_inner | epoch 001: 2979 / 3002 loss=2.928, ppl=7.61, wps=5851.9, ups=0.09, wpb=64846, bsz=128, num_updates=2956, lr=9.99844e-05, gnorm=2.406, loss_scale=1, train_wall=11, gb_free=2.8, wall=32987 2021-06-19 03:48:44 | INFO | train_inner | epoch 001: 2980 / 3002 loss=2.754, ppl=6.75, wps=5783.8, ups=0.09, wpb=64819, bsz=128, num_updates=2957, lr=9.99843e-05, gnorm=2.319, loss_scale=1, train_wall=11, gb_free=2.8, wall=32998 2021-06-19 03:48:55 | INFO | train_inner | epoch 001: 2981 / 3002 loss=2.847, ppl=7.2, wps=5846.3, ups=0.09, wpb=64850, bsz=128, num_updates=2958, lr=9.99843e-05, gnorm=2.381, loss_scale=1, train_wall=11, gb_free=2.8, wall=33009 2021-06-19 03:49:05 | INFO | train_inner | epoch 001: 2982 / 3002 loss=2.977, ppl=7.87, wps=6053.9, ups=0.09, wpb=64902, bsz=128, num_updates=2959, lr=9.99843e-05, gnorm=2.363, loss_scale=1, train_wall=10, gb_free=2.8, wall=33020 2021-06-19 03:49:16 | INFO | train_inner | epoch 001: 2983 / 3002 loss=2.799, ppl=6.96, wps=5961.9, ups=0.09, wpb=64865, bsz=128, num_updates=2960, lr=9.99843e-05, gnorm=2.392, loss_scale=1, train_wall=10, gb_free=2.8, wall=33031 2021-06-19 03:49:27 | INFO | train_inner | epoch 001: 2984 / 3002 loss=2.757, ppl=6.76, wps=5909.7, ups=0.09, wpb=64855, bsz=128, num_updates=2961, lr=9.99843e-05, gnorm=2.509, loss_scale=1, train_wall=11, gb_free=2.8, wall=33042 2021-06-19 03:49:38 | INFO | train_inner | epoch 001: 2985 / 3002 loss=2.591, ppl=6.02, wps=5822.8, ups=0.09, wpb=64848, bsz=128, num_updates=2962, lr=9.99843e-05, gnorm=2.27, loss_scale=1, train_wall=11, gb_free=2.8, wall=33053 2021-06-19 03:49:50 | INFO | train_inner | epoch 001: 2986 / 3002 loss=2.863, ppl=7.27, wps=5798.3, ups=0.09, wpb=64825, bsz=128, num_updates=2963, lr=9.99843e-05, gnorm=2.418, loss_scale=1, train_wall=11, gb_free=2.8, wall=33064 2021-06-19 03:50:01 | INFO | train_inner | epoch 001: 2987 / 3002 loss=2.879, ppl=7.36, wps=5862, ups=0.09, wpb=64834, bsz=128, num_updates=2964, lr=9.99843e-05, gnorm=4.748, loss_scale=1, train_wall=11, gb_free=2.8, wall=33075 2021-06-19 03:50:12 | INFO | train_inner | epoch 001: 2988 / 3002 loss=2.954, ppl=7.75, wps=5823.7, ups=0.09, wpb=64811, bsz=128, num_updates=2965, lr=9.99843e-05, gnorm=2.289, loss_scale=1, train_wall=11, gb_free=2.8, wall=33086 2021-06-19 03:50:23 | INFO | train_inner | epoch 001: 2989 / 3002 loss=2.858, ppl=7.25, wps=5810.8, ups=0.09, wpb=64744, bsz=128, num_updates=2966, lr=9.99843e-05, gnorm=2.329, loss_scale=1, train_wall=11, gb_free=2.8, wall=33097 2021-06-19 03:50:34 | INFO | train_inner | epoch 001: 2990 / 3002 loss=2.823, ppl=7.08, wps=5859.6, ups=0.09, wpb=64768, bsz=128, num_updates=2967, lr=9.99843e-05, gnorm=2.525, loss_scale=1, train_wall=11, gb_free=2.8, wall=33108 2021-06-19 03:50:45 | INFO | train_inner | epoch 001: 2991 / 3002 loss=2.895, ppl=7.44, wps=5903.9, ups=0.09, wpb=64837, bsz=128, num_updates=2968, lr=9.99843e-05, gnorm=2.299, loss_scale=1, train_wall=11, gb_free=2.8, wall=33119 2021-06-19 03:50:56 | INFO | train_inner | epoch 001: 2992 / 3002 loss=3.091, ppl=8.52, wps=5767.4, ups=0.09, wpb=64819, bsz=128, num_updates=2969, lr=9.99842e-05, gnorm=2.315, loss_scale=1, train_wall=11, gb_free=2.8, wall=33131 2021-06-19 03:51:07 | INFO | train_inner | epoch 001: 2993 / 3002 loss=2.952, ppl=7.74, wps=5902.2, ups=0.09, wpb=64854, bsz=128, num_updates=2970, lr=9.99842e-05, gnorm=2.414, loss_scale=1, train_wall=11, gb_free=2.8, wall=33142 2021-06-19 03:51:18 | INFO | train_inner | epoch 001: 2994 / 3002 loss=2.83, ppl=7.11, wps=5825, ups=0.09, wpb=64895, bsz=128, num_updates=2971, lr=9.99842e-05, gnorm=2.31, loss_scale=1, train_wall=11, gb_free=2.8, wall=33153 2021-06-19 03:51:29 | INFO | train_inner | epoch 001: 2995 / 3002 loss=2.963, ppl=7.8, wps=5852.6, ups=0.09, wpb=64777, bsz=128, num_updates=2972, lr=9.99842e-05, gnorm=2.438, loss_scale=1, train_wall=11, gb_free=2.8, wall=33164 2021-06-19 03:51:40 | INFO | train_inner | epoch 001: 2996 / 3002 loss=2.981, ppl=7.9, wps=5882.9, ups=0.09, wpb=64877, bsz=128, num_updates=2973, lr=9.99842e-05, gnorm=2.382, loss_scale=1, train_wall=11, gb_free=2.8, wall=33175 2021-06-19 03:51:51 | INFO | train_inner | epoch 001: 2997 / 3002 loss=2.887, ppl=7.4, wps=5870.5, ups=0.09, wpb=64867, bsz=128, num_updates=2974, lr=9.99842e-05, gnorm=2.235, loss_scale=1, train_wall=11, gb_free=2.8, wall=33186 2021-06-19 03:52:02 | INFO | train_inner | epoch 001: 2998 / 3002 loss=2.845, ppl=7.18, wps=5931.2, ups=0.09, wpb=64865, bsz=128, num_updates=2975, lr=9.99842e-05, gnorm=2.266, loss_scale=1, train_wall=10, gb_free=2.8, wall=33197 2021-06-19 03:52:14 | INFO | train_inner | epoch 001: 2999 / 3002 loss=2.772, ppl=6.83, wps=5746.4, ups=0.09, wpb=64811, bsz=128, num_updates=2976, lr=9.99842e-05, gnorm=2.27, loss_scale=1, train_wall=11, gb_free=2.8, wall=33208 2021-06-19 03:52:25 | INFO | train_inner | epoch 001: 3000 / 3002 loss=2.727, ppl=6.62, wps=5921.5, ups=0.09, wpb=64831, bsz=128, num_updates=2977, lr=9.99842e-05, gnorm=2.179, loss_scale=1, train_wall=10, gb_free=2.8, wall=33219 2021-06-19 03:52:36 | INFO | train_inner | epoch 001: 3001 / 3002 loss=2.723, ppl=6.6, wps=5817.7, ups=0.09, wpb=64783, bsz=128, num_updates=2978, lr=9.99842e-05, gnorm=2.285, loss_scale=1, train_wall=11, gb_free=2.8, wall=33230 2021-06-19 03:52:42 | INFO | train_inner | epoch 001: 3002 / 3002 loss=2.979, ppl=7.89, wps=5835.1, ups=0.16, wpb=36447, bsz=72, num_updates=2979, lr=9.99842e-05, gnorm=3.173, loss_scale=1, train_wall=6, gb_free=2.8, wall=33236 2021-06-19 03:52:42 | INFO | fairseq_cli.train | begin validation on "valid" subset 2021-06-19 04:07:33 | INFO | valid | epoch 001 | valid on 'valid' subset | loss 2.7 | ppl 6.5 | wps 19818.9 | wpb 506.5 | bsz 1 | num_updates 2979 2021-06-19 04:07:33 | INFO | fairseq.checkpoint_utils | Preparing to save checkpoint for epoch 1 @ 2979 updates 2021-06-19 04:07:33 | INFO | fairseq.trainer | Saving checkpoint to checkpoints/checkpoint1.pt 2021-06-19 04:07:47 | INFO | fairseq.trainer | Finished saving checkpoint to checkpoints/checkpoint1.pt 2021-06-19 04:10:24 | INFO | fairseq.checkpoint_utils | Saved checkpoint checkpoints/checkpoint1.pt (epoch 1 @ 2979 updates, score 2.7) (writing took 171.51024247800524 seconds) 2021-06-19 04:10:24 | INFO | fairseq_cli.train | end of epoch 1 (average epoch stats below) 2021-06-19 04:10:24 | INFO | train | epoch 001 | loss 3.084 | ppl 8.48 | wps 5640.6 | ups 0.09 | wpb 64819.4 | bsz 128 | num_updates 2979 | lr 9.99842e-05 | gnorm 3.456 | loss_scale 1 | train_wall 31865 | gb_free 2.8 | wall 34298 2021-06-19 04:10:24 | INFO | fairseq.trainer | begin training epoch 2 2021-06-19 04:10:24 | INFO | fairseq_cli.train | Start iterating over samples 2021-06-19 04:10:35 | INFO | train_inner | epoch 002: 1 / 3002 loss=2.719, ppl=6.59, wps=60.3, ups=0, wpb=64738, bsz=128, num_updates=2980, lr=9.99842e-05, gnorm=2.404, loss_scale=1, train_wall=10, gb_free=2.8, wall=34309 2021-06-19 04:10:46 | INFO | train_inner | epoch 002: 2 / 3002 loss=2.681, ppl=6.41, wps=5934.7, ups=0.09, wpb=64845, bsz=128, num_updates=2981, lr=9.99842e-05, gnorm=2.33, loss_scale=1, train_wall=10, gb_free=2.8, wall=34320 2021-06-19 04:10:57 | INFO | train_inner | epoch 002: 3 / 3002 loss=2.763, ppl=6.79, wps=5966.7, ups=0.09, wpb=64892, bsz=128, num_updates=2982, lr=9.99841e-05, gnorm=2.417, loss_scale=1, train_wall=10, gb_free=2.8, wall=34331 2021-06-19 04:11:08 | INFO | train_inner | epoch 002: 4 / 3002 loss=2.893, ppl=7.43, wps=6039.8, ups=0.09, wpb=64824, bsz=128, num_updates=2983, lr=9.99841e-05, gnorm=2.373, loss_scale=1, train_wall=10, gb_free=2.8, wall=34342 2021-06-19 04:11:19 | INFO | train_inner | epoch 002: 5 / 3002 loss=2.813, ppl=7.03, wps=5902.8, ups=0.09, wpb=64913, bsz=128, num_updates=2984, lr=9.99841e-05, gnorm=2.482, loss_scale=1, train_wall=10, gb_free=2.8, wall=34353 2021-06-19 04:11:29 | INFO | train_inner | epoch 002: 6 / 3002 loss=2.763, ppl=6.79, wps=6029.1, ups=0.09, wpb=64879, bsz=128, num_updates=2985, lr=9.99841e-05, gnorm=2.247, loss_scale=1, train_wall=10, gb_free=2.8, wall=34364 2021-06-19 04:11:40 | INFO | train_inner | epoch 002: 7 / 3002 loss=2.792, ppl=6.92, wps=6039.6, ups=0.09, wpb=64804, bsz=128, num_updates=2986, lr=9.99841e-05, gnorm=2.369, loss_scale=1, train_wall=10, gb_free=2.8, wall=34375 2021-06-19 04:11:51 | INFO | train_inner | epoch 002: 8 / 3002 loss=2.77, ppl=6.82, wps=5991.4, ups=0.09, wpb=64764, bsz=128, num_updates=2987, lr=9.99841e-05, gnorm=2.568, loss_scale=1, train_wall=10, gb_free=2.8, wall=34385 2021-06-19 04:12:02 | INFO | train_inner | epoch 002: 9 / 3002 loss=2.822, ppl=7.07, wps=6013.2, ups=0.09, wpb=64792, bsz=128, num_updates=2988, lr=9.99841e-05, gnorm=2.42, loss_scale=1, train_wall=10, gb_free=2.8, wall=34396 2021-06-19 04:12:13 | INFO | train_inner | epoch 002: 10 / 3002 loss=2.905, ppl=7.49, wps=5978.8, ups=0.09, wpb=64763, bsz=128, num_updates=2989, lr=9.99841e-05, gnorm=2.263, loss_scale=1, train_wall=10, gb_free=2.8, wall=34407 2021-06-19 04:12:24 | INFO | train_inner | epoch 002: 11 / 3002 loss=2.929, ppl=7.62, wps=5796.3, ups=0.09, wpb=64776, bsz=128, num_updates=2990, lr=9.99841e-05, gnorm=2.487, loss_scale=1, train_wall=11, gb_free=2.8, wall=34418 2021-06-19 04:12:35 | INFO | train_inner | epoch 002: 12 / 3002 loss=2.876, ppl=7.34, wps=5848.8, ups=0.09, wpb=64880, bsz=128, num_updates=2991, lr=9.99841e-05, gnorm=2.283, loss_scale=1, train_wall=11, gb_free=2.8, wall=34429 2021-06-19 04:12:46 | INFO | train_inner | epoch 002: 13 / 3002 loss=2.759, ppl=6.77, wps=6036.2, ups=0.09, wpb=64829, bsz=128, num_updates=2992, lr=9.99841e-05, gnorm=2.194, loss_scale=1, train_wall=10, gb_free=2.8, wall=34440 2021-06-19 04:12:57 | INFO | train_inner | epoch 002: 14 / 3002 loss=2.946, ppl=7.71, wps=5802.9, ups=0.09, wpb=64808, bsz=128, num_updates=2993, lr=9.99841e-05, gnorm=7.363, loss_scale=1, train_wall=11, gb_free=2.8, wall=34451 2021-06-19 04:13:08 | INFO | train_inner | epoch 002: 15 / 3002 loss=2.838, ppl=7.15, wps=5908.3, ups=0.09, wpb=64788, bsz=128, num_updates=2994, lr=9.9984e-05, gnorm=2.432, loss_scale=1, train_wall=11, gb_free=2.8, wall=34462 2021-06-19 04:13:19 | INFO | train_inner | epoch 002: 16 / 3002 loss=2.784, ppl=6.89, wps=5916, ups=0.09, wpb=64840, bsz=128, num_updates=2995, lr=9.9984e-05, gnorm=2.535, loss_scale=1, train_wall=11, gb_free=2.8, wall=34473 2021-06-19 04:13:30 | INFO | train_inner | epoch 002: 17 / 3002 loss=2.77, ppl=6.82, wps=5874.5, ups=0.09, wpb=64828, bsz=128, num_updates=2996, lr=9.9984e-05, gnorm=2.27, loss_scale=1, train_wall=11, gb_free=2.8, wall=34484 2021-06-19 04:13:41 | INFO | train_inner | epoch 002: 18 / 3002 loss=3.046, ppl=8.26, wps=5787.6, ups=0.09, wpb=64788, bsz=128, num_updates=2997, lr=9.9984e-05, gnorm=2.402, loss_scale=1, train_wall=11, gb_free=2.8, wall=34495 2021-06-19 04:13:52 | INFO | train_inner | epoch 002: 19 / 3002 loss=2.898, ppl=7.46, wps=5709.7, ups=0.09, wpb=64742, bsz=128, num_updates=2998, lr=9.9984e-05, gnorm=2.434, loss_scale=1, train_wall=11, gb_free=2.8, wall=34507 2021-06-19 04:14:03 | INFO | train_inner | epoch 002: 20 / 3002 loss=3.067, ppl=8.38, wps=5869.2, ups=0.09, wpb=64851, bsz=128, num_updates=2999, lr=9.9984e-05, gnorm=2.886, loss_scale=1, train_wall=11, gb_free=2.8, wall=34518 2021-06-19 04:14:14 | INFO | train_inner | epoch 002: 21 / 3002 loss=2.846, ppl=7.19, wps=5803.9, ups=0.09, wpb=64800, bsz=128, num_updates=3000, lr=9.9984e-05, gnorm=2.3, loss_scale=1, train_wall=11, gb_free=2.8, wall=34529 2021-06-19 04:14:26 | INFO | train_inner | epoch 002: 22 / 3002 loss=2.83, ppl=7.11, wps=5757.6, ups=0.09, wpb=64869, bsz=128, num_updates=3001, lr=9.9984e-05, gnorm=32.828, loss_scale=1, train_wall=11, gb_free=2.8, wall=34540 2021-06-19 04:14:37 | INFO | train_inner | epoch 002: 23 / 3002 loss=2.892, ppl=7.42, wps=5818.4, ups=0.09, wpb=64760, bsz=128, num_updates=3002, lr=9.9984e-05, gnorm=2.287, loss_scale=1, train_wall=11, gb_free=2.8, wall=34551 2021-06-19 04:14:48 | INFO | train_inner | epoch 002: 24 / 3002 loss=2.897, ppl=7.45, wps=5864.3, ups=0.09, wpb=64847, bsz=128, num_updates=3003, lr=9.9984e-05, gnorm=4.044, loss_scale=1, train_wall=11, gb_free=2.8, wall=34562 2021-06-19 04:14:59 | INFO | train_inner | epoch 002: 25 / 3002 loss=2.962, ppl=7.79, wps=5861.1, ups=0.09, wpb=64792, bsz=128, num_updates=3004, lr=9.9984e-05, gnorm=4.734, loss_scale=1, train_wall=11, gb_free=2.8, wall=34573 2021-06-19 04:15:10 | INFO | train_inner | epoch 002: 26 / 3002 loss=2.901, ppl=7.47, wps=5859.2, ups=0.09, wpb=64804, bsz=128, num_updates=3005, lr=9.9984e-05, gnorm=2.297, loss_scale=1, train_wall=11, gb_free=2.8, wall=34584 2021-06-19 04:15:21 | INFO | train_inner | epoch 002: 27 / 3002 loss=2.915, ppl=7.54, wps=5789.7, ups=0.09, wpb=64828, bsz=128, num_updates=3006, lr=9.9984e-05, gnorm=2.304, loss_scale=1, train_wall=11, gb_free=2.8, wall=34596 2021-06-19 04:15:32 | INFO | train_inner | epoch 002: 28 / 3002 loss=3.03, ppl=8.17, wps=5823.7, ups=0.09, wpb=64750, bsz=128, num_updates=3007, lr=9.99839e-05, gnorm=2.33, loss_scale=1, train_wall=11, gb_free=2.8, wall=34607 2021-06-19 04:15:43 | INFO | train_inner | epoch 002: 29 / 3002 loss=2.887, ppl=7.4, wps=5911.2, ups=0.09, wpb=64806, bsz=128, num_updates=3008, lr=9.99839e-05, gnorm=2.325, loss_scale=1, train_wall=11, gb_free=2.8, wall=34618 2021-06-19 04:15:55 | INFO | train_inner | epoch 002: 30 / 3002 loss=2.975, ppl=7.86, wps=5789.1, ups=0.09, wpb=64753, bsz=128, num_updates=3009, lr=9.99839e-05, gnorm=2.356, loss_scale=1, train_wall=11, gb_free=2.8, wall=34629 2021-06-19 04:16:06 | INFO | train_inner | epoch 002: 31 / 3002 loss=2.812, ppl=7.02, wps=5857.4, ups=0.09, wpb=64810, bsz=128, num_updates=3010, lr=9.99839e-05, gnorm=2.481, loss_scale=1, train_wall=11, gb_free=2.8, wall=34640 2021-06-19 04:16:17 | INFO | train_inner | epoch 002: 32 / 3002 loss=2.901, ppl=7.47, wps=5828.2, ups=0.09, wpb=64815, bsz=128, num_updates=3011, lr=9.99839e-05, gnorm=2.336, loss_scale=1, train_wall=11, gb_free=2.8, wall=34651 2021-06-19 04:16:28 | INFO | train_inner | epoch 002: 33 / 3002 loss=2.885, ppl=7.39, wps=5887.3, ups=0.09, wpb=64822, bsz=128, num_updates=3012, lr=9.99839e-05, gnorm=2.508, loss_scale=1, train_wall=11, gb_free=2.8, wall=34662 2021-06-19 04:16:39 | INFO | train_inner | epoch 002: 34 / 3002 loss=2.938, ppl=7.66, wps=5900.6, ups=0.09, wpb=64917, bsz=128, num_updates=3013, lr=9.99839e-05, gnorm=2.502, loss_scale=1, train_wall=11, gb_free=2.8, wall=34673 2021-06-19 04:16:50 | INFO | train_inner | epoch 002: 35 / 3002 loss=2.894, ppl=7.44, wps=5849.6, ups=0.09, wpb=64767, bsz=128, num_updates=3014, lr=9.99839e-05, gnorm=3.59, loss_scale=1, train_wall=11, gb_free=2.8, wall=34684 2021-06-19 04:17:01 | INFO | train_inner | epoch 002: 36 / 3002 loss=2.933, ppl=7.64, wps=5695.8, ups=0.09, wpb=64744, bsz=128, num_updates=3015, lr=9.99839e-05, gnorm=2.459, loss_scale=1, train_wall=11, gb_free=2.8, wall=34696 2021-06-19 04:17:12 | INFO | train_inner | epoch 002: 37 / 3002 loss=2.918, ppl=7.56, wps=5783.9, ups=0.09, wpb=64744, bsz=128, num_updates=3016, lr=9.99839e-05, gnorm=2.448, loss_scale=1, train_wall=11, gb_free=2.8, wall=34707 2021-06-19 04:17:24 | INFO | train_inner | epoch 002: 38 / 3002 loss=2.896, ppl=7.44, wps=5809.3, ups=0.09, wpb=64851, bsz=128, num_updates=3017, lr=9.99839e-05, gnorm=2.537, loss_scale=1, train_wall=11, gb_free=2.8, wall=34718 2021-06-19 04:17:35 | INFO | train_inner | epoch 002: 39 / 3002 loss=2.852, ppl=7.22, wps=5836.9, ups=0.09, wpb=64783, bsz=128, num_updates=3018, lr=9.99839e-05, gnorm=2.313, loss_scale=1, train_wall=11, gb_free=2.8, wall=34729 2021-06-19 04:17:46 | INFO | train_inner | epoch 002: 40 / 3002 loss=2.931, ppl=7.63, wps=5899.1, ups=0.09, wpb=64836, bsz=128, num_updates=3019, lr=9.99838e-05, gnorm=2.31, loss_scale=1, train_wall=11, gb_free=2.8, wall=34740 2021-06-19 04:17:57 | INFO | train_inner | epoch 002: 41 / 3002 loss=2.902, ppl=7.48, wps=5879.1, ups=0.09, wpb=64854, bsz=128, num_updates=3020, lr=9.99838e-05, gnorm=2.386, loss_scale=1, train_wall=11, gb_free=2.8, wall=34751 2021-06-19 04:18:08 | INFO | train_inner | epoch 002: 42 / 3002 loss=2.725, ppl=6.61, wps=5834.3, ups=0.09, wpb=64857, bsz=128, num_updates=3021, lr=9.99838e-05, gnorm=2.333, loss_scale=1, train_wall=11, gb_free=2.8, wall=34762 2021-06-19 04:18:19 | INFO | train_inner | epoch 002: 43 / 3002 loss=2.984, ppl=7.91, wps=5947.6, ups=0.09, wpb=64827, bsz=128, num_updates=3022, lr=9.99838e-05, gnorm=2.425, loss_scale=1, train_wall=10, gb_free=2.8, wall=34773 2021-06-19 04:18:30 | INFO | train_inner | epoch 002: 44 / 3002 loss=3.075, ppl=8.43, wps=5823.8, ups=0.09, wpb=64785, bsz=128, num_updates=3023, lr=9.99838e-05, gnorm=2.49, loss_scale=1, train_wall=11, gb_free=2.8, wall=34784 2021-06-19 04:18:41 | INFO | train_inner | epoch 002: 45 / 3002 loss=2.653, ppl=6.29, wps=5908.6, ups=0.09, wpb=64889, bsz=128, num_updates=3024, lr=9.99838e-05, gnorm=2.271, loss_scale=1, train_wall=11, gb_free=2.8, wall=34795 2021-06-19 04:18:52 | INFO | train_inner | epoch 002: 46 / 3002 loss=2.902, ppl=7.47, wps=5815, ups=0.09, wpb=64841, bsz=128, num_updates=3025, lr=9.99838e-05, gnorm=2.697, loss_scale=1, train_wall=11, gb_free=2.8, wall=34806 2021-06-19 04:19:03 | INFO | train_inner | epoch 002: 47 / 3002 loss=2.884, ppl=7.38, wps=5796.6, ups=0.09, wpb=64800, bsz=128, num_updates=3026, lr=9.99838e-05, gnorm=2.298, loss_scale=1, train_wall=11, gb_free=2.8, wall=34817 2021-06-19 04:19:14 | INFO | train_inner | epoch 002: 48 / 3002 loss=2.963, ppl=7.79, wps=5777.6, ups=0.09, wpb=64762, bsz=128, num_updates=3027, lr=9.99838e-05, gnorm=2.415, loss_scale=1, train_wall=11, gb_free=2.8, wall=34829 2021-06-19 04:19:26 | INFO | train_inner | epoch 002: 49 / 3002 loss=2.877, ppl=7.35, wps=5743, ups=0.09, wpb=64780, bsz=128, num_updates=3028, lr=9.99838e-05, gnorm=2.31, loss_scale=1, train_wall=11, gb_free=2.8, wall=34840 2021-06-19 04:19:37 | INFO | train_inner | epoch 002: 50 / 3002 loss=2.845, ppl=7.19, wps=5847.3, ups=0.09, wpb=64822, bsz=128, num_updates=3029, lr=9.99838e-05, gnorm=2.356, loss_scale=1, train_wall=11, gb_free=2.8, wall=34851 2021-06-19 04:19:48 | INFO | train_inner | epoch 002: 51 / 3002 loss=2.971, ppl=7.84, wps=5972.2, ups=0.09, wpb=64844, bsz=128, num_updates=3030, lr=9.99838e-05, gnorm=2.366, loss_scale=1, train_wall=10, gb_free=2.8, wall=34862 2021-06-19 04:19:59 | INFO | train_inner | epoch 002: 52 / 3002 loss=2.732, ppl=6.64, wps=5732.3, ups=0.09, wpb=64823, bsz=128, num_updates=3031, lr=9.99838e-05, gnorm=2.368, loss_scale=1, train_wall=11, gb_free=2.8, wall=34873 2021-06-19 04:20:10 | INFO | train_inner | epoch 002: 53 / 3002 loss=2.791, ppl=6.92, wps=5869.4, ups=0.09, wpb=64823, bsz=128, num_updates=3032, lr=9.99837e-05, gnorm=2.431, loss_scale=1, train_wall=11, gb_free=2.8, wall=34884 2021-06-19 04:20:21 | INFO | train_inner | epoch 002: 54 / 3002 loss=2.836, ppl=7.14, wps=5992.6, ups=0.09, wpb=64839, bsz=128, num_updates=3033, lr=9.99837e-05, gnorm=2.355, loss_scale=1, train_wall=10, gb_free=2.8, wall=34895 2021-06-19 04:20:32 | INFO | train_inner | epoch 002: 55 / 3002 loss=2.993, ppl=7.96, wps=5830.3, ups=0.09, wpb=64845, bsz=128, num_updates=3034, lr=9.99837e-05, gnorm=2.448, loss_scale=1, train_wall=11, gb_free=2.8, wall=34906 2021-06-19 04:20:43 | INFO | train_inner | epoch 002: 56 / 3002 loss=2.694, ppl=6.47, wps=5901.9, ups=0.09, wpb=64885, bsz=128, num_updates=3035, lr=9.99837e-05, gnorm=2.324, loss_scale=1, train_wall=11, gb_free=2.8, wall=34917 2021-06-19 04:20:54 | INFO | train_inner | epoch 002: 57 / 3002 loss=2.998, ppl=7.99, wps=5861.6, ups=0.09, wpb=64872, bsz=128, num_updates=3036, lr=9.99837e-05, gnorm=2.514, loss_scale=1, train_wall=11, gb_free=2.8, wall=34928 2021-06-19 04:21:05 | INFO | train_inner | epoch 002: 58 / 3002 loss=2.893, ppl=7.43, wps=5799.9, ups=0.09, wpb=64827, bsz=128, num_updates=3037, lr=9.99837e-05, gnorm=2.374, loss_scale=1, train_wall=11, gb_free=2.8, wall=34939 2021-06-19 04:21:16 | INFO | train_inner | epoch 002: 59 / 3002 loss=2.894, ppl=7.44, wps=5916.1, ups=0.09, wpb=64880, bsz=128, num_updates=3038, lr=9.99837e-05, gnorm=2.426, loss_scale=1, train_wall=11, gb_free=2.8, wall=34950 2021-06-19 04:21:27 | INFO | train_inner | epoch 002: 60 / 3002 loss=2.891, ppl=7.42, wps=5771.6, ups=0.09, wpb=64805, bsz=128, num_updates=3039, lr=9.99837e-05, gnorm=5.77, loss_scale=1, train_wall=11, gb_free=2.8, wall=34962 2021-06-19 04:21:39 | INFO | train_inner | epoch 002: 61 / 3002 loss=2.796, ppl=6.95, wps=5711.5, ups=0.09, wpb=64788, bsz=128, num_updates=3040, lr=9.99837e-05, gnorm=2.794, loss_scale=1, train_wall=11, gb_free=2.8, wall=34973 2021-06-19 04:21:50 | INFO | train_inner | epoch 002: 62 / 3002 loss=2.837, ppl=7.14, wps=5877.8, ups=0.09, wpb=64909, bsz=128, num_updates=3041, lr=9.99837e-05, gnorm=2.339, loss_scale=1, train_wall=11, gb_free=2.8, wall=34984 2021-06-19 04:22:01 | INFO | train_inner | epoch 002: 63 / 3002 loss=2.846, ppl=7.19, wps=5895.1, ups=0.09, wpb=64803, bsz=128, num_updates=3042, lr=9.99837e-05, gnorm=2.389, loss_scale=1, train_wall=11, gb_free=2.8, wall=34995 2021-06-19 04:22:12 | INFO | train_inner | epoch 002: 64 / 3002 loss=2.945, ppl=7.7, wps=5789, ups=0.09, wpb=64783, bsz=128, num_updates=3043, lr=9.99837e-05, gnorm=3.268, loss_scale=2, train_wall=11, gb_free=2.8, wall=35006 2021-06-19 04:22:23 | INFO | train_inner | epoch 002: 65 / 3002 loss=2.871, ppl=7.32, wps=5769.2, ups=0.09, wpb=64703, bsz=128, num_updates=3044, lr=9.99836e-05, gnorm=2.356, loss_scale=2, train_wall=11, gb_free=2.8, wall=35017 2021-06-19 04:22:34 | INFO | train_inner | epoch 002: 66 / 3002 loss=2.829, ppl=7.11, wps=5842.8, ups=0.09, wpb=64804, bsz=128, num_updates=3045, lr=9.99836e-05, gnorm=3.084, loss_scale=2, train_wall=11, gb_free=2.8, wall=35028 2021-06-19 04:22:45 | INFO | train_inner | epoch 002: 67 / 3002 loss=2.825, ppl=7.09, wps=5887.7, ups=0.09, wpb=64792, bsz=128, num_updates=3046, lr=9.99836e-05, gnorm=2.379, loss_scale=2, train_wall=11, gb_free=2.8, wall=35039 2021-06-19 04:22:56 | INFO | train_inner | epoch 002: 68 / 3002 loss=2.964, ppl=7.8, wps=5955.7, ups=0.09, wpb=64749, bsz=128, num_updates=3047, lr=9.99836e-05, gnorm=2.602, loss_scale=2, train_wall=10, gb_free=2.8, wall=35050 2021-06-19 04:23:07 | INFO | train_inner | epoch 002: 69 / 3002 loss=2.925, ppl=7.59, wps=5856.6, ups=0.09, wpb=64819, bsz=128, num_updates=3048, lr=9.99836e-05, gnorm=5.751, loss_scale=2, train_wall=11, gb_free=2.8, wall=35061 2021-06-19 04:23:18 | INFO | train_inner | epoch 002: 70 / 3002 loss=3.018, ppl=8.1, wps=5762.9, ups=0.09, wpb=64788, bsz=128, num_updates=3049, lr=9.99836e-05, gnorm=2.759, loss_scale=2, train_wall=11, gb_free=2.8, wall=35073 2021-06-19 04:23:29 | INFO | train_inner | epoch 002: 71 / 3002 loss=2.928, ppl=7.61, wps=5882.5, ups=0.09, wpb=64822, bsz=128, num_updates=3050, lr=9.99836e-05, gnorm=2.616, loss_scale=2, train_wall=11, gb_free=2.8, wall=35084 2021-06-19 04:23:40 | INFO | train_inner | epoch 002: 72 / 3002 loss=2.883, ppl=7.38, wps=5939.2, ups=0.09, wpb=64876, bsz=128, num_updates=3051, lr=9.99836e-05, gnorm=4.896, loss_scale=2, train_wall=10, gb_free=2.8, wall=35095 2021-06-19 04:23:52 | INFO | train_inner | epoch 002: 73 / 3002 loss=2.696, ppl=6.48, wps=5752.3, ups=0.09, wpb=64814, bsz=128, num_updates=3052, lr=9.99836e-05, gnorm=2.467, loss_scale=2, train_wall=11, gb_free=2.8, wall=35106 2021-06-19 04:24:03 | INFO | train_inner | epoch 002: 74 / 3002 loss=2.884, ppl=7.38, wps=5804.6, ups=0.09, wpb=64821, bsz=128, num_updates=3053, lr=9.99836e-05, gnorm=6.645, loss_scale=2, train_wall=11, gb_free=2.8, wall=35117 2021-06-19 04:24:14 | INFO | train_inner | epoch 002: 75 / 3002 loss=2.726, ppl=6.62, wps=5825.6, ups=0.09, wpb=64924, bsz=128, num_updates=3054, lr=9.99836e-05, gnorm=2.44, loss_scale=2, train_wall=11, gb_free=2.8, wall=35128 2021-06-19 04:24:25 | INFO | train_inner | epoch 002: 76 / 3002 loss=3.016, ppl=8.09, wps=5951.8, ups=0.09, wpb=64816, bsz=128, num_updates=3055, lr=9.99836e-05, gnorm=2.549, loss_scale=2, train_wall=10, gb_free=2.8, wall=35139 2021-06-19 04:24:36 | INFO | train_inner | epoch 002: 77 / 3002 loss=2.964, ppl=7.8, wps=5903, ups=0.09, wpb=64850, bsz=128, num_updates=3056, lr=9.99836e-05, gnorm=2.657, loss_scale=2, train_wall=11, gb_free=2.8, wall=35150 2021-06-19 04:24:47 | INFO | train_inner | epoch 002: 78 / 3002 loss=2.891, ppl=7.42, wps=5831.5, ups=0.09, wpb=64829, bsz=128, num_updates=3057, lr=9.99835e-05, gnorm=2.482, loss_scale=2, train_wall=11, gb_free=2.8, wall=35161 2021-06-19 04:24:58 | INFO | train_inner | epoch 002: 79 / 3002 loss=2.819, ppl=7.06, wps=5805, ups=0.09, wpb=64855, bsz=128, num_updates=3058, lr=9.99835e-05, gnorm=2.457, loss_scale=2, train_wall=11, gb_free=2.8, wall=35172 2021-06-19 04:25:09 | INFO | train_inner | epoch 002: 80 / 3002 loss=2.873, ppl=7.32, wps=5899.7, ups=0.09, wpb=64835, bsz=128, num_updates=3059, lr=9.99835e-05, gnorm=2.746, loss_scale=2, train_wall=11, gb_free=2.8, wall=35183 2021-06-19 04:25:20 | INFO | train_inner | epoch 002: 81 / 3002 loss=2.79, ppl=6.91, wps=5916.4, ups=0.09, wpb=64825, bsz=128, num_updates=3060, lr=9.99835e-05, gnorm=2.541, loss_scale=2, train_wall=10, gb_free=2.8, wall=35194 2021-06-19 04:25:31 | INFO | train_inner | epoch 002: 82 / 3002 loss=2.872, ppl=7.32, wps=5959.6, ups=0.09, wpb=64894, bsz=128, num_updates=3061, lr=9.99835e-05, gnorm=2.623, loss_scale=2, train_wall=10, gb_free=2.8, wall=35205 2021-06-19 04:25:42 | INFO | train_inner | epoch 002: 83 / 3002 loss=2.834, ppl=7.13, wps=5866.8, ups=0.09, wpb=64848, bsz=128, num_updates=3062, lr=9.99835e-05, gnorm=2.704, loss_scale=2, train_wall=11, gb_free=2.8, wall=35216 2021-06-19 04:25:53 | INFO | train_inner | epoch 002: 84 / 3002 loss=2.75, ppl=6.73, wps=5820.6, ups=0.09, wpb=64784, bsz=128, num_updates=3063, lr=9.99835e-05, gnorm=2.419, loss_scale=2, train_wall=11, gb_free=2.8, wall=35227 2021-06-19 04:26:04 | INFO | train_inner | epoch 002: 85 / 3002 loss=2.86, ppl=7.26, wps=5776.8, ups=0.09, wpb=64818, bsz=128, num_updates=3064, lr=9.99835e-05, gnorm=2.323, loss_scale=2, train_wall=11, gb_free=2.8, wall=35239 2021-06-19 04:26:15 | INFO | train_inner | epoch 002: 86 / 3002 loss=2.72, ppl=6.59, wps=5895.5, ups=0.09, wpb=64901, bsz=128, num_updates=3065, lr=9.99835e-05, gnorm=2.484, loss_scale=2, train_wall=11, gb_free=2.8, wall=35250 2021-06-19 04:26:26 | INFO | train_inner | epoch 002: 87 / 3002 loss=2.996, ppl=7.98, wps=5839.5, ups=0.09, wpb=64836, bsz=128, num_updates=3066, lr=9.99835e-05, gnorm=2.548, loss_scale=2, train_wall=11, gb_free=2.8, wall=35261 2021-06-19 04:26:38 | INFO | train_inner | epoch 002: 88 / 3002 loss=2.862, ppl=7.27, wps=5821, ups=0.09, wpb=64806, bsz=128, num_updates=3067, lr=9.99835e-05, gnorm=2.428, loss_scale=2, train_wall=11, gb_free=2.8, wall=35272 2021-06-19 04:26:48 | INFO | train_inner | epoch 002: 89 / 3002 loss=2.808, ppl=7, wps=5948.9, ups=0.09, wpb=64821, bsz=128, num_updates=3068, lr=9.99835e-05, gnorm=2.386, loss_scale=2, train_wall=10, gb_free=2.8, wall=35283 2021-06-19 04:26:59 | INFO | train_inner | epoch 002: 90 / 3002 loss=2.842, ppl=7.17, wps=5867.3, ups=0.09, wpb=64822, bsz=128, num_updates=3069, lr=9.99834e-05, gnorm=2.327, loss_scale=2, train_wall=11, gb_free=2.8, wall=35294 2021-06-19 04:27:10 | INFO | train_inner | epoch 002: 91 / 3002 loss=3.049, ppl=8.28, wps=5922, ups=0.09, wpb=64831, bsz=128, num_updates=3070, lr=9.99834e-05, gnorm=2.444, loss_scale=2, train_wall=10, gb_free=2.8, wall=35305 2021-06-19 04:27:21 | INFO | train_inner | epoch 002: 92 / 3002 loss=2.821, ppl=7.06, wps=5924.3, ups=0.09, wpb=64851, bsz=128, num_updates=3071, lr=9.99834e-05, gnorm=2.288, loss_scale=2, train_wall=10, gb_free=2.8, wall=35316 2021-06-19 04:27:32 | INFO | train_inner | epoch 002: 93 / 3002 loss=2.872, ppl=7.32, wps=5881.8, ups=0.09, wpb=64823, bsz=128, num_updates=3072, lr=9.99834e-05, gnorm=2.34, loss_scale=2, train_wall=11, gb_free=2.8, wall=35327 2021-06-19 04:27:43 | INFO | train_inner | epoch 002: 94 / 3002 loss=2.933, ppl=7.64, wps=5879.7, ups=0.09, wpb=64853, bsz=128, num_updates=3073, lr=9.99834e-05, gnorm=2.302, loss_scale=2, train_wall=11, gb_free=2.8, wall=35338 2021-06-19 04:27:54 | INFO | train_inner | epoch 002: 95 / 3002 loss=2.822, ppl=7.07, wps=5896.2, ups=0.09, wpb=64892, bsz=128, num_updates=3074, lr=9.99834e-05, gnorm=5.24, loss_scale=2, train_wall=11, gb_free=2.8, wall=35349 2021-06-19 04:28:06 | INFO | train_inner | epoch 002: 96 / 3002 loss=2.836, ppl=7.14, wps=5833, ups=0.09, wpb=64832, bsz=128, num_updates=3075, lr=9.99834e-05, gnorm=2.35, loss_scale=2, train_wall=11, gb_free=2.8, wall=35360 2021-06-19 04:28:16 | INFO | train_inner | epoch 002: 97 / 3002 loss=2.845, ppl=7.19, wps=6038.4, ups=0.09, wpb=64836, bsz=128, num_updates=3076, lr=9.99834e-05, gnorm=6.876, loss_scale=2, train_wall=10, gb_free=2.8, wall=35371 2021-06-19 04:28:27 | INFO | train_inner | epoch 002: 98 / 3002 loss=2.816, ppl=7.04, wps=5963.8, ups=0.09, wpb=64876, bsz=128, num_updates=3077, lr=9.99834e-05, gnorm=2.397, loss_scale=2, train_wall=10, gb_free=2.8, wall=35381 2021-06-19 04:28:38 | INFO | train_inner | epoch 002: 99 / 3002 loss=2.885, ppl=7.39, wps=5736, ups=0.09, wpb=64769, bsz=128, num_updates=3078, lr=9.99834e-05, gnorm=2.284, loss_scale=2, train_wall=11, gb_free=2.8, wall=35393 2021-06-19 04:28:50 | INFO | train_inner | epoch 002: 100 / 3002 loss=3.075, ppl=8.43, wps=5843.9, ups=0.09, wpb=64795, bsz=128, num_updates=3079, lr=9.99834e-05, gnorm=2.499, loss_scale=2, train_wall=11, gb_free=2.8, wall=35404 2021-06-19 04:29:00 | INFO | train_inner | epoch 002: 101 / 3002 loss=2.93, ppl=7.62, wps=5966.9, ups=0.09, wpb=64787, bsz=128, num_updates=3080, lr=9.99834e-05, gnorm=2.469, loss_scale=2, train_wall=10, gb_free=2.8, wall=35415 2021-06-19 04:29:12 | INFO | train_inner | epoch 002: 102 / 3002 loss=2.745, ppl=6.7, wps=5772.6, ups=0.09, wpb=64801, bsz=128, num_updates=3081, lr=9.99834e-05, gnorm=2.432, loss_scale=2, train_wall=11, gb_free=2.8, wall=35426 2021-06-19 04:29:23 | INFO | train_inner | epoch 002: 103 / 3002 loss=2.877, ppl=7.35, wps=5784, ups=0.09, wpb=64887, bsz=128, num_updates=3082, lr=9.99833e-05, gnorm=2.481, loss_scale=2, train_wall=11, gb_free=2.8, wall=35437 2021-06-19 04:29:34 | INFO | train_inner | epoch 002: 104 / 3002 loss=2.792, ppl=6.92, wps=5788.6, ups=0.09, wpb=64851, bsz=128, num_updates=3083, lr=9.99833e-05, gnorm=2.329, loss_scale=2, train_wall=11, gb_free=2.8, wall=35448 2021-06-19 04:29:45 | INFO | train_inner | epoch 002: 105 / 3002 loss=2.889, ppl=7.41, wps=5817.2, ups=0.09, wpb=64793, bsz=128, num_updates=3084, lr=9.99833e-05, gnorm=2.307, loss_scale=2, train_wall=11, gb_free=2.8, wall=35459 2021-06-19 04:29:56 | INFO | train_inner | epoch 002: 106 / 3002 loss=2.655, ppl=6.3, wps=5800.8, ups=0.09, wpb=64913, bsz=128, num_updates=3085, lr=9.99833e-05, gnorm=2.383, loss_scale=2, train_wall=11, gb_free=2.8, wall=35471 2021-06-19 04:30:07 | INFO | train_inner | epoch 002: 107 / 3002 loss=2.902, ppl=7.47, wps=5903.6, ups=0.09, wpb=64803, bsz=128, num_updates=3086, lr=9.99833e-05, gnorm=2.557, loss_scale=2, train_wall=11, gb_free=2.8, wall=35482 2021-06-19 04:30:18 | INFO | train_inner | epoch 002: 108 / 3002 loss=2.768, ppl=6.81, wps=5927, ups=0.09, wpb=64847, bsz=128, num_updates=3087, lr=9.99833e-05, gnorm=2.379, loss_scale=2, train_wall=10, gb_free=2.8, wall=35493 2021-06-19 04:30:29 | INFO | train_inner | epoch 002: 109 / 3002 loss=2.814, ppl=7.03, wps=5840.3, ups=0.09, wpb=64818, bsz=128, num_updates=3088, lr=9.99833e-05, gnorm=2.298, loss_scale=2, train_wall=11, gb_free=2.8, wall=35504 2021-06-19 04:30:40 | INFO | train_inner | epoch 002: 110 / 3002 loss=2.856, ppl=7.24, wps=5886.6, ups=0.09, wpb=64823, bsz=128, num_updates=3089, lr=9.99833e-05, gnorm=2.358, loss_scale=2, train_wall=11, gb_free=2.8, wall=35515 2021-06-19 04:30:51 | INFO | train_inner | epoch 002: 111 / 3002 loss=2.936, ppl=7.65, wps=5828.8, ups=0.09, wpb=64776, bsz=128, num_updates=3090, lr=9.99833e-05, gnorm=2.328, loss_scale=2, train_wall=11, gb_free=2.8, wall=35526 2021-06-19 04:31:03 | INFO | train_inner | epoch 002: 112 / 3002 loss=3.037, ppl=8.21, wps=5855.8, ups=0.09, wpb=64797, bsz=128, num_updates=3091, lr=9.99833e-05, gnorm=2.365, loss_scale=2, train_wall=11, gb_free=2.8, wall=35537 2021-06-19 04:31:14 | INFO | train_inner | epoch 002: 113 / 3002 loss=2.786, ppl=6.9, wps=5880.8, ups=0.09, wpb=64794, bsz=128, num_updates=3092, lr=9.99833e-05, gnorm=17.447, loss_scale=2, train_wall=11, gb_free=2.8, wall=35548 2021-06-19 04:31:25 | INFO | train_inner | epoch 002: 114 / 3002 loss=2.833, ppl=7.12, wps=5767.7, ups=0.09, wpb=64716, bsz=128, num_updates=3093, lr=9.99833e-05, gnorm=2.661, loss_scale=2, train_wall=11, gb_free=2.8, wall=35559 2021-06-19 04:31:36 | INFO | train_inner | epoch 002: 115 / 3002 loss=2.578, ppl=5.97, wps=5923.2, ups=0.09, wpb=64924, bsz=128, num_updates=3094, lr=9.99832e-05, gnorm=2.37, loss_scale=2, train_wall=11, gb_free=2.8, wall=35570 2021-06-19 04:31:47 | INFO | train_inner | epoch 002: 116 / 3002 loss=2.713, ppl=6.56, wps=5896.7, ups=0.09, wpb=64815, bsz=128, num_updates=3095, lr=9.99832e-05, gnorm=2.285, loss_scale=2, train_wall=11, gb_free=2.8, wall=35581 2021-06-19 04:31:58 | INFO | train_inner | epoch 002: 117 / 3002 loss=2.67, ppl=6.36, wps=5857.8, ups=0.09, wpb=64795, bsz=128, num_updates=3096, lr=9.99832e-05, gnorm=2.499, loss_scale=2, train_wall=11, gb_free=2.8, wall=35592 2021-06-19 04:32:09 | INFO | train_inner | epoch 002: 118 / 3002 loss=2.793, ppl=6.93, wps=5940.7, ups=0.09, wpb=64805, bsz=128, num_updates=3097, lr=9.99832e-05, gnorm=2.445, loss_scale=2, train_wall=10, gb_free=2.8, wall=35603 2021-06-19 04:32:20 | INFO | train_inner | epoch 002: 119 / 3002 loss=2.843, ppl=7.17, wps=5903.3, ups=0.09, wpb=64864, bsz=128, num_updates=3098, lr=9.99832e-05, gnorm=2.438, loss_scale=2, train_wall=11, gb_free=2.8, wall=35614 2021-06-19 04:32:31 | INFO | train_inner | epoch 002: 120 / 3002 loss=2.853, ppl=7.22, wps=5814.8, ups=0.09, wpb=64752, bsz=128, num_updates=3099, lr=9.99832e-05, gnorm=2.314, loss_scale=2, train_wall=11, gb_free=2.8, wall=35625 2021-06-19 04:32:42 | INFO | train_inner | epoch 002: 121 / 3002 loss=2.656, ppl=6.3, wps=5815.4, ups=0.09, wpb=64821, bsz=128, num_updates=3100, lr=9.99832e-05, gnorm=2.268, loss_scale=2, train_wall=11, gb_free=2.8, wall=35636 2021-06-19 04:32:53 | INFO | train_inner | epoch 002: 122 / 3002 loss=2.982, ppl=7.9, wps=5903.2, ups=0.09, wpb=64841, bsz=128, num_updates=3101, lr=9.99832e-05, gnorm=2.372, loss_scale=2, train_wall=11, gb_free=2.8, wall=35647 2021-06-19 04:33:04 | INFO | train_inner | epoch 002: 123 / 3002 loss=2.796, ppl=6.95, wps=5819.6, ups=0.09, wpb=64837, bsz=128, num_updates=3102, lr=9.99832e-05, gnorm=2.33, loss_scale=2, train_wall=11, gb_free=2.8, wall=35658 2021-06-19 04:33:15 | INFO | train_inner | epoch 002: 124 / 3002 loss=2.697, ppl=6.48, wps=5926.3, ups=0.09, wpb=64840, bsz=128, num_updates=3103, lr=9.99832e-05, gnorm=3.092, loss_scale=2, train_wall=10, gb_free=2.8, wall=35669 2021-06-19 04:33:26 | INFO | train_inner | epoch 002: 125 / 3002 loss=2.872, ppl=7.32, wps=5727.7, ups=0.09, wpb=64832, bsz=128, num_updates=3104, lr=9.99832e-05, gnorm=2.306, loss_scale=2, train_wall=11, gb_free=2.8, wall=35681 2021-06-19 04:33:38 | INFO | train_inner | epoch 002: 126 / 3002 loss=2.888, ppl=7.4, wps=5756.9, ups=0.09, wpb=64798, bsz=128, num_updates=3105, lr=9.99832e-05, gnorm=2.433, loss_scale=2, train_wall=11, gb_free=2.8, wall=35692 2021-06-19 04:33:49 | INFO | train_inner | epoch 002: 127 / 3002 loss=2.859, ppl=7.26, wps=5812.2, ups=0.09, wpb=64806, bsz=128, num_updates=3106, lr=9.99832e-05, gnorm=2.932, loss_scale=2, train_wall=11, gb_free=2.8, wall=35703 2021-06-19 04:34:00 | INFO | train_inner | epoch 002: 128 / 3002 loss=2.809, ppl=7.01, wps=5918, ups=0.09, wpb=64835, bsz=128, num_updates=3107, lr=9.99831e-05, gnorm=3.282, loss_scale=2, train_wall=10, gb_free=2.8, wall=35714 2021-06-19 04:34:11 | INFO | train_inner | epoch 002: 129 / 3002 loss=2.963, ppl=7.8, wps=5847.5, ups=0.09, wpb=64838, bsz=128, num_updates=3108, lr=9.99831e-05, gnorm=8.76, loss_scale=2, train_wall=11, gb_free=2.8, wall=35725 2021-06-19 04:34:22 | INFO | train_inner | epoch 002: 130 / 3002 loss=2.878, ppl=7.35, wps=5862, ups=0.09, wpb=64894, bsz=128, num_updates=3109, lr=9.99831e-05, gnorm=2.58, loss_scale=2, train_wall=11, gb_free=2.8, wall=35736 2021-06-19 04:34:33 | INFO | train_inner | epoch 002: 131 / 3002 loss=3.012, ppl=8.07, wps=5868, ups=0.09, wpb=64813, bsz=128, num_updates=3110, lr=9.99831e-05, gnorm=2.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=35747 2021-06-19 04:34:44 | INFO | train_inner | epoch 002: 132 / 3002 loss=2.764, ppl=6.79, wps=5814.8, ups=0.09, wpb=64875, bsz=128, num_updates=3111, lr=9.99831e-05, gnorm=2.36, loss_scale=2, train_wall=11, gb_free=2.8, wall=35758 2021-06-19 04:34:55 | INFO | train_inner | epoch 002: 133 / 3002 loss=2.799, ppl=6.96, wps=5766.1, ups=0.09, wpb=64830, bsz=128, num_updates=3112, lr=9.99831e-05, gnorm=2.333, loss_scale=2, train_wall=11, gb_free=2.8, wall=35770 2021-06-19 04:35:06 | INFO | train_inner | epoch 002: 134 / 3002 loss=2.687, ppl=6.44, wps=5837, ups=0.09, wpb=64829, bsz=128, num_updates=3113, lr=9.99831e-05, gnorm=2.516, loss_scale=2, train_wall=11, gb_free=2.8, wall=35781 2021-06-19 04:35:18 | INFO | train_inner | epoch 002: 135 / 3002 loss=2.871, ppl=7.32, wps=5858.1, ups=0.09, wpb=64874, bsz=128, num_updates=3114, lr=9.99831e-05, gnorm=2.413, loss_scale=2, train_wall=11, gb_free=2.8, wall=35792 2021-06-19 04:35:29 | INFO | train_inner | epoch 002: 136 / 3002 loss=2.876, ppl=7.34, wps=5829.9, ups=0.09, wpb=64865, bsz=128, num_updates=3115, lr=9.99831e-05, gnorm=2.474, loss_scale=2, train_wall=11, gb_free=2.8, wall=35803 2021-06-19 04:35:40 | INFO | train_inner | epoch 002: 137 / 3002 loss=2.804, ppl=6.98, wps=5915, ups=0.09, wpb=64790, bsz=128, num_updates=3116, lr=9.99831e-05, gnorm=2.383, loss_scale=2, train_wall=11, gb_free=2.8, wall=35814 2021-06-19 04:35:51 | INFO | train_inner | epoch 002: 138 / 3002 loss=2.966, ppl=7.81, wps=5948, ups=0.09, wpb=64878, bsz=128, num_updates=3117, lr=9.99831e-05, gnorm=2.809, loss_scale=2, train_wall=10, gb_free=2.8, wall=35825 2021-06-19 04:36:01 | INFO | train_inner | epoch 002: 139 / 3002 loss=2.666, ppl=6.35, wps=5985.3, ups=0.09, wpb=64802, bsz=128, num_updates=3118, lr=9.99831e-05, gnorm=4.02, loss_scale=2, train_wall=10, gb_free=2.8, wall=35836 2021-06-19 04:36:13 | INFO | train_inner | epoch 002: 140 / 3002 loss=2.81, ppl=7.01, wps=5776.1, ups=0.09, wpb=64892, bsz=128, num_updates=3119, lr=9.9983e-05, gnorm=2.402, loss_scale=2, train_wall=11, gb_free=2.8, wall=35847 2021-06-19 04:36:23 | INFO | train_inner | epoch 002: 141 / 3002 loss=2.848, ppl=7.2, wps=5943, ups=0.09, wpb=64835, bsz=128, num_updates=3120, lr=9.9983e-05, gnorm=2.442, loss_scale=2, train_wall=10, gb_free=2.8, wall=35858 2021-06-19 04:36:34 | INFO | train_inner | epoch 002: 142 / 3002 loss=2.781, ppl=6.87, wps=5937.4, ups=0.09, wpb=64838, bsz=128, num_updates=3121, lr=9.9983e-05, gnorm=2.861, loss_scale=2, train_wall=10, gb_free=2.8, wall=35869 2021-06-19 04:36:45 | INFO | train_inner | epoch 002: 143 / 3002 loss=2.817, ppl=7.05, wps=5882.4, ups=0.09, wpb=64845, bsz=128, num_updates=3122, lr=9.9983e-05, gnorm=18.499, loss_scale=2, train_wall=11, gb_free=2.8, wall=35880 2021-06-19 04:36:56 | INFO | train_inner | epoch 002: 144 / 3002 loss=2.65, ppl=6.28, wps=5861.5, ups=0.09, wpb=64853, bsz=128, num_updates=3123, lr=9.9983e-05, gnorm=2.316, loss_scale=2, train_wall=11, gb_free=2.8, wall=35891 2021-06-19 04:37:08 | INFO | train_inner | epoch 002: 145 / 3002 loss=2.84, ppl=7.16, wps=5869.5, ups=0.09, wpb=64806, bsz=128, num_updates=3124, lr=9.9983e-05, gnorm=2.436, loss_scale=2, train_wall=11, gb_free=2.8, wall=35902 2021-06-19 04:37:18 | INFO | train_inner | epoch 002: 146 / 3002 loss=2.758, ppl=6.76, wps=5942.9, ups=0.09, wpb=64885, bsz=128, num_updates=3125, lr=9.9983e-05, gnorm=2.435, loss_scale=2, train_wall=10, gb_free=2.8, wall=35913 2021-06-19 04:37:30 | INFO | train_inner | epoch 002: 147 / 3002 loss=2.941, ppl=7.68, wps=5848.3, ups=0.09, wpb=64831, bsz=128, num_updates=3126, lr=9.9983e-05, gnorm=10.465, loss_scale=2, train_wall=11, gb_free=2.8, wall=35924 2021-06-19 04:37:41 | INFO | train_inner | epoch 002: 148 / 3002 loss=2.911, ppl=7.52, wps=5853.3, ups=0.09, wpb=64809, bsz=128, num_updates=3127, lr=9.9983e-05, gnorm=3.173, loss_scale=2, train_wall=11, gb_free=2.8, wall=35935 2021-06-19 04:37:52 | INFO | train_inner | epoch 002: 149 / 3002 loss=2.946, ppl=7.7, wps=5733.3, ups=0.09, wpb=64793, bsz=128, num_updates=3128, lr=9.9983e-05, gnorm=3.167, loss_scale=2, train_wall=11, gb_free=2.8, wall=35946 2021-06-19 04:38:03 | INFO | train_inner | epoch 002: 150 / 3002 loss=2.815, ppl=7.04, wps=5813.3, ups=0.09, wpb=64861, bsz=128, num_updates=3129, lr=9.9983e-05, gnorm=3.696, loss_scale=2, train_wall=11, gb_free=2.8, wall=35957 2021-06-19 04:38:14 | INFO | train_inner | epoch 002: 151 / 3002 loss=2.829, ppl=7.1, wps=5809.6, ups=0.09, wpb=64907, bsz=128, num_updates=3130, lr=9.9983e-05, gnorm=3.061, loss_scale=2, train_wall=11, gb_free=2.8, wall=35969 2021-06-19 04:38:25 | INFO | train_inner | epoch 002: 152 / 3002 loss=2.928, ppl=7.61, wps=5875.9, ups=0.09, wpb=64807, bsz=128, num_updates=3131, lr=9.9983e-05, gnorm=2.945, loss_scale=2, train_wall=11, gb_free=2.8, wall=35980 2021-06-19 04:38:36 | INFO | train_inner | epoch 002: 153 / 3002 loss=2.794, ppl=6.93, wps=5966.3, ups=0.09, wpb=64860, bsz=128, num_updates=3132, lr=9.99829e-05, gnorm=2.924, loss_scale=2, train_wall=10, gb_free=2.8, wall=35990 2021-06-19 04:38:47 | INFO | train_inner | epoch 002: 154 / 3002 loss=2.959, ppl=7.78, wps=5842.4, ups=0.09, wpb=64898, bsz=128, num_updates=3133, lr=9.99829e-05, gnorm=2.844, loss_scale=2, train_wall=11, gb_free=2.8, wall=36002 2021-06-19 04:38:58 | INFO | train_inner | epoch 002: 155 / 3002 loss=2.916, ppl=7.55, wps=5842.5, ups=0.09, wpb=64809, bsz=128, num_updates=3134, lr=9.99829e-05, gnorm=2.834, loss_scale=2, train_wall=11, gb_free=2.8, wall=36013 2021-06-19 04:39:09 | INFO | train_inner | epoch 002: 156 / 3002 loss=2.695, ppl=6.48, wps=5855.8, ups=0.09, wpb=64780, bsz=128, num_updates=3135, lr=9.99829e-05, gnorm=2.879, loss_scale=2, train_wall=11, gb_free=2.8, wall=36024 2021-06-19 04:39:21 | INFO | train_inner | epoch 002: 157 / 3002 loss=2.907, ppl=7.5, wps=5760.5, ups=0.09, wpb=64797, bsz=128, num_updates=3136, lr=9.99829e-05, gnorm=2.818, loss_scale=2, train_wall=11, gb_free=2.8, wall=36035 2021-06-19 04:39:32 | INFO | train_inner | epoch 002: 158 / 3002 loss=3.055, ppl=8.31, wps=5810.7, ups=0.09, wpb=64762, bsz=128, num_updates=3137, lr=9.99829e-05, gnorm=2.812, loss_scale=2, train_wall=11, gb_free=2.8, wall=36046 2021-06-19 04:39:43 | INFO | train_inner | epoch 002: 159 / 3002 loss=3.004, ppl=8.02, wps=5836.6, ups=0.09, wpb=64853, bsz=128, num_updates=3138, lr=9.99829e-05, gnorm=2.659, loss_scale=2, train_wall=11, gb_free=2.8, wall=36057 2021-06-19 04:39:54 | INFO | train_inner | epoch 002: 160 / 3002 loss=2.803, ppl=6.98, wps=5748.7, ups=0.09, wpb=64854, bsz=128, num_updates=3139, lr=9.99829e-05, gnorm=3.476, loss_scale=2, train_wall=11, gb_free=2.8, wall=36069 2021-06-19 04:40:05 | INFO | train_inner | epoch 002: 161 / 3002 loss=2.796, ppl=6.94, wps=5860.3, ups=0.09, wpb=64822, bsz=128, num_updates=3140, lr=9.99829e-05, gnorm=2.478, loss_scale=2, train_wall=11, gb_free=2.8, wall=36080 2021-06-19 04:40:16 | INFO | train_inner | epoch 002: 162 / 3002 loss=2.888, ppl=7.4, wps=5783, ups=0.09, wpb=64883, bsz=128, num_updates=3141, lr=9.99829e-05, gnorm=2.576, loss_scale=2, train_wall=11, gb_free=2.8, wall=36091 2021-06-19 04:40:28 | INFO | train_inner | epoch 002: 163 / 3002 loss=2.874, ppl=7.33, wps=5785.6, ups=0.09, wpb=64815, bsz=128, num_updates=3142, lr=9.99829e-05, gnorm=6.894, loss_scale=2, train_wall=11, gb_free=2.8, wall=36102 2021-06-19 04:40:39 | INFO | train_inner | epoch 002: 164 / 3002 loss=2.784, ppl=6.89, wps=5841.9, ups=0.09, wpb=64832, bsz=128, num_updates=3143, lr=9.99829e-05, gnorm=2.645, loss_scale=2, train_wall=11, gb_free=2.8, wall=36113 2021-06-19 04:40:50 | INFO | train_inner | epoch 002: 165 / 3002 loss=2.971, ppl=7.84, wps=5963, ups=0.09, wpb=64867, bsz=128, num_updates=3144, lr=9.99828e-05, gnorm=2.581, loss_scale=2, train_wall=10, gb_free=2.8, wall=36124 2021-06-19 04:41:01 | INFO | train_inner | epoch 002: 166 / 3002 loss=2.961, ppl=7.79, wps=5877.5, ups=0.09, wpb=64838, bsz=128, num_updates=3145, lr=9.99828e-05, gnorm=2.491, loss_scale=2, train_wall=11, gb_free=2.8, wall=36135 2021-06-19 04:41:12 | INFO | train_inner | epoch 002: 167 / 3002 loss=2.881, ppl=7.36, wps=5817.6, ups=0.09, wpb=64852, bsz=128, num_updates=3146, lr=9.99828e-05, gnorm=2.777, loss_scale=2, train_wall=11, gb_free=2.8, wall=36146 2021-06-19 04:41:23 | INFO | train_inner | epoch 002: 168 / 3002 loss=2.743, ppl=6.69, wps=5770.5, ups=0.09, wpb=64885, bsz=128, num_updates=3147, lr=9.99828e-05, gnorm=2.849, loss_scale=2, train_wall=11, gb_free=2.8, wall=36157 2021-06-19 04:41:34 | INFO | train_inner | epoch 002: 169 / 3002 loss=2.802, ppl=6.98, wps=5920, ups=0.09, wpb=64857, bsz=128, num_updates=3148, lr=9.99828e-05, gnorm=2.351, loss_scale=2, train_wall=11, gb_free=2.8, wall=36168 2021-06-19 04:41:45 | INFO | train_inner | epoch 002: 170 / 3002 loss=3.024, ppl=8.13, wps=5837.5, ups=0.09, wpb=64790, bsz=128, num_updates=3149, lr=9.99828e-05, gnorm=2.713, loss_scale=2, train_wall=11, gb_free=2.8, wall=36179 2021-06-19 04:41:56 | INFO | train_inner | epoch 002: 171 / 3002 loss=2.929, ppl=7.62, wps=5882.3, ups=0.09, wpb=64843, bsz=128, num_updates=3150, lr=9.99828e-05, gnorm=4.364, loss_scale=2, train_wall=11, gb_free=2.8, wall=36191 2021-06-19 04:42:07 | INFO | train_inner | epoch 002: 172 / 3002 loss=3.037, ppl=8.21, wps=5824.2, ups=0.09, wpb=64799, bsz=128, num_updates=3151, lr=9.99828e-05, gnorm=3.016, loss_scale=2, train_wall=11, gb_free=2.8, wall=36202 2021-06-19 04:42:18 | INFO | train_inner | epoch 002: 173 / 3002 loss=2.847, ppl=7.2, wps=5854.5, ups=0.09, wpb=64786, bsz=128, num_updates=3152, lr=9.99828e-05, gnorm=2.386, loss_scale=2, train_wall=11, gb_free=2.8, wall=36213 2021-06-19 04:42:29 | INFO | train_inner | epoch 002: 174 / 3002 loss=2.9, ppl=7.46, wps=5926.8, ups=0.09, wpb=64859, bsz=128, num_updates=3153, lr=9.99828e-05, gnorm=11.081, loss_scale=2, train_wall=11, gb_free=2.8, wall=36224 2021-06-19 04:42:40 | INFO | train_inner | epoch 002: 175 / 3002 loss=2.756, ppl=6.75, wps=5804.3, ups=0.09, wpb=64846, bsz=128, num_updates=3154, lr=9.99828e-05, gnorm=2.363, loss_scale=2, train_wall=11, gb_free=2.8, wall=36235 2021-06-19 04:42:52 | INFO | train_inner | epoch 002: 176 / 3002 loss=2.928, ppl=7.61, wps=5863.8, ups=0.09, wpb=64825, bsz=128, num_updates=3155, lr=9.99828e-05, gnorm=2.766, loss_scale=2, train_wall=11, gb_free=2.8, wall=36246 2021-06-19 04:43:02 | INFO | train_inner | epoch 002: 177 / 3002 loss=2.804, ppl=6.99, wps=6075.1, ups=0.09, wpb=64861, bsz=128, num_updates=3156, lr=9.99828e-05, gnorm=5.703, loss_scale=2, train_wall=10, gb_free=2.8, wall=36257 2021-06-19 04:43:13 | INFO | train_inner | epoch 002: 178 / 3002 loss=2.897, ppl=7.45, wps=5791.6, ups=0.09, wpb=64823, bsz=128, num_updates=3157, lr=9.99827e-05, gnorm=2.418, loss_scale=2, train_wall=11, gb_free=2.8, wall=36268 2021-06-19 04:43:24 | INFO | train_inner | epoch 002: 179 / 3002 loss=2.734, ppl=6.65, wps=5850.7, ups=0.09, wpb=64830, bsz=128, num_updates=3158, lr=9.99827e-05, gnorm=9.348, loss_scale=2, train_wall=11, gb_free=2.8, wall=36279 2021-06-19 04:43:36 | INFO | train_inner | epoch 002: 180 / 3002 loss=2.836, ppl=7.14, wps=5782.3, ups=0.09, wpb=64807, bsz=128, num_updates=3159, lr=9.99827e-05, gnorm=2.507, loss_scale=2, train_wall=11, gb_free=2.8, wall=36290 2021-06-19 04:43:47 | INFO | train_inner | epoch 002: 181 / 3002 loss=2.818, ppl=7.05, wps=5821, ups=0.09, wpb=64836, bsz=128, num_updates=3160, lr=9.99827e-05, gnorm=2.488, loss_scale=2, train_wall=11, gb_free=2.8, wall=36301 2021-06-19 04:43:58 | INFO | train_inner | epoch 002: 182 / 3002 loss=3.038, ppl=8.22, wps=5921.7, ups=0.09, wpb=64836, bsz=128, num_updates=3161, lr=9.99827e-05, gnorm=3.683, loss_scale=2, train_wall=11, gb_free=2.8, wall=36312 2021-06-19 04:44:09 | INFO | train_inner | epoch 002: 183 / 3002 loss=2.942, ppl=7.68, wps=5931.9, ups=0.09, wpb=64863, bsz=128, num_updates=3162, lr=9.99827e-05, gnorm=2.534, loss_scale=2, train_wall=10, gb_free=2.8, wall=36323 2021-06-19 04:44:20 | INFO | train_inner | epoch 002: 184 / 3002 loss=2.77, ppl=6.82, wps=5769.4, ups=0.09, wpb=64802, bsz=128, num_updates=3163, lr=9.99827e-05, gnorm=4.392, loss_scale=2, train_wall=11, gb_free=2.8, wall=36334 2021-06-19 04:44:31 | INFO | train_inner | epoch 002: 185 / 3002 loss=2.974, ppl=7.86, wps=5810, ups=0.09, wpb=64868, bsz=128, num_updates=3164, lr=9.99827e-05, gnorm=2.551, loss_scale=2, train_wall=11, gb_free=2.8, wall=36345 2021-06-19 04:44:42 | INFO | train_inner | epoch 002: 186 / 3002 loss=2.72, ppl=6.59, wps=5982, ups=0.09, wpb=64956, bsz=128, num_updates=3165, lr=9.99827e-05, gnorm=2.547, loss_scale=2, train_wall=10, gb_free=2.8, wall=36356 2021-06-19 04:44:53 | INFO | train_inner | epoch 002: 187 / 3002 loss=2.92, ppl=7.57, wps=5726.1, ups=0.09, wpb=64809, bsz=128, num_updates=3166, lr=9.99827e-05, gnorm=2.971, loss_scale=2, train_wall=11, gb_free=2.8, wall=36368 2021-06-19 04:45:04 | INFO | train_inner | epoch 002: 188 / 3002 loss=2.857, ppl=7.25, wps=5783.8, ups=0.09, wpb=64831, bsz=128, num_updates=3167, lr=9.99827e-05, gnorm=2.683, loss_scale=2, train_wall=11, gb_free=2.8, wall=36379 2021-06-19 04:45:16 | INFO | train_inner | epoch 002: 189 / 3002 loss=2.908, ppl=7.5, wps=5777.9, ups=0.09, wpb=64853, bsz=128, num_updates=3168, lr=9.99827e-05, gnorm=2.609, loss_scale=2, train_wall=11, gb_free=2.8, wall=36390 2021-06-19 04:45:27 | INFO | train_inner | epoch 002: 190 / 3002 loss=2.95, ppl=7.73, wps=5859.1, ups=0.09, wpb=64888, bsz=128, num_updates=3169, lr=9.99826e-05, gnorm=2.514, loss_scale=2, train_wall=11, gb_free=2.8, wall=36401 2021-06-19 04:45:38 | INFO | train_inner | epoch 002: 191 / 3002 loss=2.826, ppl=7.09, wps=5821.6, ups=0.09, wpb=64790, bsz=128, num_updates=3170, lr=9.99826e-05, gnorm=3.2, loss_scale=2, train_wall=11, gb_free=2.8, wall=36412 2021-06-19 04:45:49 | INFO | train_inner | epoch 002: 192 / 3002 loss=2.834, ppl=7.13, wps=5844.1, ups=0.09, wpb=64849, bsz=128, num_updates=3171, lr=9.99826e-05, gnorm=2.711, loss_scale=4, train_wall=11, gb_free=2.8, wall=36423 2021-06-19 04:46:00 | INFO | train_inner | epoch 002: 193 / 3002 loss=2.845, ppl=7.19, wps=5826.2, ups=0.09, wpb=64862, bsz=128, num_updates=3172, lr=9.99826e-05, gnorm=2.946, loss_scale=4, train_wall=11, gb_free=2.8, wall=36434 2021-06-19 04:46:11 | INFO | train_inner | epoch 002: 194 / 3002 loss=2.812, ppl=7.02, wps=5788.8, ups=0.09, wpb=64812, bsz=128, num_updates=3173, lr=9.99826e-05, gnorm=3.091, loss_scale=4, train_wall=11, gb_free=2.8, wall=36446 2021-06-19 04:46:22 | INFO | train_inner | epoch 002: 195 / 3002 loss=2.856, ppl=7.24, wps=5882.8, ups=0.09, wpb=64757, bsz=128, num_updates=3174, lr=9.99826e-05, gnorm=2.842, loss_scale=4, train_wall=11, gb_free=2.8, wall=36457 2021-06-19 04:46:34 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-19 04:46:45 | INFO | train_inner | epoch 002: 197 / 3002 loss=2.866, ppl=7.29, wps=2917.4, ups=0.04, wpb=64853, bsz=128, num_updates=3175, lr=9.99826e-05, gnorm=2.636, loss_scale=2, train_wall=21, gb_free=2.8, wall=36479 2021-06-19 04:46:56 | INFO | train_inner | epoch 002: 198 / 3002 loss=3.001, ppl=8.01, wps=5871.9, ups=0.09, wpb=64755, bsz=128, num_updates=3176, lr=9.99826e-05, gnorm=2.476, loss_scale=2, train_wall=11, gb_free=2.8, wall=36490 2021-06-19 04:47:07 | INFO | train_inner | epoch 002: 199 / 3002 loss=2.868, ppl=7.3, wps=5783.8, ups=0.09, wpb=64804, bsz=128, num_updates=3177, lr=9.99826e-05, gnorm=2.526, loss_scale=2, train_wall=11, gb_free=2.8, wall=36501 2021-06-19 04:47:18 | INFO | train_inner | epoch 002: 200 / 3002 loss=2.82, ppl=7.06, wps=5902.9, ups=0.09, wpb=64881, bsz=128, num_updates=3178, lr=9.99826e-05, gnorm=2.5, loss_scale=2, train_wall=11, gb_free=2.8, wall=36512 2021-06-19 04:47:29 | INFO | train_inner | epoch 002: 201 / 3002 loss=3.124, ppl=8.72, wps=5870.3, ups=0.09, wpb=64838, bsz=128, num_updates=3179, lr=9.99826e-05, gnorm=2.802, loss_scale=2, train_wall=11, gb_free=2.8, wall=36523 2021-06-19 04:47:40 | INFO | train_inner | epoch 002: 202 / 3002 loss=2.855, ppl=7.24, wps=5902.8, ups=0.09, wpb=64848, bsz=128, num_updates=3180, lr=9.99826e-05, gnorm=2.423, loss_scale=2, train_wall=11, gb_free=2.8, wall=36534 2021-06-19 04:47:51 | INFO | train_inner | epoch 002: 203 / 3002 loss=2.765, ppl=6.8, wps=5937.8, ups=0.09, wpb=64856, bsz=128, num_updates=3181, lr=9.99826e-05, gnorm=2.394, loss_scale=2, train_wall=10, gb_free=2.8, wall=36545 2021-06-19 04:48:02 | INFO | train_inner | epoch 002: 204 / 3002 loss=2.832, ppl=7.12, wps=5872.7, ups=0.09, wpb=64831, bsz=128, num_updates=3182, lr=9.99825e-05, gnorm=5.874, loss_scale=2, train_wall=11, gb_free=2.8, wall=36556 2021-06-19 04:48:13 | INFO | train_inner | epoch 002: 205 / 3002 loss=2.764, ppl=6.79, wps=5857, ups=0.09, wpb=64898, bsz=128, num_updates=3183, lr=9.99825e-05, gnorm=2.448, loss_scale=2, train_wall=11, gb_free=2.8, wall=36567 2021-06-19 04:48:24 | INFO | train_inner | epoch 002: 206 / 3002 loss=2.84, ppl=7.16, wps=5837.5, ups=0.09, wpb=64823, bsz=128, num_updates=3184, lr=9.99825e-05, gnorm=3.674, loss_scale=2, train_wall=11, gb_free=2.8, wall=36578 2021-06-19 04:48:35 | INFO | train_inner | epoch 002: 207 / 3002 loss=2.845, ppl=7.19, wps=5939, ups=0.09, wpb=64893, bsz=128, num_updates=3185, lr=9.99825e-05, gnorm=2.366, loss_scale=2, train_wall=10, gb_free=2.8, wall=36589 2021-06-19 04:48:46 | INFO | train_inner | epoch 002: 208 / 3002 loss=2.892, ppl=7.42, wps=5900.8, ups=0.09, wpb=64912, bsz=128, num_updates=3186, lr=9.99825e-05, gnorm=2.479, loss_scale=2, train_wall=11, gb_free=2.8, wall=36600 2021-06-19 04:48:57 | INFO | train_inner | epoch 002: 209 / 3002 loss=2.89, ppl=7.41, wps=5740.5, ups=0.09, wpb=64845, bsz=128, num_updates=3187, lr=9.99825e-05, gnorm=3.482, loss_scale=2, train_wall=11, gb_free=2.8, wall=36612 2021-06-19 04:49:08 | INFO | train_inner | epoch 002: 210 / 3002 loss=2.744, ppl=6.7, wps=5838.5, ups=0.09, wpb=64820, bsz=128, num_updates=3188, lr=9.99825e-05, gnorm=2.414, loss_scale=2, train_wall=11, gb_free=2.8, wall=36623 2021-06-19 04:49:19 | INFO | train_inner | epoch 002: 211 / 3002 loss=2.754, ppl=6.75, wps=5821.3, ups=0.09, wpb=64819, bsz=128, num_updates=3189, lr=9.99825e-05, gnorm=2.361, loss_scale=2, train_wall=11, gb_free=2.8, wall=36634 2021-06-19 04:49:30 | INFO | train_inner | epoch 002: 212 / 3002 loss=2.916, ppl=7.55, wps=5998.6, ups=0.09, wpb=64780, bsz=128, num_updates=3190, lr=9.99825e-05, gnorm=2.417, loss_scale=2, train_wall=10, gb_free=2.8, wall=36645 2021-06-19 04:49:41 | INFO | train_inner | epoch 002: 213 / 3002 loss=2.86, ppl=7.26, wps=5918.4, ups=0.09, wpb=64857, bsz=128, num_updates=3191, lr=9.99825e-05, gnorm=2.643, loss_scale=2, train_wall=11, gb_free=2.8, wall=36656 2021-06-19 04:49:52 | INFO | train_inner | epoch 002: 214 / 3002 loss=2.784, ppl=6.89, wps=5914.2, ups=0.09, wpb=64856, bsz=128, num_updates=3192, lr=9.99825e-05, gnorm=2.665, loss_scale=2, train_wall=11, gb_free=2.8, wall=36667 2021-06-19 04:50:03 | INFO | train_inner | epoch 002: 215 / 3002 loss=2.839, ppl=7.15, wps=5839.3, ups=0.09, wpb=64830, bsz=128, num_updates=3193, lr=9.99825e-05, gnorm=5.391, loss_scale=2, train_wall=11, gb_free=2.8, wall=36678 2021-06-19 04:50:14 | INFO | train_inner | epoch 002: 216 / 3002 loss=2.874, ppl=7.33, wps=5918.7, ups=0.09, wpb=64870, bsz=128, num_updates=3194, lr=9.99824e-05, gnorm=2.647, loss_scale=2, train_wall=10, gb_free=2.8, wall=36689 2021-06-19 04:50:25 | INFO | train_inner | epoch 002: 217 / 3002 loss=2.907, ppl=7.5, wps=5996.2, ups=0.09, wpb=64826, bsz=128, num_updates=3195, lr=9.99824e-05, gnorm=3.309, loss_scale=2, train_wall=10, gb_free=2.8, wall=36699 2021-06-19 04:50:36 | INFO | train_inner | epoch 002: 218 / 3002 loss=2.867, ppl=7.3, wps=5882.8, ups=0.09, wpb=64816, bsz=128, num_updates=3196, lr=9.99824e-05, gnorm=2.52, loss_scale=2, train_wall=11, gb_free=2.8, wall=36710 2021-06-19 04:50:47 | INFO | train_inner | epoch 002: 219 / 3002 loss=2.879, ppl=7.36, wps=5824.6, ups=0.09, wpb=64839, bsz=128, num_updates=3197, lr=9.99824e-05, gnorm=2.475, loss_scale=2, train_wall=11, gb_free=2.8, wall=36722 2021-06-19 04:50:59 | INFO | train_inner | epoch 002: 220 / 3002 loss=2.923, ppl=7.59, wps=5736.1, ups=0.09, wpb=64844, bsz=128, num_updates=3198, lr=9.99824e-05, gnorm=2.429, loss_scale=2, train_wall=11, gb_free=2.8, wall=36733 2021-06-19 04:51:09 | INFO | train_inner | epoch 002: 221 / 3002 loss=2.858, ppl=7.25, wps=5934.5, ups=0.09, wpb=64847, bsz=128, num_updates=3199, lr=9.99824e-05, gnorm=2.423, loss_scale=2, train_wall=10, gb_free=2.8, wall=36744 2021-06-19 04:51:20 | INFO | train_inner | epoch 002: 222 / 3002 loss=2.938, ppl=7.67, wps=5906.6, ups=0.09, wpb=64844, bsz=128, num_updates=3200, lr=9.99824e-05, gnorm=2.552, loss_scale=2, train_wall=11, gb_free=2.8, wall=36755 2021-06-19 04:51:31 | INFO | train_inner | epoch 002: 223 / 3002 loss=2.804, ppl=6.98, wps=5919.6, ups=0.09, wpb=64767, bsz=128, num_updates=3201, lr=9.99824e-05, gnorm=2.481, loss_scale=2, train_wall=10, gb_free=2.8, wall=36766 2021-06-19 04:51:42 | INFO | train_inner | epoch 002: 224 / 3002 loss=2.908, ppl=7.5, wps=5855.8, ups=0.09, wpb=64797, bsz=128, num_updates=3202, lr=9.99824e-05, gnorm=2.599, loss_scale=2, train_wall=11, gb_free=2.8, wall=36777 2021-06-19 04:51:53 | INFO | train_inner | epoch 002: 225 / 3002 loss=2.831, ppl=7.12, wps=5871.8, ups=0.09, wpb=64776, bsz=128, num_updates=3203, lr=9.99824e-05, gnorm=2.407, loss_scale=2, train_wall=11, gb_free=2.8, wall=36788 2021-06-19 04:52:04 | INFO | train_inner | epoch 002: 226 / 3002 loss=2.891, ppl=7.42, wps=5899.4, ups=0.09, wpb=64867, bsz=128, num_updates=3204, lr=9.99824e-05, gnorm=4.171, loss_scale=2, train_wall=11, gb_free=2.8, wall=36799 2021-06-19 04:52:15 | INFO | train_inner | epoch 002: 227 / 3002 loss=2.867, ppl=7.3, wps=5879.5, ups=0.09, wpb=64911, bsz=128, num_updates=3205, lr=9.99824e-05, gnorm=2.435, loss_scale=2, train_wall=11, gb_free=2.8, wall=36810 2021-06-19 04:52:27 | INFO | train_inner | epoch 002: 228 / 3002 loss=2.836, ppl=7.14, wps=5743.2, ups=0.09, wpb=64855, bsz=128, num_updates=3206, lr=9.99824e-05, gnorm=2.961, loss_scale=2, train_wall=11, gb_free=2.8, wall=36821 2021-06-19 04:52:38 | INFO | train_inner | epoch 002: 229 / 3002 loss=2.819, ppl=7.06, wps=5898.6, ups=0.09, wpb=64863, bsz=128, num_updates=3207, lr=9.99823e-05, gnorm=2.435, loss_scale=2, train_wall=11, gb_free=2.8, wall=36832 2021-06-19 04:52:49 | INFO | train_inner | epoch 002: 230 / 3002 loss=2.756, ppl=6.75, wps=5825.1, ups=0.09, wpb=64864, bsz=128, num_updates=3208, lr=9.99823e-05, gnorm=5.445, loss_scale=2, train_wall=11, gb_free=2.8, wall=36843 2021-06-19 04:53:00 | INFO | train_inner | epoch 002: 231 / 3002 loss=2.716, ppl=6.57, wps=5752.3, ups=0.09, wpb=64798, bsz=128, num_updates=3209, lr=9.99823e-05, gnorm=2.469, loss_scale=2, train_wall=11, gb_free=2.8, wall=36855 2021-06-19 04:53:11 | INFO | train_inner | epoch 002: 232 / 3002 loss=2.742, ppl=6.69, wps=5829.7, ups=0.09, wpb=64771, bsz=128, num_updates=3210, lr=9.99823e-05, gnorm=2.291, loss_scale=2, train_wall=11, gb_free=2.8, wall=36866 2021-06-19 04:53:22 | INFO | train_inner | epoch 002: 233 / 3002 loss=3.047, ppl=8.27, wps=6055.7, ups=0.09, wpb=64821, bsz=128, num_updates=3211, lr=9.99823e-05, gnorm=2.336, loss_scale=2, train_wall=10, gb_free=2.8, wall=36876 2021-06-19 04:53:33 | INFO | train_inner | epoch 002: 234 / 3002 loss=2.911, ppl=7.52, wps=5885.2, ups=0.09, wpb=64846, bsz=128, num_updates=3212, lr=9.99823e-05, gnorm=2.5, loss_scale=2, train_wall=11, gb_free=2.8, wall=36887 2021-06-19 04:53:44 | INFO | train_inner | epoch 002: 235 / 3002 loss=2.824, ppl=7.08, wps=5877.8, ups=0.09, wpb=64873, bsz=128, num_updates=3213, lr=9.99823e-05, gnorm=2.482, loss_scale=2, train_wall=11, gb_free=2.8, wall=36898 2021-06-19 04:53:55 | INFO | train_inner | epoch 002: 236 / 3002 loss=2.751, ppl=6.73, wps=5794.4, ups=0.09, wpb=64855, bsz=128, num_updates=3214, lr=9.99823e-05, gnorm=2.335, loss_scale=2, train_wall=11, gb_free=2.8, wall=36910 2021-06-19 04:54:06 | INFO | train_inner | epoch 002: 237 / 3002 loss=2.963, ppl=7.8, wps=5818.4, ups=0.09, wpb=64868, bsz=128, num_updates=3215, lr=9.99823e-05, gnorm=2.529, loss_scale=2, train_wall=11, gb_free=2.8, wall=36921 2021-06-19 04:54:17 | INFO | train_inner | epoch 002: 238 / 3002 loss=2.814, ppl=7.03, wps=5835.9, ups=0.09, wpb=64796, bsz=128, num_updates=3216, lr=9.99823e-05, gnorm=2.357, loss_scale=2, train_wall=11, gb_free=2.8, wall=36932 2021-06-19 04:54:29 | INFO | train_inner | epoch 002: 239 / 3002 loss=2.834, ppl=7.13, wps=5782.1, ups=0.09, wpb=64799, bsz=128, num_updates=3217, lr=9.99823e-05, gnorm=2.296, loss_scale=2, train_wall=11, gb_free=2.8, wall=36943 2021-06-19 04:54:40 | INFO | train_inner | epoch 002: 240 / 3002 loss=2.739, ppl=6.68, wps=5890.1, ups=0.09, wpb=64891, bsz=128, num_updates=3218, lr=9.99823e-05, gnorm=2.462, loss_scale=2, train_wall=11, gb_free=2.8, wall=36954 2021-06-19 04:54:51 | INFO | train_inner | epoch 002: 241 / 3002 loss=2.811, ppl=7.02, wps=5889.9, ups=0.09, wpb=64832, bsz=128, num_updates=3219, lr=9.99822e-05, gnorm=2.529, loss_scale=2, train_wall=11, gb_free=2.8, wall=36965 2021-06-19 04:55:02 | INFO | train_inner | epoch 002: 242 / 3002 loss=2.715, ppl=6.57, wps=5733.8, ups=0.09, wpb=64821, bsz=128, num_updates=3220, lr=9.99822e-05, gnorm=2.443, loss_scale=2, train_wall=11, gb_free=2.8, wall=36976 2021-06-19 04:55:13 | INFO | train_inner | epoch 002: 243 / 3002 loss=2.723, ppl=6.6, wps=5785.1, ups=0.09, wpb=64809, bsz=128, num_updates=3221, lr=9.99822e-05, gnorm=2.62, loss_scale=2, train_wall=11, gb_free=2.8, wall=36988 2021-06-19 04:55:24 | INFO | train_inner | epoch 002: 244 / 3002 loss=2.919, ppl=7.56, wps=5834.3, ups=0.09, wpb=64782, bsz=128, num_updates=3222, lr=9.99822e-05, gnorm=2.498, loss_scale=2, train_wall=11, gb_free=2.8, wall=36999 2021-06-19 04:55:35 | INFO | train_inner | epoch 002: 245 / 3002 loss=2.979, ppl=7.89, wps=5872, ups=0.09, wpb=64861, bsz=128, num_updates=3223, lr=9.99822e-05, gnorm=3.515, loss_scale=2, train_wall=11, gb_free=2.8, wall=37010 2021-06-19 04:55:47 | INFO | train_inner | epoch 002: 246 / 3002 loss=2.804, ppl=6.99, wps=5726.4, ups=0.09, wpb=64866, bsz=128, num_updates=3224, lr=9.99822e-05, gnorm=2.775, loss_scale=2, train_wall=11, gb_free=2.8, wall=37021 2021-06-19 04:55:58 | INFO | train_inner | epoch 002: 247 / 3002 loss=2.714, ppl=6.56, wps=5696.1, ups=0.09, wpb=64835, bsz=128, num_updates=3225, lr=9.99822e-05, gnorm=2.415, loss_scale=2, train_wall=11, gb_free=2.8, wall=37032 2021-06-19 04:56:09 | INFO | train_inner | epoch 002: 248 / 3002 loss=2.912, ppl=7.53, wps=5836.2, ups=0.09, wpb=64791, bsz=128, num_updates=3226, lr=9.99822e-05, gnorm=2.559, loss_scale=2, train_wall=11, gb_free=2.8, wall=37044 2021-06-19 04:56:20 | INFO | train_inner | epoch 002: 249 / 3002 loss=2.839, ppl=7.16, wps=5990.7, ups=0.09, wpb=64798, bsz=128, num_updates=3227, lr=9.99822e-05, gnorm=4.429, loss_scale=2, train_wall=10, gb_free=2.8, wall=37054 2021-06-19 04:56:31 | INFO | train_inner | epoch 002: 250 / 3002 loss=2.746, ppl=6.71, wps=5842.2, ups=0.09, wpb=64816, bsz=128, num_updates=3228, lr=9.99822e-05, gnorm=2.389, loss_scale=2, train_wall=11, gb_free=2.8, wall=37065 2021-06-19 04:56:42 | INFO | train_inner | epoch 002: 251 / 3002 loss=2.773, ppl=6.84, wps=5831.1, ups=0.09, wpb=64846, bsz=128, num_updates=3229, lr=9.99822e-05, gnorm=2.363, loss_scale=2, train_wall=11, gb_free=2.8, wall=37077 2021-06-19 04:56:53 | INFO | train_inner | epoch 002: 252 / 3002 loss=2.956, ppl=7.76, wps=5831.1, ups=0.09, wpb=64791, bsz=128, num_updates=3230, lr=9.99822e-05, gnorm=2.574, loss_scale=2, train_wall=11, gb_free=2.8, wall=37088 2021-06-19 04:57:04 | INFO | train_inner | epoch 002: 253 / 3002 loss=2.846, ppl=7.19, wps=5873.6, ups=0.09, wpb=64822, bsz=128, num_updates=3231, lr=9.99822e-05, gnorm=2.838, loss_scale=2, train_wall=11, gb_free=2.8, wall=37099 2021-06-19 04:57:15 | INFO | train_inner | epoch 002: 254 / 3002 loss=2.77, ppl=6.82, wps=5928.5, ups=0.09, wpb=64829, bsz=128, num_updates=3232, lr=9.99821e-05, gnorm=3.255, loss_scale=2, train_wall=10, gb_free=2.8, wall=37110 2021-06-19 04:57:26 | INFO | train_inner | epoch 002: 255 / 3002 loss=2.823, ppl=7.08, wps=5862.1, ups=0.09, wpb=64852, bsz=128, num_updates=3233, lr=9.99821e-05, gnorm=2.676, loss_scale=2, train_wall=11, gb_free=2.8, wall=37121 2021-06-19 04:57:37 | INFO | train_inner | epoch 002: 256 / 3002 loss=2.828, ppl=7.1, wps=5995, ups=0.09, wpb=64852, bsz=128, num_updates=3234, lr=9.99821e-05, gnorm=2.838, loss_scale=2, train_wall=10, gb_free=2.8, wall=37132 2021-06-19 04:57:48 | INFO | train_inner | epoch 002: 257 / 3002 loss=2.907, ppl=7.5, wps=5763.1, ups=0.09, wpb=64898, bsz=128, num_updates=3235, lr=9.99821e-05, gnorm=2.385, loss_scale=2, train_wall=11, gb_free=2.8, wall=37143 2021-06-19 04:58:00 | INFO | train_inner | epoch 002: 258 / 3002 loss=2.674, ppl=6.38, wps=5825.2, ups=0.09, wpb=64894, bsz=128, num_updates=3236, lr=9.99821e-05, gnorm=2.428, loss_scale=2, train_wall=11, gb_free=2.8, wall=37154 2021-06-19 04:58:11 | INFO | train_inner | epoch 002: 259 / 3002 loss=2.775, ppl=6.84, wps=5893.7, ups=0.09, wpb=64825, bsz=128, num_updates=3237, lr=9.99821e-05, gnorm=2.343, loss_scale=2, train_wall=11, gb_free=2.8, wall=37165 2021-06-19 04:58:21 | INFO | train_inner | epoch 002: 260 / 3002 loss=2.677, ppl=6.4, wps=5963.2, ups=0.09, wpb=64896, bsz=128, num_updates=3238, lr=9.99821e-05, gnorm=2.373, loss_scale=2, train_wall=10, gb_free=2.8, wall=37176 2021-06-19 04:58:33 | INFO | train_inner | epoch 002: 261 / 3002 loss=2.659, ppl=6.32, wps=5808.7, ups=0.09, wpb=64816, bsz=128, num_updates=3239, lr=9.99821e-05, gnorm=2.352, loss_scale=2, train_wall=11, gb_free=2.8, wall=37187 2021-06-19 04:58:44 | INFO | train_inner | epoch 002: 262 / 3002 loss=2.87, ppl=7.31, wps=5838.8, ups=0.09, wpb=64746, bsz=128, num_updates=3240, lr=9.99821e-05, gnorm=2.351, loss_scale=2, train_wall=11, gb_free=2.8, wall=37198 2021-06-19 04:58:55 | INFO | train_inner | epoch 002: 263 / 3002 loss=2.911, ppl=7.52, wps=5921.3, ups=0.09, wpb=64826, bsz=128, num_updates=3241, lr=9.99821e-05, gnorm=8.158, loss_scale=2, train_wall=10, gb_free=2.8, wall=37209 2021-06-19 04:59:06 | INFO | train_inner | epoch 002: 264 / 3002 loss=2.959, ppl=7.78, wps=5807.3, ups=0.09, wpb=64762, bsz=128, num_updates=3242, lr=9.99821e-05, gnorm=2.333, loss_scale=2, train_wall=11, gb_free=2.8, wall=37220 2021-06-19 04:59:17 | INFO | train_inner | epoch 002: 265 / 3002 loss=2.968, ppl=7.82, wps=5884.7, ups=0.09, wpb=64809, bsz=128, num_updates=3243, lr=9.99821e-05, gnorm=2.346, loss_scale=2, train_wall=11, gb_free=2.8, wall=37231 2021-06-19 04:59:28 | INFO | train_inner | epoch 002: 266 / 3002 loss=2.881, ppl=7.37, wps=5798.9, ups=0.09, wpb=64859, bsz=128, num_updates=3244, lr=9.9982e-05, gnorm=2.346, loss_scale=2, train_wall=11, gb_free=2.8, wall=37242 2021-06-19 04:59:39 | INFO | train_inner | epoch 002: 267 / 3002 loss=2.857, ppl=7.25, wps=6014.9, ups=0.09, wpb=64831, bsz=128, num_updates=3245, lr=9.9982e-05, gnorm=2.34, loss_scale=2, train_wall=10, gb_free=2.8, wall=37253 2021-06-19 04:59:50 | INFO | train_inner | epoch 002: 268 / 3002 loss=2.944, ppl=7.7, wps=5779.8, ups=0.09, wpb=64874, bsz=128, num_updates=3246, lr=9.9982e-05, gnorm=3.013, loss_scale=2, train_wall=11, gb_free=2.8, wall=37264 2021-06-19 05:00:01 | INFO | train_inner | epoch 002: 269 / 3002 loss=2.775, ppl=6.85, wps=5800.8, ups=0.09, wpb=64741, bsz=128, num_updates=3247, lr=9.9982e-05, gnorm=2.53, loss_scale=2, train_wall=11, gb_free=2.8, wall=37276 2021-06-19 05:00:12 | INFO | train_inner | epoch 002: 270 / 3002 loss=2.812, ppl=7.02, wps=5946.2, ups=0.09, wpb=64838, bsz=128, num_updates=3248, lr=9.9982e-05, gnorm=2.742, loss_scale=2, train_wall=10, gb_free=2.8, wall=37286 2021-06-19 05:00:23 | INFO | train_inner | epoch 002: 271 / 3002 loss=2.862, ppl=7.27, wps=5933.2, ups=0.09, wpb=64852, bsz=128, num_updates=3249, lr=9.9982e-05, gnorm=2.349, loss_scale=2, train_wall=10, gb_free=2.8, wall=37297 2021-06-19 05:00:34 | INFO | train_inner | epoch 002: 272 / 3002 loss=2.714, ppl=6.56, wps=5784.5, ups=0.09, wpb=64825, bsz=128, num_updates=3250, lr=9.9982e-05, gnorm=5.123, loss_scale=2, train_wall=11, gb_free=2.8, wall=37309 2021-06-19 05:00:45 | INFO | train_inner | epoch 002: 273 / 3002 loss=2.839, ppl=7.16, wps=5911.2, ups=0.09, wpb=64885, bsz=128, num_updates=3251, lr=9.9982e-05, gnorm=2.488, loss_scale=2, train_wall=10, gb_free=2.8, wall=37320 2021-06-19 05:00:57 | INFO | train_inner | epoch 002: 274 / 3002 loss=2.905, ppl=7.49, wps=5723.4, ups=0.09, wpb=64780, bsz=128, num_updates=3252, lr=9.9982e-05, gnorm=2.418, loss_scale=2, train_wall=11, gb_free=2.8, wall=37331 2021-06-19 05:01:08 | INFO | train_inner | epoch 002: 275 / 3002 loss=3.041, ppl=8.23, wps=5684.2, ups=0.09, wpb=64822, bsz=128, num_updates=3253, lr=9.9982e-05, gnorm=2.526, loss_scale=2, train_wall=11, gb_free=2.8, wall=37342 2021-06-19 05:01:19 | INFO | train_inner | epoch 002: 276 / 3002 loss=2.69, ppl=6.45, wps=5815.6, ups=0.09, wpb=64791, bsz=128, num_updates=3254, lr=9.9982e-05, gnorm=2.664, loss_scale=2, train_wall=11, gb_free=2.8, wall=37353 2021-06-19 05:01:30 | INFO | train_inner | epoch 002: 277 / 3002 loss=2.772, ppl=6.83, wps=5936, ups=0.09, wpb=64812, bsz=128, num_updates=3255, lr=9.9982e-05, gnorm=3.761, loss_scale=2, train_wall=10, gb_free=2.8, wall=37364 2021-06-19 05:01:41 | INFO | train_inner | epoch 002: 278 / 3002 loss=2.867, ppl=7.3, wps=5918.9, ups=0.09, wpb=64767, bsz=128, num_updates=3256, lr=9.9982e-05, gnorm=5.636, loss_scale=2, train_wall=10, gb_free=2.8, wall=37375 2021-06-19 05:01:52 | INFO | train_inner | epoch 002: 279 / 3002 loss=2.944, ppl=7.69, wps=5885.4, ups=0.09, wpb=64889, bsz=128, num_updates=3257, lr=9.99819e-05, gnorm=2.428, loss_scale=2, train_wall=11, gb_free=2.8, wall=37386 2021-06-19 05:02:03 | INFO | train_inner | epoch 002: 280 / 3002 loss=2.68, ppl=6.41, wps=5805.3, ups=0.09, wpb=64735, bsz=128, num_updates=3258, lr=9.99819e-05, gnorm=2.932, loss_scale=2, train_wall=11, gb_free=2.8, wall=37397 2021-06-19 05:02:14 | INFO | train_inner | epoch 002: 281 / 3002 loss=2.878, ppl=7.35, wps=5856.9, ups=0.09, wpb=64815, bsz=128, num_updates=3259, lr=9.99819e-05, gnorm=2.995, loss_scale=2, train_wall=11, gb_free=2.8, wall=37409 2021-06-19 05:02:25 | INFO | train_inner | epoch 002: 282 / 3002 loss=2.855, ppl=7.23, wps=5949.1, ups=0.09, wpb=64878, bsz=128, num_updates=3260, lr=9.99819e-05, gnorm=3.44, loss_scale=2, train_wall=10, gb_free=2.8, wall=37419 2021-06-19 05:02:36 | INFO | train_inner | epoch 002: 283 / 3002 loss=2.753, ppl=6.74, wps=5716, ups=0.09, wpb=64857, bsz=128, num_updates=3261, lr=9.99819e-05, gnorm=2.98, loss_scale=2, train_wall=11, gb_free=2.8, wall=37431 2021-06-19 05:02:48 | INFO | train_inner | epoch 002: 284 / 3002 loss=2.821, ppl=7.07, wps=5794, ups=0.09, wpb=64356, bsz=128, num_updates=3262, lr=9.99819e-05, gnorm=2.541, loss_scale=2, train_wall=11, gb_free=2.8, wall=37442 2021-06-19 05:02:58 | INFO | train_inner | epoch 002: 285 / 3002 loss=2.787, ppl=6.9, wps=5933.3, ups=0.09, wpb=64830, bsz=128, num_updates=3263, lr=9.99819e-05, gnorm=3.002, loss_scale=2, train_wall=10, gb_free=2.8, wall=37453 2021-06-19 05:03:10 | INFO | train_inner | epoch 002: 286 / 3002 loss=2.75, ppl=6.73, wps=5730.4, ups=0.09, wpb=64846, bsz=128, num_updates=3264, lr=9.99819e-05, gnorm=2.431, loss_scale=2, train_wall=11, gb_free=2.8, wall=37464 2021-06-19 05:03:21 | INFO | train_inner | epoch 002: 287 / 3002 loss=2.848, ppl=7.2, wps=5907.7, ups=0.09, wpb=64922, bsz=128, num_updates=3265, lr=9.99819e-05, gnorm=5.724, loss_scale=2, train_wall=11, gb_free=2.8, wall=37475 2021-06-19 05:03:32 | INFO | train_inner | epoch 002: 288 / 3002 loss=2.933, ppl=7.64, wps=5767.1, ups=0.09, wpb=64852, bsz=128, num_updates=3266, lr=9.99819e-05, gnorm=2.72, loss_scale=2, train_wall=11, gb_free=2.8, wall=37486 2021-06-19 05:03:43 | INFO | train_inner | epoch 002: 289 / 3002 loss=2.892, ppl=7.42, wps=5952.9, ups=0.09, wpb=64871, bsz=128, num_updates=3267, lr=9.99819e-05, gnorm=2.852, loss_scale=2, train_wall=10, gb_free=2.8, wall=37497 2021-06-19 05:03:54 | INFO | train_inner | epoch 002: 290 / 3002 loss=2.769, ppl=6.82, wps=5857, ups=0.09, wpb=64874, bsz=128, num_updates=3268, lr=9.99819e-05, gnorm=3.501, loss_scale=2, train_wall=11, gb_free=2.8, wall=37508 2021-06-19 05:04:05 | INFO | train_inner | epoch 002: 291 / 3002 loss=2.825, ppl=7.09, wps=5873.1, ups=0.09, wpb=64799, bsz=128, num_updates=3269, lr=9.99818e-05, gnorm=2.772, loss_scale=2, train_wall=11, gb_free=2.8, wall=37519 2021-06-19 05:04:16 | INFO | train_inner | epoch 002: 292 / 3002 loss=3.024, ppl=8.13, wps=5702.7, ups=0.09, wpb=64815, bsz=128, num_updates=3270, lr=9.99818e-05, gnorm=2.976, loss_scale=2, train_wall=11, gb_free=2.8, wall=37531 2021-06-19 05:04:28 | INFO | train_inner | epoch 002: 293 / 3002 loss=3.01, ppl=8.05, wps=5806.9, ups=0.09, wpb=64873, bsz=128, num_updates=3271, lr=9.99818e-05, gnorm=2.983, loss_scale=2, train_wall=11, gb_free=2.8, wall=37542 2021-06-19 05:04:39 | INFO | train_inner | epoch 002: 294 / 3002 loss=2.956, ppl=7.76, wps=5855, ups=0.09, wpb=64887, bsz=128, num_updates=3272, lr=9.99818e-05, gnorm=3.237, loss_scale=2, train_wall=11, gb_free=2.8, wall=37553 2021-06-19 05:04:50 | INFO | train_inner | epoch 002: 295 / 3002 loss=2.996, ppl=7.98, wps=5865.1, ups=0.09, wpb=64785, bsz=128, num_updates=3273, lr=9.99818e-05, gnorm=3.17, loss_scale=2, train_wall=11, gb_free=2.8, wall=37564 2021-06-19 05:05:01 | INFO | train_inner | epoch 002: 296 / 3002 loss=2.977, ppl=7.87, wps=5818.1, ups=0.09, wpb=64797, bsz=128, num_updates=3274, lr=9.99818e-05, gnorm=5.354, loss_scale=2, train_wall=11, gb_free=2.8, wall=37575 2021-06-19 05:05:12 | INFO | train_inner | epoch 002: 297 / 3002 loss=2.747, ppl=6.71, wps=5864.6, ups=0.09, wpb=64842, bsz=128, num_updates=3275, lr=9.99818e-05, gnorm=2.429, loss_scale=2, train_wall=11, gb_free=2.8, wall=37586 2021-06-19 05:05:23 | INFO | train_inner | epoch 002: 298 / 3002 loss=2.887, ppl=7.4, wps=5910.5, ups=0.09, wpb=64925, bsz=128, num_updates=3276, lr=9.99818e-05, gnorm=2.779, loss_scale=2, train_wall=11, gb_free=2.8, wall=37597 2021-06-19 05:05:34 | INFO | train_inner | epoch 002: 299 / 3002 loss=2.867, ppl=7.29, wps=5886.7, ups=0.09, wpb=64809, bsz=128, num_updates=3277, lr=9.99818e-05, gnorm=2.962, loss_scale=2, train_wall=11, gb_free=2.8, wall=37608 2021-06-19 05:05:45 | INFO | train_inner | epoch 002: 300 / 3002 loss=2.877, ppl=7.35, wps=5814, ups=0.09, wpb=64789, bsz=128, num_updates=3278, lr=9.99818e-05, gnorm=2.486, loss_scale=2, train_wall=11, gb_free=2.8, wall=37619 2021-06-19 05:05:56 | INFO | train_inner | epoch 002: 301 / 3002 loss=2.816, ppl=7.04, wps=5868.3, ups=0.09, wpb=64804, bsz=128, num_updates=3279, lr=9.99818e-05, gnorm=9.481, loss_scale=2, train_wall=11, gb_free=2.8, wall=37630 2021-06-19 05:06:07 | INFO | train_inner | epoch 002: 302 / 3002 loss=2.807, ppl=7, wps=5802.6, ups=0.09, wpb=64829, bsz=128, num_updates=3280, lr=9.99818e-05, gnorm=2.436, loss_scale=2, train_wall=11, gb_free=2.8, wall=37642 2021-06-19 05:06:18 | INFO | train_inner | epoch 002: 303 / 3002 loss=2.964, ppl=7.8, wps=5835, ups=0.09, wpb=64750, bsz=128, num_updates=3281, lr=9.99818e-05, gnorm=2.66, loss_scale=2, train_wall=11, gb_free=2.8, wall=37653 2021-06-19 05:06:29 | INFO | train_inner | epoch 002: 304 / 3002 loss=2.889, ppl=7.41, wps=5855.2, ups=0.09, wpb=64798, bsz=128, num_updates=3282, lr=9.99817e-05, gnorm=2.466, loss_scale=2, train_wall=11, gb_free=2.8, wall=37664 2021-06-19 05:06:40 | INFO | train_inner | epoch 002: 305 / 3002 loss=2.903, ppl=7.48, wps=5903.4, ups=0.09, wpb=64736, bsz=128, num_updates=3283, lr=9.99817e-05, gnorm=2.58, loss_scale=2, train_wall=11, gb_free=2.8, wall=37675 2021-06-19 05:06:51 | INFO | train_inner | epoch 002: 306 / 3002 loss=2.842, ppl=7.17, wps=5914, ups=0.09, wpb=64813, bsz=128, num_updates=3284, lr=9.99817e-05, gnorm=2.512, loss_scale=2, train_wall=11, gb_free=2.8, wall=37686 2021-06-19 05:07:02 | INFO | train_inner | epoch 002: 307 / 3002 loss=2.863, ppl=7.27, wps=5835.6, ups=0.09, wpb=64855, bsz=128, num_updates=3285, lr=9.99817e-05, gnorm=2.353, loss_scale=2, train_wall=11, gb_free=2.8, wall=37697 2021-06-19 05:07:14 | INFO | train_inner | epoch 002: 308 / 3002 loss=3.113, ppl=8.65, wps=5762.1, ups=0.09, wpb=64717, bsz=128, num_updates=3286, lr=9.99817e-05, gnorm=2.623, loss_scale=2, train_wall=11, gb_free=2.8, wall=37708 2021-06-19 05:07:25 | INFO | train_inner | epoch 002: 309 / 3002 loss=2.933, ppl=7.64, wps=5762.3, ups=0.09, wpb=64711, bsz=128, num_updates=3287, lr=9.99817e-05, gnorm=2.426, loss_scale=2, train_wall=11, gb_free=2.8, wall=37719 2021-06-19 05:07:36 | INFO | train_inner | epoch 002: 310 / 3002 loss=2.795, ppl=6.94, wps=5836.5, ups=0.09, wpb=64882, bsz=128, num_updates=3288, lr=9.99817e-05, gnorm=2.395, loss_scale=2, train_wall=11, gb_free=2.8, wall=37730 2021-06-19 05:07:47 | INFO | train_inner | epoch 002: 311 / 3002 loss=2.93, ppl=7.62, wps=5940.3, ups=0.09, wpb=64832, bsz=128, num_updates=3289, lr=9.99817e-05, gnorm=2.376, loss_scale=2, train_wall=10, gb_free=2.8, wall=37741 2021-06-19 05:07:58 | INFO | train_inner | epoch 002: 312 / 3002 loss=2.928, ppl=7.61, wps=5877.5, ups=0.09, wpb=64814, bsz=128, num_updates=3290, lr=9.99817e-05, gnorm=2.364, loss_scale=2, train_wall=11, gb_free=2.8, wall=37752 2021-06-19 05:08:09 | INFO | train_inner | epoch 002: 313 / 3002 loss=2.967, ppl=7.82, wps=5877.2, ups=0.09, wpb=64795, bsz=128, num_updates=3291, lr=9.99817e-05, gnorm=2.612, loss_scale=2, train_wall=11, gb_free=2.8, wall=37763 2021-06-19 05:08:20 | INFO | train_inner | epoch 002: 314 / 3002 loss=2.812, ppl=7.02, wps=5742, ups=0.09, wpb=64773, bsz=128, num_updates=3292, lr=9.99817e-05, gnorm=2.621, loss_scale=2, train_wall=11, gb_free=2.8, wall=37775 2021-06-19 05:08:31 | INFO | train_inner | epoch 002: 315 / 3002 loss=2.852, ppl=7.22, wps=5897.9, ups=0.09, wpb=64840, bsz=128, num_updates=3293, lr=9.99817e-05, gnorm=2.597, loss_scale=2, train_wall=11, gb_free=2.8, wall=37786 2021-06-19 05:08:42 | INFO | train_inner | epoch 002: 316 / 3002 loss=2.782, ppl=6.88, wps=5786.2, ups=0.09, wpb=64854, bsz=128, num_updates=3294, lr=9.99816e-05, gnorm=2.547, loss_scale=2, train_wall=11, gb_free=2.8, wall=37797 2021-06-19 05:08:54 | INFO | train_inner | epoch 002: 317 / 3002 loss=3.025, ppl=8.14, wps=5877.5, ups=0.09, wpb=64841, bsz=128, num_updates=3295, lr=9.99816e-05, gnorm=2.393, loss_scale=2, train_wall=11, gb_free=2.8, wall=37808 2021-06-19 05:09:05 | INFO | train_inner | epoch 002: 318 / 3002 loss=2.691, ppl=6.46, wps=5817.9, ups=0.09, wpb=64845, bsz=128, num_updates=3296, lr=9.99816e-05, gnorm=3.187, loss_scale=2, train_wall=11, gb_free=2.8, wall=37819 2021-06-19 05:09:16 | INFO | train_inner | epoch 002: 319 / 3002 loss=2.759, ppl=6.77, wps=5742.9, ups=0.09, wpb=64831, bsz=128, num_updates=3297, lr=9.99816e-05, gnorm=2.481, loss_scale=2, train_wall=11, gb_free=2.8, wall=37830 2021-06-19 05:09:27 | INFO | train_inner | epoch 002: 320 / 3002 loss=2.886, ppl=7.39, wps=5835.5, ups=0.09, wpb=64834, bsz=128, num_updates=3298, lr=9.99816e-05, gnorm=2.456, loss_scale=2, train_wall=11, gb_free=2.8, wall=37841 2021-06-19 05:09:38 | INFO | train_inner | epoch 002: 321 / 3002 loss=2.808, ppl=7, wps=5908.5, ups=0.09, wpb=64834, bsz=128, num_updates=3299, lr=9.99816e-05, gnorm=2.716, loss_scale=2, train_wall=10, gb_free=2.8, wall=37852 2021-06-19 05:09:49 | INFO | train_inner | epoch 002: 322 / 3002 loss=2.568, ppl=5.93, wps=5888.2, ups=0.09, wpb=64833, bsz=128, num_updates=3300, lr=9.99816e-05, gnorm=2.572, loss_scale=2, train_wall=11, gb_free=2.8, wall=37863 2021-06-19 05:10:00 | INFO | train_inner | epoch 002: 323 / 3002 loss=2.876, ppl=7.34, wps=5837.5, ups=0.09, wpb=64822, bsz=128, num_updates=3301, lr=9.99816e-05, gnorm=2.44, loss_scale=2, train_wall=11, gb_free=2.8, wall=37874 2021-06-19 05:10:11 | INFO | train_inner | epoch 002: 324 / 3002 loss=2.743, ppl=6.69, wps=5823.1, ups=0.09, wpb=64798, bsz=128, num_updates=3302, lr=9.99816e-05, gnorm=2.409, loss_scale=4, train_wall=11, gb_free=2.8, wall=37886 2021-06-19 05:10:23 | INFO | train_inner | epoch 002: 325 / 3002 loss=2.816, ppl=7.04, wps=5760.2, ups=0.09, wpb=64827, bsz=128, num_updates=3303, lr=9.99816e-05, gnorm=2.376, loss_scale=4, train_wall=11, gb_free=2.8, wall=37897 2021-06-19 05:10:34 | INFO | train_inner | epoch 002: 326 / 3002 loss=2.772, ppl=6.83, wps=5810.2, ups=0.09, wpb=64826, bsz=128, num_updates=3304, lr=9.99816e-05, gnorm=2.865, loss_scale=4, train_wall=11, gb_free=2.8, wall=37908 2021-06-19 05:10:45 | INFO | train_inner | epoch 002: 327 / 3002 loss=2.85, ppl=7.21, wps=5766.1, ups=0.09, wpb=64919, bsz=128, num_updates=3305, lr=9.99816e-05, gnorm=2.377, loss_scale=4, train_wall=11, gb_free=2.8, wall=37919 2021-06-19 05:10:56 | INFO | train_inner | epoch 002: 328 / 3002 loss=2.724, ppl=6.61, wps=5935.1, ups=0.09, wpb=64897, bsz=128, num_updates=3306, lr=9.99816e-05, gnorm=3.409, loss_scale=4, train_wall=10, gb_free=2.8, wall=37930 2021-06-19 05:11:07 | INFO | train_inner | epoch 002: 329 / 3002 loss=2.733, ppl=6.65, wps=6015.9, ups=0.09, wpb=64969, bsz=128, num_updates=3307, lr=9.99815e-05, gnorm=12.542, loss_scale=4, train_wall=10, gb_free=2.8, wall=37941 2021-06-19 05:11:18 | INFO | train_inner | epoch 002: 330 / 3002 loss=2.803, ppl=6.98, wps=5840.9, ups=0.09, wpb=64825, bsz=128, num_updates=3308, lr=9.99815e-05, gnorm=2.312, loss_scale=4, train_wall=11, gb_free=2.8, wall=37952 2021-06-19 05:11:29 | INFO | train_inner | epoch 002: 331 / 3002 loss=2.884, ppl=7.38, wps=5873.7, ups=0.09, wpb=64794, bsz=128, num_updates=3309, lr=9.99815e-05, gnorm=2.399, loss_scale=4, train_wall=11, gb_free=2.8, wall=37963 2021-06-19 05:11:40 | INFO | train_inner | epoch 002: 332 / 3002 loss=2.903, ppl=7.48, wps=5824.6, ups=0.09, wpb=64725, bsz=128, num_updates=3310, lr=9.99815e-05, gnorm=2.529, loss_scale=4, train_wall=11, gb_free=2.8, wall=37974 2021-06-19 05:11:51 | INFO | train_inner | epoch 002: 333 / 3002 loss=2.879, ppl=7.36, wps=5773.3, ups=0.09, wpb=64853, bsz=128, num_updates=3311, lr=9.99815e-05, gnorm=2.509, loss_scale=4, train_wall=11, gb_free=2.8, wall=37985 2021-06-19 05:12:02 | INFO | train_inner | epoch 002: 334 / 3002 loss=2.907, ppl=7.5, wps=5907.7, ups=0.09, wpb=64793, bsz=128, num_updates=3312, lr=9.99815e-05, gnorm=3.113, loss_scale=4, train_wall=11, gb_free=2.8, wall=37996 2021-06-19 05:12:13 | INFO | train_inner | epoch 002: 335 / 3002 loss=2.803, ppl=6.98, wps=5816.9, ups=0.09, wpb=64917, bsz=128, num_updates=3313, lr=9.99815e-05, gnorm=2.493, loss_scale=4, train_wall=11, gb_free=2.8, wall=38008 2021-06-19 05:12:24 | INFO | train_inner | epoch 002: 336 / 3002 loss=2.805, ppl=6.99, wps=5901.9, ups=0.09, wpb=64784, bsz=128, num_updates=3314, lr=9.99815e-05, gnorm=3.498, loss_scale=4, train_wall=11, gb_free=2.8, wall=38019 2021-06-19 05:12:35 | INFO | train_inner | epoch 002: 337 / 3002 loss=2.864, ppl=7.28, wps=5832.6, ups=0.09, wpb=64790, bsz=128, num_updates=3315, lr=9.99815e-05, gnorm=5.389, loss_scale=4, train_wall=11, gb_free=2.8, wall=38030 2021-06-19 05:12:46 | INFO | train_inner | epoch 002: 338 / 3002 loss=2.91, ppl=7.52, wps=5860.8, ups=0.09, wpb=64859, bsz=128, num_updates=3316, lr=9.99815e-05, gnorm=2.3, loss_scale=4, train_wall=11, gb_free=2.8, wall=38041 2021-06-19 05:12:57 | INFO | train_inner | epoch 002: 339 / 3002 loss=2.768, ppl=6.81, wps=5898.5, ups=0.09, wpb=64814, bsz=128, num_updates=3317, lr=9.99815e-05, gnorm=2.925, loss_scale=4, train_wall=11, gb_free=2.8, wall=38052 2021-06-19 05:13:09 | INFO | train_inner | epoch 002: 340 / 3002 loss=2.909, ppl=7.51, wps=5825, ups=0.09, wpb=64888, bsz=128, num_updates=3318, lr=9.99815e-05, gnorm=2.579, loss_scale=4, train_wall=11, gb_free=2.8, wall=38063 2021-06-19 05:13:20 | INFO | train_inner | epoch 002: 341 / 3002 loss=2.877, ppl=7.35, wps=5867.9, ups=0.09, wpb=64888, bsz=128, num_updates=3319, lr=9.99814e-05, gnorm=3.075, loss_scale=4, train_wall=11, gb_free=2.8, wall=38074 2021-06-19 05:13:31 | INFO | train_inner | epoch 002: 342 / 3002 loss=2.79, ppl=6.92, wps=5926, ups=0.09, wpb=64893, bsz=128, num_updates=3320, lr=9.99814e-05, gnorm=2.604, loss_scale=4, train_wall=10, gb_free=2.8, wall=38085 2021-06-19 05:13:42 | INFO | train_inner | epoch 002: 343 / 3002 loss=2.815, ppl=7.04, wps=5855.5, ups=0.09, wpb=64821, bsz=128, num_updates=3321, lr=9.99814e-05, gnorm=2.594, loss_scale=4, train_wall=11, gb_free=2.8, wall=38096 2021-06-19 05:13:53 | INFO | train_inner | epoch 002: 344 / 3002 loss=2.816, ppl=7.04, wps=5819.1, ups=0.09, wpb=64861, bsz=128, num_updates=3322, lr=9.99814e-05, gnorm=2.77, loss_scale=4, train_wall=11, gb_free=2.8, wall=38107 2021-06-19 05:14:04 | INFO | train_inner | epoch 002: 345 / 3002 loss=2.738, ppl=6.67, wps=5896.8, ups=0.09, wpb=64876, bsz=128, num_updates=3323, lr=9.99814e-05, gnorm=2.556, loss_scale=4, train_wall=11, gb_free=2.8, wall=38118 2021-06-19 05:14:15 | INFO | train_inner | epoch 002: 346 / 3002 loss=2.787, ppl=6.9, wps=5978.6, ups=0.09, wpb=64889, bsz=128, num_updates=3324, lr=9.99814e-05, gnorm=6.18, loss_scale=4, train_wall=10, gb_free=2.8, wall=38129 2021-06-19 05:14:26 | INFO | train_inner | epoch 002: 347 / 3002 loss=2.848, ppl=7.2, wps=5786.7, ups=0.09, wpb=64840, bsz=128, num_updates=3325, lr=9.99814e-05, gnorm=3.351, loss_scale=4, train_wall=11, gb_free=2.8, wall=38140 2021-06-19 05:14:37 | INFO | train_inner | epoch 002: 348 / 3002 loss=2.964, ppl=7.81, wps=5904.6, ups=0.09, wpb=64868, bsz=128, num_updates=3326, lr=9.99814e-05, gnorm=2.563, loss_scale=4, train_wall=11, gb_free=2.8, wall=38151 2021-06-19 05:14:48 | INFO | train_inner | epoch 002: 349 / 3002 loss=2.791, ppl=6.92, wps=5724.2, ups=0.09, wpb=64777, bsz=128, num_updates=3327, lr=9.99814e-05, gnorm=2.864, loss_scale=4, train_wall=11, gb_free=2.8, wall=38162 2021-06-19 05:14:59 | INFO | train_inner | epoch 002: 350 / 3002 loss=2.769, ppl=6.82, wps=5760.7, ups=0.09, wpb=64831, bsz=128, num_updates=3328, lr=9.99814e-05, gnorm=3.65, loss_scale=4, train_wall=11, gb_free=2.8, wall=38174 2021-06-19 05:15:11 | INFO | train_inner | epoch 002: 351 / 3002 loss=2.884, ppl=7.38, wps=5795.2, ups=0.09, wpb=64837, bsz=128, num_updates=3329, lr=9.99814e-05, gnorm=2.41, loss_scale=4, train_wall=11, gb_free=2.8, wall=38185 2021-06-19 05:15:22 | INFO | train_inner | epoch 002: 352 / 3002 loss=2.799, ppl=6.96, wps=5871.1, ups=0.09, wpb=64892, bsz=128, num_updates=3330, lr=9.99814e-05, gnorm=2.611, loss_scale=4, train_wall=11, gb_free=2.8, wall=38196 2021-06-19 05:15:33 | INFO | train_inner | epoch 002: 353 / 3002 loss=2.811, ppl=7.02, wps=5822.2, ups=0.09, wpb=64871, bsz=128, num_updates=3331, lr=9.99814e-05, gnorm=2.673, loss_scale=4, train_wall=11, gb_free=2.8, wall=38207 2021-06-19 05:15:44 | INFO | train_inner | epoch 002: 354 / 3002 loss=2.923, ppl=7.58, wps=5935.2, ups=0.09, wpb=64862, bsz=128, num_updates=3332, lr=9.99813e-05, gnorm=2.406, loss_scale=4, train_wall=10, gb_free=2.8, wall=38218 2021-06-19 05:15:55 | INFO | train_inner | epoch 002: 355 / 3002 loss=2.799, ppl=6.96, wps=5817.4, ups=0.09, wpb=64838, bsz=128, num_updates=3333, lr=9.99813e-05, gnorm=2.358, loss_scale=4, train_wall=11, gb_free=2.8, wall=38229 2021-06-19 05:16:06 | INFO | train_inner | epoch 002: 356 / 3002 loss=2.841, ppl=7.17, wps=5922.1, ups=0.09, wpb=64845, bsz=128, num_updates=3334, lr=9.99813e-05, gnorm=3.596, loss_scale=4, train_wall=10, gb_free=2.8, wall=38240 2021-06-19 05:16:17 | INFO | train_inner | epoch 002: 357 / 3002 loss=2.921, ppl=7.57, wps=5870.3, ups=0.09, wpb=64897, bsz=128, num_updates=3335, lr=9.99813e-05, gnorm=2.384, loss_scale=4, train_wall=11, gb_free=2.8, wall=38251 2021-06-19 05:16:28 | INFO | train_inner | epoch 002: 358 / 3002 loss=2.878, ppl=7.35, wps=5930.1, ups=0.09, wpb=64803, bsz=128, num_updates=3336, lr=9.99813e-05, gnorm=3.643, loss_scale=4, train_wall=10, gb_free=2.8, wall=38262 2021-06-19 05:16:39 | INFO | train_inner | epoch 002: 359 / 3002 loss=2.909, ppl=7.51, wps=5862.4, ups=0.09, wpb=64834, bsz=128, num_updates=3337, lr=9.99813e-05, gnorm=5.774, loss_scale=4, train_wall=11, gb_free=2.8, wall=38273 2021-06-19 05:16:50 | INFO | train_inner | epoch 002: 360 / 3002 loss=2.751, ppl=6.73, wps=5872.7, ups=0.09, wpb=64815, bsz=128, num_updates=3338, lr=9.99813e-05, gnorm=3.137, loss_scale=4, train_wall=11, gb_free=2.8, wall=38284 2021-06-19 05:17:01 | INFO | train_inner | epoch 002: 361 / 3002 loss=2.87, ppl=7.31, wps=5856, ups=0.09, wpb=64775, bsz=128, num_updates=3339, lr=9.99813e-05, gnorm=5.31, loss_scale=4, train_wall=11, gb_free=2.8, wall=38295 2021-06-19 05:17:12 | INFO | train_inner | epoch 002: 362 / 3002 loss=2.804, ppl=6.98, wps=5872.9, ups=0.09, wpb=64823, bsz=128, num_updates=3340, lr=9.99813e-05, gnorm=2.378, loss_scale=4, train_wall=11, gb_free=2.8, wall=38306 2021-06-19 05:17:23 | INFO | train_inner | epoch 002: 363 / 3002 loss=2.992, ppl=7.96, wps=5857.2, ups=0.09, wpb=64738, bsz=128, num_updates=3341, lr=9.99813e-05, gnorm=3.367, loss_scale=4, train_wall=11, gb_free=2.8, wall=38317 2021-06-19 05:17:34 | INFO | train_inner | epoch 002: 364 / 3002 loss=2.807, ppl=7, wps=5845.3, ups=0.09, wpb=64862, bsz=128, num_updates=3342, lr=9.99813e-05, gnorm=2.467, loss_scale=4, train_wall=11, gb_free=2.8, wall=38328 2021-06-19 05:17:45 | INFO | train_inner | epoch 002: 365 / 3002 loss=2.828, ppl=7.1, wps=5787.9, ups=0.09, wpb=64799, bsz=128, num_updates=3343, lr=9.99813e-05, gnorm=2.563, loss_scale=4, train_wall=11, gb_free=2.8, wall=38340 2021-06-19 05:17:57 | INFO | train_inner | epoch 002: 366 / 3002 loss=2.984, ppl=7.91, wps=5793.7, ups=0.09, wpb=64843, bsz=128, num_updates=3344, lr=9.99812e-05, gnorm=2.51, loss_scale=4, train_wall=11, gb_free=2.8, wall=38351 2021-06-19 05:18:08 | INFO | train_inner | epoch 002: 367 / 3002 loss=2.875, ppl=7.34, wps=5811.8, ups=0.09, wpb=64821, bsz=128, num_updates=3345, lr=9.99812e-05, gnorm=2.394, loss_scale=4, train_wall=11, gb_free=2.8, wall=38362 2021-06-19 05:18:19 | INFO | train_inner | epoch 002: 368 / 3002 loss=2.901, ppl=7.47, wps=5845.2, ups=0.09, wpb=64801, bsz=128, num_updates=3346, lr=9.99812e-05, gnorm=2.374, loss_scale=4, train_wall=11, gb_free=2.8, wall=38373 2021-06-19 05:18:30 | INFO | train_inner | epoch 002: 369 / 3002 loss=2.836, ppl=7.14, wps=5815.8, ups=0.09, wpb=64847, bsz=128, num_updates=3347, lr=9.99812e-05, gnorm=2.347, loss_scale=4, train_wall=11, gb_free=2.8, wall=38384 2021-06-19 05:18:41 | INFO | train_inner | epoch 002: 370 / 3002 loss=2.905, ppl=7.49, wps=5817.9, ups=0.09, wpb=64830, bsz=128, num_updates=3348, lr=9.99812e-05, gnorm=2.37, loss_scale=4, train_wall=11, gb_free=2.8, wall=38395 2021-06-19 05:18:52 | INFO | train_inner | epoch 002: 371 / 3002 loss=2.936, ppl=7.65, wps=5756.2, ups=0.09, wpb=64841, bsz=128, num_updates=3349, lr=9.99812e-05, gnorm=2.71, loss_scale=4, train_wall=11, gb_free=2.8, wall=38407 2021-06-19 05:19:03 | INFO | train_inner | epoch 002: 372 / 3002 loss=2.814, ppl=7.03, wps=5890.9, ups=0.09, wpb=64872, bsz=128, num_updates=3350, lr=9.99812e-05, gnorm=2.403, loss_scale=4, train_wall=11, gb_free=2.8, wall=38418 2021-06-19 05:19:14 | INFO | train_inner | epoch 002: 373 / 3002 loss=2.914, ppl=7.54, wps=5845.1, ups=0.09, wpb=64786, bsz=128, num_updates=3351, lr=9.99812e-05, gnorm=2.483, loss_scale=4, train_wall=11, gb_free=2.8, wall=38429 2021-06-19 05:19:25 | INFO | train_inner | epoch 002: 374 / 3002 loss=2.627, ppl=6.18, wps=5923.6, ups=0.09, wpb=64883, bsz=128, num_updates=3352, lr=9.99812e-05, gnorm=2.351, loss_scale=4, train_wall=10, gb_free=2.8, wall=38440 2021-06-19 05:19:36 | INFO | train_inner | epoch 002: 375 / 3002 loss=2.898, ppl=7.45, wps=5946.6, ups=0.09, wpb=64846, bsz=128, num_updates=3353, lr=9.99812e-05, gnorm=6.393, loss_scale=4, train_wall=10, gb_free=2.8, wall=38451 2021-06-19 05:19:47 | INFO | train_inner | epoch 002: 376 / 3002 loss=3.132, ppl=8.77, wps=5889.1, ups=0.09, wpb=64842, bsz=128, num_updates=3354, lr=9.99812e-05, gnorm=2.502, loss_scale=4, train_wall=11, gb_free=2.8, wall=38462 2021-06-19 05:19:58 | INFO | train_inner | epoch 002: 377 / 3002 loss=2.913, ppl=7.53, wps=5840.7, ups=0.09, wpb=64813, bsz=128, num_updates=3355, lr=9.99812e-05, gnorm=2.364, loss_scale=4, train_wall=11, gb_free=2.8, wall=38473 2021-06-19 05:20:10 | INFO | train_inner | epoch 002: 378 / 3002 loss=2.819, ppl=7.06, wps=5770.6, ups=0.09, wpb=64729, bsz=128, num_updates=3356, lr=9.99812e-05, gnorm=2.726, loss_scale=4, train_wall=11, gb_free=2.8, wall=38484 2021-06-19 05:20:21 | INFO | train_inner | epoch 002: 379 / 3002 loss=2.954, ppl=7.75, wps=5747.2, ups=0.09, wpb=64729, bsz=128, num_updates=3357, lr=9.99811e-05, gnorm=9.456, loss_scale=4, train_wall=11, gb_free=2.8, wall=38495 2021-06-19 05:20:32 | INFO | train_inner | epoch 002: 380 / 3002 loss=2.625, ppl=6.17, wps=5833.4, ups=0.09, wpb=64914, bsz=128, num_updates=3358, lr=9.99811e-05, gnorm=2.732, loss_scale=4, train_wall=11, gb_free=2.8, wall=38506 2021-06-19 05:20:43 | INFO | train_inner | epoch 002: 381 / 3002 loss=2.854, ppl=7.23, wps=5791, ups=0.09, wpb=64783, bsz=128, num_updates=3359, lr=9.99811e-05, gnorm=3.058, loss_scale=4, train_wall=11, gb_free=2.8, wall=38518 2021-06-19 05:20:54 | INFO | train_inner | epoch 002: 382 / 3002 loss=2.752, ppl=6.74, wps=5855.1, ups=0.09, wpb=64867, bsz=128, num_updates=3360, lr=9.99811e-05, gnorm=2.624, loss_scale=4, train_wall=11, gb_free=2.8, wall=38529 2021-06-19 05:21:05 | INFO | train_inner | epoch 002: 383 / 3002 loss=2.922, ppl=7.58, wps=5867.8, ups=0.09, wpb=64900, bsz=128, num_updates=3361, lr=9.99811e-05, gnorm=2.394, loss_scale=4, train_wall=11, gb_free=2.8, wall=38540 2021-06-19 05:21:17 | INFO | train_inner | epoch 002: 384 / 3002 loss=2.789, ppl=6.91, wps=5792.3, ups=0.09, wpb=64849, bsz=128, num_updates=3362, lr=9.99811e-05, gnorm=12.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=38551 2021-06-19 05:21:28 | INFO | train_inner | epoch 002: 385 / 3002 loss=2.886, ppl=7.39, wps=5767.8, ups=0.09, wpb=64861, bsz=128, num_updates=3363, lr=9.99811e-05, gnorm=2.317, loss_scale=4, train_wall=11, gb_free=2.8, wall=38562 2021-06-19 05:21:39 | INFO | train_inner | epoch 002: 386 / 3002 loss=2.854, ppl=7.23, wps=5682.2, ups=0.09, wpb=64857, bsz=128, num_updates=3364, lr=9.99811e-05, gnorm=2.519, loss_scale=4, train_wall=11, gb_free=2.8, wall=38574 2021-06-19 05:21:50 | INFO | train_inner | epoch 002: 387 / 3002 loss=2.85, ppl=7.21, wps=5773.4, ups=0.09, wpb=64851, bsz=128, num_updates=3365, lr=9.99811e-05, gnorm=2.34, loss_scale=4, train_wall=11, gb_free=2.8, wall=38585 2021-06-19 05:22:02 | INFO | train_inner | epoch 002: 388 / 3002 loss=2.783, ppl=6.88, wps=5844.4, ups=0.09, wpb=64818, bsz=128, num_updates=3366, lr=9.99811e-05, gnorm=2.61, loss_scale=4, train_wall=11, gb_free=2.8, wall=38596 2021-06-19 05:22:13 | INFO | train_inner | epoch 002: 389 / 3002 loss=2.843, ppl=7.18, wps=5772.5, ups=0.09, wpb=64871, bsz=128, num_updates=3367, lr=9.99811e-05, gnorm=2.447, loss_scale=4, train_wall=11, gb_free=2.8, wall=38607 2021-06-19 05:22:24 | INFO | train_inner | epoch 002: 390 / 3002 loss=2.937, ppl=7.66, wps=5787.1, ups=0.09, wpb=64832, bsz=128, num_updates=3368, lr=9.99811e-05, gnorm=2.438, loss_scale=4, train_wall=11, gb_free=2.8, wall=38618 2021-06-19 05:22:35 | INFO | train_inner | epoch 002: 391 / 3002 loss=2.893, ppl=7.43, wps=5739, ups=0.09, wpb=64764, bsz=128, num_updates=3369, lr=9.9981e-05, gnorm=2.66, loss_scale=4, train_wall=11, gb_free=2.8, wall=38630 2021-06-19 05:22:46 | INFO | train_inner | epoch 002: 392 / 3002 loss=3.023, ppl=8.13, wps=5848.6, ups=0.09, wpb=64760, bsz=128, num_updates=3370, lr=9.9981e-05, gnorm=2.491, loss_scale=4, train_wall=11, gb_free=2.8, wall=38641 2021-06-19 05:22:57 | INFO | train_inner | epoch 002: 393 / 3002 loss=2.944, ppl=7.69, wps=5913.4, ups=0.09, wpb=64880, bsz=128, num_updates=3371, lr=9.9981e-05, gnorm=2.477, loss_scale=4, train_wall=10, gb_free=2.8, wall=38652 2021-06-19 05:23:08 | INFO | train_inner | epoch 002: 394 / 3002 loss=2.83, ppl=7.11, wps=5930.5, ups=0.09, wpb=64847, bsz=128, num_updates=3372, lr=9.9981e-05, gnorm=2.401, loss_scale=4, train_wall=10, gb_free=2.8, wall=38663 2021-06-19 05:23:19 | INFO | train_inner | epoch 002: 395 / 3002 loss=2.805, ppl=6.99, wps=5890.3, ups=0.09, wpb=64849, bsz=128, num_updates=3373, lr=9.9981e-05, gnorm=2.846, loss_scale=4, train_wall=11, gb_free=2.8, wall=38674 2021-06-19 05:23:31 | INFO | train_inner | epoch 002: 396 / 3002 loss=2.713, ppl=6.55, wps=5673.6, ups=0.09, wpb=64769, bsz=128, num_updates=3374, lr=9.9981e-05, gnorm=2.477, loss_scale=4, train_wall=11, gb_free=2.8, wall=38685 2021-06-19 05:23:42 | INFO | train_inner | epoch 002: 397 / 3002 loss=2.834, ppl=7.13, wps=5704.5, ups=0.09, wpb=64669, bsz=128, num_updates=3375, lr=9.9981e-05, gnorm=2.462, loss_scale=4, train_wall=11, gb_free=2.8, wall=38696 2021-06-19 05:23:53 | INFO | train_inner | epoch 002: 398 / 3002 loss=2.825, ppl=7.09, wps=5878.4, ups=0.09, wpb=64918, bsz=128, num_updates=3376, lr=9.9981e-05, gnorm=2.569, loss_scale=4, train_wall=11, gb_free=2.8, wall=38707 2021-06-19 05:24:04 | INFO | train_inner | epoch 002: 399 / 3002 loss=2.727, ppl=6.62, wps=5792.7, ups=0.09, wpb=64881, bsz=128, num_updates=3377, lr=9.9981e-05, gnorm=2.427, loss_scale=4, train_wall=11, gb_free=2.8, wall=38719 2021-06-19 05:24:15 | INFO | train_inner | epoch 002: 400 / 3002 loss=2.656, ppl=6.3, wps=5935.3, ups=0.09, wpb=64929, bsz=128, num_updates=3378, lr=9.9981e-05, gnorm=3.606, loss_scale=4, train_wall=10, gb_free=2.8, wall=38730 2021-06-19 05:24:26 | INFO | train_inner | epoch 002: 401 / 3002 loss=2.772, ppl=6.83, wps=5758.7, ups=0.09, wpb=64779, bsz=128, num_updates=3379, lr=9.9981e-05, gnorm=4.599, loss_scale=4, train_wall=11, gb_free=2.8, wall=38741 2021-06-19 05:24:37 | INFO | train_inner | epoch 002: 402 / 3002 loss=2.819, ppl=7.06, wps=5859.6, ups=0.09, wpb=64830, bsz=128, num_updates=3380, lr=9.9981e-05, gnorm=2.288, loss_scale=4, train_wall=11, gb_free=2.8, wall=38752 2021-06-19 05:24:49 | INFO | train_inner | epoch 002: 403 / 3002 loss=2.929, ppl=7.61, wps=5824.7, ups=0.09, wpb=64750, bsz=128, num_updates=3381, lr=9.9981e-05, gnorm=2.677, loss_scale=4, train_wall=11, gb_free=2.8, wall=38763 2021-06-19 05:25:00 | INFO | train_inner | epoch 002: 404 / 3002 loss=2.684, ppl=6.43, wps=5764.7, ups=0.09, wpb=64836, bsz=128, num_updates=3382, lr=9.99809e-05, gnorm=2.561, loss_scale=4, train_wall=11, gb_free=2.8, wall=38774 2021-06-19 05:25:11 | INFO | train_inner | epoch 002: 405 / 3002 loss=2.872, ppl=7.32, wps=5795.3, ups=0.09, wpb=64880, bsz=128, num_updates=3383, lr=9.99809e-05, gnorm=5.759, loss_scale=4, train_wall=11, gb_free=2.8, wall=38785 2021-06-19 05:25:22 | INFO | train_inner | epoch 002: 406 / 3002 loss=2.836, ppl=7.14, wps=5830.1, ups=0.09, wpb=64808, bsz=128, num_updates=3384, lr=9.99809e-05, gnorm=2.501, loss_scale=4, train_wall=11, gb_free=2.8, wall=38796 2021-06-19 05:25:33 | INFO | train_inner | epoch 002: 407 / 3002 loss=2.894, ppl=7.43, wps=5829.4, ups=0.09, wpb=64857, bsz=128, num_updates=3385, lr=9.99809e-05, gnorm=7.51, loss_scale=4, train_wall=11, gb_free=2.8, wall=38808 2021-06-19 05:25:44 | INFO | train_inner | epoch 002: 408 / 3002 loss=2.922, ppl=7.58, wps=5894.5, ups=0.09, wpb=64824, bsz=128, num_updates=3386, lr=9.99809e-05, gnorm=12.126, loss_scale=4, train_wall=11, gb_free=2.8, wall=38819 2021-06-19 05:25:55 | INFO | train_inner | epoch 002: 409 / 3002 loss=2.772, ppl=6.83, wps=5828.8, ups=0.09, wpb=64825, bsz=128, num_updates=3387, lr=9.99809e-05, gnorm=2.293, loss_scale=4, train_wall=11, gb_free=2.8, wall=38830 2021-06-19 05:26:06 | INFO | train_inner | epoch 002: 410 / 3002 loss=2.875, ppl=7.34, wps=5858.1, ups=0.09, wpb=64874, bsz=128, num_updates=3388, lr=9.99809e-05, gnorm=2.719, loss_scale=4, train_wall=11, gb_free=2.8, wall=38841 2021-06-19 05:26:17 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-19 05:26:28 | INFO | train_inner | epoch 002: 412 / 3002 loss=2.924, ppl=7.59, wps=2958.8, ups=0.05, wpb=64889, bsz=128, num_updates=3389, lr=9.99809e-05, gnorm=3.039, loss_scale=2, train_wall=21, gb_free=2.8, wall=38863 2021-06-19 05:26:40 | INFO | train_inner | epoch 002: 413 / 3002 loss=2.859, ppl=7.26, wps=5827, ups=0.09, wpb=64761, bsz=128, num_updates=3390, lr=9.99809e-05, gnorm=2.546, loss_scale=2, train_wall=11, gb_free=2.8, wall=38874 2021-06-19 05:26:51 | INFO | train_inner | epoch 002: 414 / 3002 loss=2.929, ppl=7.62, wps=5783, ups=0.09, wpb=64741, bsz=128, num_updates=3391, lr=9.99809e-05, gnorm=3.452, loss_scale=2, train_wall=11, gb_free=2.8, wall=38885 2021-06-19 05:27:02 | INFO | train_inner | epoch 002: 415 / 3002 loss=2.948, ppl=7.72, wps=5700.5, ups=0.09, wpb=64765, bsz=128, num_updates=3392, lr=9.99809e-05, gnorm=2.485, loss_scale=2, train_wall=11, gb_free=2.8, wall=38896 2021-06-19 05:27:13 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 2021-06-19 05:27:24 | INFO | train_inner | epoch 002: 417 / 3002 loss=2.83, ppl=7.11, wps=2934.1, ups=0.05, wpb=64829, bsz=128, num_updates=3393, lr=9.99809e-05, gnorm=4.484, loss_scale=1, train_wall=21, gb_free=2.8, wall=38919 2021-06-19 05:27:35 | INFO | train_inner | epoch 002: 418 / 3002 loss=2.769, ppl=6.82, wps=5829.1, ups=0.09, wpb=64892, bsz=128, num_updates=3394, lr=9.99808e-05, gnorm=2.725, loss_scale=1, train_wall=11, gb_free=2.8, wall=38930 2021-06-19 05:27:46 | INFO | train_inner | epoch 002: 419 / 3002 loss=3.001, ppl=8.01, wps=5913.2, ups=0.09, wpb=64943, bsz=128, num_updates=3395, lr=9.99808e-05, gnorm=2.407, loss_scale=1, train_wall=11, gb_free=2.8, wall=38941 2021-06-19 05:27:57 | INFO | train_inner | epoch 002: 420 / 3002 loss=2.695, ppl=6.48, wps=5801.2, ups=0.09, wpb=64924, bsz=128, num_updates=3396, lr=9.99808e-05, gnorm=2.403, loss_scale=1, train_wall=11, gb_free=2.8, wall=38952 2021-06-19 05:28:08 | INFO | train_inner | epoch 002: 421 / 3002 loss=2.835, ppl=7.13, wps=5880.9, ups=0.09, wpb=64769, bsz=128, num_updates=3397, lr=9.99808e-05, gnorm=2.422, loss_scale=1, train_wall=11, gb_free=2.8, wall=38963 2021-06-19 05:28:19 | INFO | train_inner | epoch 002: 422 / 3002 loss=2.96, ppl=7.78, wps=5913.3, ups=0.09, wpb=64849, bsz=128, num_updates=3398, lr=9.99808e-05, gnorm=2.405, loss_scale=1, train_wall=10, gb_free=2.8, wall=38974 2021-06-19 05:28:30 | INFO | train_inner | epoch 002: 423 / 3002 loss=2.771, ppl=6.83, wps=5902.2, ups=0.09, wpb=64844, bsz=128, num_updates=3399, lr=9.99808e-05, gnorm=2.365, loss_scale=1, train_wall=11, gb_free=2.8, wall=38985 2021-06-19 05:28:42 | INFO | train_inner | epoch 002: 424 / 3002 loss=2.907, ppl=7.5, wps=5817.3, ups=0.09, wpb=64737, bsz=128, num_updates=3400, lr=9.99808e-05, gnorm=2.498, loss_scale=1, train_wall=11, gb_free=2.8, wall=38996 2021-06-19 05:28:53 | INFO | train_inner | epoch 002: 425 / 3002 loss=2.905, ppl=7.49, wps=5894.1, ups=0.09, wpb=64836, bsz=128, num_updates=3401, lr=9.99808e-05, gnorm=2.337, loss_scale=1, train_wall=11, gb_free=2.8, wall=39007 2021-06-19 05:29:04 | INFO | train_inner | epoch 002: 426 / 3002 loss=2.733, ppl=6.65, wps=5931.7, ups=0.09, wpb=64822, bsz=128, num_updates=3402, lr=9.99808e-05, gnorm=2.377, loss_scale=1, train_wall=10, gb_free=2.8, wall=39018 2021-06-19 05:29:15 | INFO | train_inner | epoch 002: 427 / 3002 loss=2.843, ppl=7.18, wps=5849.8, ups=0.09, wpb=64772, bsz=128, num_updates=3403, lr=9.99808e-05, gnorm=2.538, loss_scale=1, train_wall=11, gb_free=2.8, wall=39029 2021-06-19 05:29:26 | INFO | train_inner | epoch 002: 428 / 3002 loss=2.659, ppl=6.32, wps=5783.3, ups=0.09, wpb=64845, bsz=128, num_updates=3404, lr=9.99808e-05, gnorm=2.436, loss_scale=1, train_wall=11, gb_free=2.8, wall=39040 2021-06-19 05:29:37 | INFO | train_inner | epoch 002: 429 / 3002 loss=2.84, ppl=7.16, wps=5981.5, ups=0.09, wpb=64818, bsz=128, num_updates=3405, lr=9.99808e-05, gnorm=2.765, loss_scale=1, train_wall=10, gb_free=2.8, wall=39051 2021-06-19 05:29:47 | INFO | train_inner | epoch 002: 430 / 3002 loss=2.739, ppl=6.68, wps=6011.6, ups=0.09, wpb=64795, bsz=128, num_updates=3406, lr=9.99808e-05, gnorm=2.51, loss_scale=1, train_wall=10, gb_free=2.8, wall=39062 2021-06-19 05:29:58 | INFO | train_inner | epoch 002: 431 / 3002 loss=2.702, ppl=6.51, wps=5867.5, ups=0.09, wpb=64820, bsz=128, num_updates=3407, lr=9.99807e-05, gnorm=2.445, loss_scale=1, train_wall=11, gb_free=2.8, wall=39073 2021-06-19 05:30:09 | INFO | train_inner | epoch 002: 432 / 3002 loss=2.699, ppl=6.49, wps=5942.5, ups=0.09, wpb=64854, bsz=128, num_updates=3408, lr=9.99807e-05, gnorm=3.444, loss_scale=1, train_wall=10, gb_free=2.8, wall=39084 2021-06-19 05:30:20 | INFO | train_inner | epoch 002: 433 / 3002 loss=2.907, ppl=7.5, wps=5871.2, ups=0.09, wpb=64807, bsz=128, num_updates=3409, lr=9.99807e-05, gnorm=2.309, loss_scale=1, train_wall=11, gb_free=2.8, wall=39095 2021-06-19 05:30:31 | INFO | train_inner | epoch 002: 434 / 3002 loss=2.758, ppl=6.76, wps=5985.5, ups=0.09, wpb=64878, bsz=128, num_updates=3410, lr=9.99807e-05, gnorm=2.801, loss_scale=1, train_wall=10, gb_free=2.8, wall=39106 2021-06-19 05:30:42 | INFO | train_inner | epoch 002: 435 / 3002 loss=2.826, ppl=7.09, wps=5820.2, ups=0.09, wpb=64887, bsz=128, num_updates=3411, lr=9.99807e-05, gnorm=2.283, loss_scale=1, train_wall=11, gb_free=2.8, wall=39117 2021-06-19 05:30:54 | INFO | train_inner | epoch 002: 436 / 3002 loss=2.799, ppl=6.96, wps=5820.7, ups=0.09, wpb=64776, bsz=128, num_updates=3412, lr=9.99807e-05, gnorm=2.234, loss_scale=1, train_wall=11, gb_free=2.8, wall=39128 2021-06-19 05:31:05 | INFO | train_inner | epoch 002: 437 / 3002 loss=2.843, ppl=7.18, wps=5795.5, ups=0.09, wpb=64866, bsz=128, num_updates=3413, lr=9.99807e-05, gnorm=2.303, loss_scale=1, train_wall=11, gb_free=2.8, wall=39139 2021-06-19 05:31:16 | INFO | train_inner | epoch 002: 438 / 3002 loss=2.859, ppl=7.25, wps=5822.6, ups=0.09, wpb=64799, bsz=128, num_updates=3414, lr=9.99807e-05, gnorm=14.64, loss_scale=1, train_wall=11, gb_free=2.8, wall=39150 2021-06-19 05:31:27 | INFO | train_inner | epoch 002: 439 / 3002 loss=2.917, ppl=7.55, wps=5741.2, ups=0.09, wpb=64854, bsz=128, num_updates=3415, lr=9.99807e-05, gnorm=2.633, loss_scale=1, train_wall=11, gb_free=2.8, wall=39161 2021-06-19 05:31:38 | INFO | train_inner | epoch 002: 440 / 3002 loss=2.818, ppl=7.05, wps=5878.2, ups=0.09, wpb=64842, bsz=128, num_updates=3416, lr=9.99807e-05, gnorm=2.356, loss_scale=1, train_wall=11, gb_free=2.8, wall=39173 2021-06-19 05:31:49 | INFO | train_inner | epoch 002: 441 / 3002 loss=2.977, ppl=7.87, wps=5856.4, ups=0.09, wpb=64835, bsz=128, num_updates=3417, lr=9.99807e-05, gnorm=2.471, loss_scale=1, train_wall=11, gb_free=2.8, wall=39184 2021-06-19 05:32:01 | INFO | train_inner | epoch 002: 442 / 3002 loss=2.818, ppl=7.05, wps=5752.4, ups=0.09, wpb=64871, bsz=128, num_updates=3418, lr=9.99807e-05, gnorm=2.404, loss_scale=1, train_wall=11, gb_free=2.8, wall=39195 2021-06-19 05:32:12 | INFO | train_inner | epoch 002: 443 / 3002 loss=2.901, ppl=7.47, wps=5801.5, ups=0.09, wpb=64876, bsz=128, num_updates=3419, lr=9.99806e-05, gnorm=2.663, loss_scale=1, train_wall=11, gb_free=2.8, wall=39206 2021-06-19 05:32:23 | INFO | train_inner | epoch 002: 444 / 3002 loss=2.804, ppl=6.99, wps=5790, ups=0.09, wpb=64781, bsz=128, num_updates=3420, lr=9.99806e-05, gnorm=2.342, loss_scale=1, train_wall=11, gb_free=2.8, wall=39217 2021-06-19 05:32:34 | INFO | train_inner | epoch 002: 445 / 3002 loss=2.752, ppl=6.74, wps=5760.7, ups=0.09, wpb=64854, bsz=128, num_updates=3421, lr=9.99806e-05, gnorm=2.378, loss_scale=1, train_wall=11, gb_free=2.8, wall=39228 2021-06-19 05:32:45 | INFO | train_inner | epoch 002: 446 / 3002 loss=2.874, ppl=7.33, wps=5859.6, ups=0.09, wpb=64783, bsz=128, num_updates=3422, lr=9.99806e-05, gnorm=2.394, loss_scale=1, train_wall=11, gb_free=2.8, wall=39240 2021-06-19 05:32:56 | INFO | train_inner | epoch 002: 447 / 3002 loss=2.745, ppl=6.71, wps=5984.1, ups=0.09, wpb=64787, bsz=128, num_updates=3423, lr=9.99806e-05, gnorm=2.332, loss_scale=1, train_wall=10, gb_free=2.8, wall=39250 2021-06-19 05:33:07 | INFO | train_inner | epoch 002: 448 / 3002 loss=2.835, ppl=7.14, wps=5829.3, ups=0.09, wpb=64843, bsz=128, num_updates=3424, lr=9.99806e-05, gnorm=2.402, loss_scale=1, train_wall=11, gb_free=2.8, wall=39261 2021-06-19 05:33:18 | INFO | train_inner | epoch 002: 449 / 3002 loss=2.759, ppl=6.77, wps=5787.5, ups=0.09, wpb=64816, bsz=128, num_updates=3425, lr=9.99806e-05, gnorm=2.514, loss_scale=1, train_wall=11, gb_free=2.8, wall=39273 2021-06-19 05:33:29 | INFO | train_inner | epoch 002: 450 / 3002 loss=2.767, ppl=6.81, wps=5905, ups=0.09, wpb=64892, bsz=128, num_updates=3426, lr=9.99806e-05, gnorm=2.524, loss_scale=1, train_wall=11, gb_free=2.8, wall=39284 2021-06-19 05:33:40 | INFO | train_inner | epoch 002: 451 / 3002 loss=2.69, ppl=6.45, wps=6039.5, ups=0.09, wpb=64854, bsz=128, num_updates=3427, lr=9.99806e-05, gnorm=2.498, loss_scale=1, train_wall=10, gb_free=2.8, wall=39294 2021-06-19 05:33:51 | INFO | train_inner | epoch 002: 452 / 3002 loss=2.762, ppl=6.79, wps=5864.3, ups=0.09, wpb=64805, bsz=128, num_updates=3428, lr=9.99806e-05, gnorm=2.429, loss_scale=1, train_wall=11, gb_free=2.8, wall=39305 2021-06-19 05:34:02 | INFO | train_inner | epoch 002: 453 / 3002 loss=2.743, ppl=6.69, wps=5920.3, ups=0.09, wpb=64823, bsz=128, num_updates=3429, lr=9.99806e-05, gnorm=2.489, loss_scale=1, train_wall=10, gb_free=2.8, wall=39316 2021-06-19 05:34:13 | INFO | train_inner | epoch 002: 454 / 3002 loss=2.653, ppl=6.29, wps=5777.7, ups=0.09, wpb=64857, bsz=128, num_updates=3430, lr=9.99806e-05, gnorm=2.46, loss_scale=1, train_wall=11, gb_free=2.8, wall=39328 2021-06-19 05:34:24 | INFO | train_inner | epoch 002: 455 / 3002 loss=2.826, ppl=7.09, wps=5937.4, ups=0.09, wpb=64880, bsz=128, num_updates=3431, lr=9.99806e-05, gnorm=2.977, loss_scale=1, train_wall=10, gb_free=2.8, wall=39339 2021-06-19 05:34:35 | INFO | train_inner | epoch 002: 456 / 3002 loss=2.543, ppl=5.83, wps=5854.1, ups=0.09, wpb=64892, bsz=128, num_updates=3432, lr=9.99805e-05, gnorm=3.287, loss_scale=1, train_wall=11, gb_free=2.8, wall=39350 2021-06-19 05:34:47 | INFO | train_inner | epoch 002: 457 / 3002 loss=2.874, ppl=7.33, wps=5779.7, ups=0.09, wpb=64774, bsz=128, num_updates=3433, lr=9.99805e-05, gnorm=2.314, loss_scale=1, train_wall=11, gb_free=2.8, wall=39361 2021-06-19 05:34:57 | INFO | train_inner | epoch 002: 458 / 3002 loss=2.796, ppl=6.94, wps=5916.7, ups=0.09, wpb=64824, bsz=128, num_updates=3434, lr=9.99805e-05, gnorm=3.186, loss_scale=1, train_wall=11, gb_free=2.8, wall=39372 2021-06-19 05:35:08 | INFO | train_inner | epoch 002: 459 / 3002 loss=2.679, ppl=6.4, wps=5965.3, ups=0.09, wpb=64910, bsz=128, num_updates=3435, lr=9.99805e-05, gnorm=2.477, loss_scale=1, train_wall=10, gb_free=2.8, wall=39383 2021-06-19 05:35:19 | INFO | train_inner | epoch 002: 460 / 3002 loss=2.865, ppl=7.29, wps=5880.8, ups=0.09, wpb=64888, bsz=128, num_updates=3436, lr=9.99805e-05, gnorm=2.978, loss_scale=1, train_wall=11, gb_free=2.8, wall=39394 2021-06-19 05:35:30 | INFO | train_inner | epoch 002: 461 / 3002 loss=2.938, ppl=7.67, wps=5952.9, ups=0.09, wpb=64871, bsz=128, num_updates=3437, lr=9.99805e-05, gnorm=7.105, loss_scale=1, train_wall=10, gb_free=2.8, wall=39405 2021-06-19 05:35:42 | INFO | train_inner | epoch 002: 462 / 3002 loss=2.867, ppl=7.3, wps=5747.8, ups=0.09, wpb=64886, bsz=128, num_updates=3438, lr=9.99805e-05, gnorm=2.515, loss_scale=1, train_wall=11, gb_free=2.8, wall=39416 2021-06-19 05:35:53 | INFO | train_inner | epoch 002: 463 / 3002 loss=2.863, ppl=7.28, wps=5861.2, ups=0.09, wpb=64827, bsz=128, num_updates=3439, lr=9.99805e-05, gnorm=2.47, loss_scale=1, train_wall=11, gb_free=2.8, wall=39427 2021-06-19 05:36:04 | INFO | train_inner | epoch 002: 464 / 3002 loss=2.753, ppl=6.74, wps=5847.2, ups=0.09, wpb=64789, bsz=128, num_updates=3440, lr=9.99805e-05, gnorm=2.262, loss_scale=1, train_wall=11, gb_free=2.8, wall=39438 2021-06-19 05:36:15 | INFO | train_inner | epoch 002: 465 / 3002 loss=2.871, ppl=7.31, wps=5855.8, ups=0.09, wpb=64829, bsz=128, num_updates=3441, lr=9.99805e-05, gnorm=2.423, loss_scale=1, train_wall=11, gb_free=2.8, wall=39449 2021-06-19 05:36:26 | INFO | train_inner | epoch 002: 466 / 3002 loss=2.836, ppl=7.14, wps=5804.2, ups=0.09, wpb=64864, bsz=128, num_updates=3442, lr=9.99805e-05, gnorm=2.399, loss_scale=1, train_wall=11, gb_free=2.8, wall=39460 2021-06-19 05:36:37 | INFO | train_inner | epoch 002: 467 / 3002 loss=2.842, ppl=7.17, wps=5878.1, ups=0.09, wpb=64880, bsz=128, num_updates=3443, lr=9.99805e-05, gnorm=17.529, loss_scale=1, train_wall=11, gb_free=2.8, wall=39471 2021-06-19 05:36:48 | INFO | train_inner | epoch 002: 468 / 3002 loss=2.888, ppl=7.4, wps=5775.5, ups=0.09, wpb=64804, bsz=128, num_updates=3444, lr=9.99804e-05, gnorm=2.476, loss_scale=1, train_wall=11, gb_free=2.8, wall=39483 2021-06-19 05:36:59 | INFO | train_inner | epoch 002: 469 / 3002 loss=2.797, ppl=6.95, wps=5773.8, ups=0.09, wpb=64810, bsz=128, num_updates=3445, lr=9.99804e-05, gnorm=2.595, loss_scale=1, train_wall=11, gb_free=2.8, wall=39494 2021-06-19 05:37:10 | INFO | train_inner | epoch 002: 470 / 3002 loss=2.897, ppl=7.45, wps=5962.9, ups=0.09, wpb=64859, bsz=128, num_updates=3446, lr=9.99804e-05, gnorm=2.454, loss_scale=1, train_wall=10, gb_free=2.8, wall=39505 2021-06-19 05:37:21 | INFO | train_inner | epoch 002: 471 / 3002 loss=2.888, ppl=7.4, wps=5841.8, ups=0.09, wpb=64747, bsz=128, num_updates=3447, lr=9.99804e-05, gnorm=2.866, loss_scale=1, train_wall=11, gb_free=2.8, wall=39516 2021-06-19 05:37:33 | INFO | train_inner | epoch 002: 472 / 3002 loss=2.785, ppl=6.89, wps=5824.6, ups=0.09, wpb=64837, bsz=128, num_updates=3448, lr=9.99804e-05, gnorm=2.847, loss_scale=1, train_wall=11, gb_free=2.8, wall=39527 2021-06-19 05:37:44 | INFO | train_inner | epoch 002: 473 / 3002 loss=2.829, ppl=7.1, wps=5841.3, ups=0.09, wpb=64818, bsz=128, num_updates=3449, lr=9.99804e-05, gnorm=2.428, loss_scale=1, train_wall=11, gb_free=2.8, wall=39538 2021-06-19 05:37:55 | INFO | train_inner | epoch 002: 474 / 3002 loss=2.809, ppl=7.01, wps=5855.6, ups=0.09, wpb=64846, bsz=128, num_updates=3450, lr=9.99804e-05, gnorm=2.429, loss_scale=1, train_wall=11, gb_free=2.8, wall=39549 2021-06-19 05:38:06 | INFO | train_inner | epoch 002: 475 / 3002 loss=2.826, ppl=7.09, wps=5956.4, ups=0.09, wpb=64861, bsz=128, num_updates=3451, lr=9.99804e-05, gnorm=2.627, loss_scale=1, train_wall=10, gb_free=2.8, wall=39560 2021-06-19 05:38:16 | INFO | train_inner | epoch 002: 476 / 3002 loss=2.918, ppl=7.56, wps=6051.3, ups=0.09, wpb=64841, bsz=128, num_updates=3452, lr=9.99804e-05, gnorm=2.426, loss_scale=1, train_wall=10, gb_free=2.8, wall=39571 2021-06-19 05:38:28 | INFO | train_inner | epoch 002: 477 / 3002 loss=2.878, ppl=7.35, wps=5768.9, ups=0.09, wpb=64925, bsz=128, num_updates=3453, lr=9.99804e-05, gnorm=3.382, loss_scale=1, train_wall=11, gb_free=2.8, wall=39582 2021-06-19 05:38:39 | INFO | train_inner | epoch 002: 478 / 3002 loss=2.865, ppl=7.29, wps=5822.9, ups=0.09, wpb=64850, bsz=128, num_updates=3454, lr=9.99804e-05, gnorm=2.335, loss_scale=1, train_wall=11, gb_free=2.8, wall=39593 2021-06-19 05:38:50 | INFO | train_inner | epoch 002: 479 / 3002 loss=2.647, ppl=6.26, wps=5828.1, ups=0.09, wpb=64814, bsz=128, num_updates=3455, lr=9.99804e-05, gnorm=2.399, loss_scale=1, train_wall=11, gb_free=2.8, wall=39604 2021-06-19 05:39:01 | INFO | train_inner | epoch 002: 480 / 3002 loss=2.768, ppl=6.81, wps=5839.8, ups=0.09, wpb=64784, bsz=128, num_updates=3456, lr=9.99804e-05, gnorm=2.467, loss_scale=1, train_wall=11, gb_free=2.8, wall=39615 2021-06-19 05:39:12 | INFO | train_inner | epoch 002: 481 / 3002 loss=2.613, ppl=6.12, wps=5956.5, ups=0.09, wpb=64905, bsz=128, num_updates=3457, lr=9.99803e-05, gnorm=2.17, loss_scale=1, train_wall=10, gb_free=2.8, wall=39626 2021-06-19 05:39:23 | INFO | train_inner | epoch 002: 482 / 3002 loss=2.718, ppl=6.58, wps=5808.2, ups=0.09, wpb=64867, bsz=128, num_updates=3458, lr=9.99803e-05, gnorm=4.051, loss_scale=1, train_wall=11, gb_free=2.8, wall=39637 2021-06-19 05:39:34 | INFO | train_inner | epoch 002: 483 / 3002 loss=2.797, ppl=6.95, wps=5739, ups=0.09, wpb=64805, bsz=128, num_updates=3459, lr=9.99803e-05, gnorm=2.28, loss_scale=1, train_wall=11, gb_free=2.8, wall=39649 2021-06-19 05:39:46 | INFO | train_inner | epoch 002: 484 / 3002 loss=2.686, ppl=6.44, wps=5708.1, ups=0.09, wpb=64796, bsz=128, num_updates=3460, lr=9.99803e-05, gnorm=2.3, loss_scale=1, train_wall=11, gb_free=2.8, wall=39660 2021-06-19 05:39:57 | INFO | train_inner | epoch 002: 485 / 3002 loss=2.854, ppl=7.23, wps=5837.9, ups=0.09, wpb=64818, bsz=128, num_updates=3461, lr=9.99803e-05, gnorm=5.205, loss_scale=1, train_wall=11, gb_free=2.8, wall=39671 2021-06-19 05:40:08 | INFO | train_inner | epoch 002: 486 / 3002 loss=2.886, ppl=7.39, wps=5824.5, ups=0.09, wpb=64785, bsz=128, num_updates=3462, lr=9.99803e-05, gnorm=2.262, loss_scale=1, train_wall=11, gb_free=2.8, wall=39682 2021-06-19 05:40:19 | INFO | train_inner | epoch 002: 487 / 3002 loss=2.771, ppl=6.83, wps=5938.5, ups=0.09, wpb=64917, bsz=128, num_updates=3463, lr=9.99803e-05, gnorm=2.371, loss_scale=1, train_wall=10, gb_free=2.8, wall=39693 2021-06-19 05:40:30 | INFO | train_inner | epoch 002: 488 / 3002 loss=2.928, ppl=7.61, wps=5824.5, ups=0.09, wpb=64895, bsz=128, num_updates=3464, lr=9.99803e-05, gnorm=2.258, loss_scale=1, train_wall=11, gb_free=2.8, wall=39704 2021-06-19 05:40:41 | INFO | train_inner | epoch 002: 489 / 3002 loss=2.785, ppl=6.89, wps=5846.8, ups=0.09, wpb=64817, bsz=128, num_updates=3465, lr=9.99803e-05, gnorm=2.332, loss_scale=1, train_wall=11, gb_free=2.8, wall=39715 2021-06-19 05:40:52 | INFO | train_inner | epoch 002: 490 / 3002 loss=2.951, ppl=7.74, wps=5892.7, ups=0.09, wpb=64797, bsz=128, num_updates=3466, lr=9.99803e-05, gnorm=2.447, loss_scale=1, train_wall=11, gb_free=2.8, wall=39726 2021-06-19 05:41:03 | INFO | train_inner | epoch 002: 491 / 3002 loss=2.864, ppl=7.28, wps=5882.2, ups=0.09, wpb=64895, bsz=128, num_updates=3467, lr=9.99803e-05, gnorm=4.819, loss_scale=1, train_wall=11, gb_free=2.8, wall=39737 2021-06-19 05:41:14 | INFO | train_inner | epoch 002: 492 / 3002 loss=2.745, ppl=6.71, wps=5964.7, ups=0.09, wpb=64868, bsz=128, num_updates=3468, lr=9.99803e-05, gnorm=2.302, loss_scale=1, train_wall=10, gb_free=2.8, wall=39748 2021-06-19 05:41:25 | INFO | train_inner | epoch 002: 493 / 3002 loss=2.849, ppl=7.21, wps=5802.1, ups=0.09, wpb=64776, bsz=128, num_updates=3469, lr=9.99802e-05, gnorm=2.212, loss_scale=1, train_wall=11, gb_free=2.8, wall=39759 2021-06-19 05:41:36 | INFO | train_inner | epoch 002: 494 / 3002 loss=2.687, ppl=6.44, wps=5789.2, ups=0.09, wpb=64812, bsz=128, num_updates=3470, lr=9.99802e-05, gnorm=2.257, loss_scale=1, train_wall=11, gb_free=2.8, wall=39771 2021-06-19 05:41:47 | INFO | train_inner | epoch 002: 495 / 3002 loss=2.64, ppl=6.23, wps=5844.9, ups=0.09, wpb=64841, bsz=128, num_updates=3471, lr=9.99802e-05, gnorm=2.502, loss_scale=1, train_wall=11, gb_free=2.8, wall=39782 2021-06-19 05:41:59 | INFO | train_inner | epoch 002: 496 / 3002 loss=2.885, ppl=7.39, wps=5785.6, ups=0.09, wpb=64820, bsz=128, num_updates=3472, lr=9.99802e-05, gnorm=2.22, loss_scale=1, train_wall=11, gb_free=2.8, wall=39793 2021-06-19 05:42:10 | INFO | train_inner | epoch 002: 497 / 3002 loss=2.958, ppl=7.77, wps=5846.3, ups=0.09, wpb=64831, bsz=128, num_updates=3473, lr=9.99802e-05, gnorm=2.386, loss_scale=1, train_wall=11, gb_free=2.8, wall=39804 2021-06-19 05:42:21 | INFO | train_inner | epoch 002: 498 / 3002 loss=2.658, ppl=6.31, wps=5793.7, ups=0.09, wpb=64849, bsz=128, num_updates=3474, lr=9.99802e-05, gnorm=2.347, loss_scale=1, train_wall=11, gb_free=2.8, wall=39815 2021-06-19 05:42:32 | INFO | train_inner | epoch 002: 499 / 3002 loss=2.745, ppl=6.7, wps=6032.8, ups=0.09, wpb=64835, bsz=128, num_updates=3475, lr=9.99802e-05, gnorm=2.147, loss_scale=1, train_wall=10, gb_free=2.8, wall=39826 2021-06-19 05:42:43 | INFO | train_inner | epoch 002: 500 / 3002 loss=2.777, ppl=6.86, wps=5818.3, ups=0.09, wpb=64926, bsz=128, num_updates=3476, lr=9.99802e-05, gnorm=2.395, loss_scale=1, train_wall=11, gb_free=2.8, wall=39837 2021-06-19 05:42:54 | INFO | train_inner | epoch 002: 501 / 3002 loss=2.831, ppl=7.12, wps=5884.5, ups=0.09, wpb=64910, bsz=128, num_updates=3477, lr=9.99802e-05, gnorm=2.373, loss_scale=1, train_wall=11, gb_free=2.8, wall=39848 2021-06-19 05:43:05 | INFO | train_inner | epoch 002: 502 / 3002 loss=2.861, ppl=7.27, wps=5911.1, ups=0.09, wpb=64812, bsz=128, num_updates=3478, lr=9.99802e-05, gnorm=2.414, loss_scale=1, train_wall=11, gb_free=2.8, wall=39859 2021-06-19 05:43:16 | INFO | train_inner | epoch 002: 503 / 3002 loss=2.796, ppl=6.95, wps=5833.6, ups=0.09, wpb=64801, bsz=128, num_updates=3479, lr=9.99802e-05, gnorm=2.374, loss_scale=1, train_wall=11, gb_free=2.8, wall=39870 2021-06-19 05:43:27 | INFO | train_inner | epoch 002: 504 / 3002 loss=2.769, ppl=6.82, wps=5775.6, ups=0.09, wpb=64837, bsz=128, num_updates=3480, lr=9.99802e-05, gnorm=2.34, loss_scale=1, train_wall=11, gb_free=2.8, wall=39881 2021-06-19 05:43:38 | INFO | train_inner | epoch 002: 505 / 3002 loss=2.87, ppl=7.31, wps=5973.6, ups=0.09, wpb=64850, bsz=128, num_updates=3481, lr=9.99802e-05, gnorm=2.205, loss_scale=1, train_wall=10, gb_free=2.8, wall=39892 2021-06-19 05:43:49 | INFO | train_inner | epoch 002: 506 / 3002 loss=2.839, ppl=7.15, wps=5907.6, ups=0.09, wpb=64769, bsz=128, num_updates=3482, lr=9.99801e-05, gnorm=3.407, loss_scale=1, train_wall=10, gb_free=2.8, wall=39903 2021-06-19 05:44:00 | INFO | train_inner | epoch 002: 507 / 3002 loss=2.883, ppl=7.38, wps=5887, ups=0.09, wpb=64878, bsz=128, num_updates=3483, lr=9.99801e-05, gnorm=2.384, loss_scale=1, train_wall=11, gb_free=2.8, wall=39914 2021-06-19 05:44:11 | INFO | train_inner | epoch 002: 508 / 3002 loss=2.886, ppl=7.39, wps=5966.9, ups=0.09, wpb=64883, bsz=128, num_updates=3484, lr=9.99801e-05, gnorm=2.338, loss_scale=1, train_wall=10, gb_free=2.8, wall=39925 2021-06-19 05:44:22 | INFO | train_inner | epoch 002: 509 / 3002 loss=2.728, ppl=6.63, wps=5806.4, ups=0.09, wpb=64838, bsz=128, num_updates=3485, lr=9.99801e-05, gnorm=2.422, loss_scale=1, train_wall=11, gb_free=2.8, wall=39936 2021-06-19 05:44:33 | INFO | train_inner | epoch 002: 510 / 3002 loss=2.807, ppl=7, wps=5826, ups=0.09, wpb=64890, bsz=128, num_updates=3486, lr=9.99801e-05, gnorm=2.461, loss_scale=1, train_wall=11, gb_free=2.8, wall=39947 2021-06-19 05:44:44 | INFO | train_inner | epoch 002: 511 / 3002 loss=2.834, ppl=7.13, wps=5937.4, ups=0.09, wpb=64821, bsz=128, num_updates=3487, lr=9.99801e-05, gnorm=2.356, loss_scale=1, train_wall=10, gb_free=2.8, wall=39958 2021-06-19 05:44:55 | INFO | train_inner | epoch 002: 512 / 3002 loss=2.806, ppl=6.99, wps=5830.7, ups=0.09, wpb=64902, bsz=128, num_updates=3488, lr=9.99801e-05, gnorm=2.435, loss_scale=1, train_wall=11, gb_free=2.8, wall=39970 2021-06-19 05:45:06 | INFO | train_inner | epoch 002: 513 / 3002 loss=2.923, ppl=7.58, wps=5822.2, ups=0.09, wpb=64780, bsz=128, num_updates=3489, lr=9.99801e-05, gnorm=2.561, loss_scale=1, train_wall=11, gb_free=2.8, wall=39981 2021-06-19 05:45:17 | INFO | train_inner | epoch 002: 514 / 3002 loss=2.77, ppl=6.82, wps=5890.7, ups=0.09, wpb=64889, bsz=128, num_updates=3490, lr=9.99801e-05, gnorm=2.51, loss_scale=1, train_wall=11, gb_free=2.8, wall=39992 2021-06-19 05:45:28 | INFO | train_inner | epoch 002: 515 / 3002 loss=2.761, ppl=6.78, wps=5823.2, ups=0.09, wpb=64824, bsz=128, num_updates=3491, lr=9.99801e-05, gnorm=2.256, loss_scale=1, train_wall=11, gb_free=2.8, wall=40003 2021-06-19 05:45:40 | INFO | train_inner | epoch 002: 516 / 3002 loss=2.851, ppl=7.22, wps=5857.6, ups=0.09, wpb=64802, bsz=128, num_updates=3492, lr=9.99801e-05, gnorm=2.31, loss_scale=1, train_wall=11, gb_free=2.8, wall=40014 2021-06-19 05:45:51 | INFO | train_inner | epoch 002: 517 / 3002 loss=2.662, ppl=6.33, wps=5824.5, ups=0.09, wpb=64857, bsz=128, num_updates=3493, lr=9.99801e-05, gnorm=2.57, loss_scale=1, train_wall=11, gb_free=2.8, wall=40025 2021-06-19 05:46:02 | INFO | train_inner | epoch 002: 518 / 3002 loss=2.746, ppl=6.71, wps=5762, ups=0.09, wpb=64881, bsz=128, num_updates=3494, lr=9.998e-05, gnorm=2.598, loss_scale=1, train_wall=11, gb_free=2.8, wall=40036 2021-06-19 05:46:13 | INFO | train_inner | epoch 002: 519 / 3002 loss=2.733, ppl=6.65, wps=5888.1, ups=0.09, wpb=64881, bsz=128, num_updates=3495, lr=9.998e-05, gnorm=2.308, loss_scale=1, train_wall=11, gb_free=2.8, wall=40047 2021-06-19 05:46:24 | INFO | train_inner | epoch 002: 520 / 3002 loss=2.927, ppl=7.6, wps=5852.5, ups=0.09, wpb=64773, bsz=128, num_updates=3496, lr=9.998e-05, gnorm=2.474, loss_scale=1, train_wall=11, gb_free=2.8, wall=40058 2021-06-19 05:46:35 | INFO | train_inner | epoch 002: 521 / 3002 loss=2.869, ppl=7.3, wps=5934.2, ups=0.09, wpb=64749, bsz=128, num_updates=3497, lr=9.998e-05, gnorm=2.347, loss_scale=1, train_wall=10, gb_free=2.8, wall=40069 2021-06-19 05:46:46 | INFO | train_inner | epoch 002: 522 / 3002 loss=2.802, ppl=6.97, wps=5872.1, ups=0.09, wpb=64876, bsz=128, num_updates=3498, lr=9.998e-05, gnorm=2.797, loss_scale=1, train_wall=11, gb_free=2.8, wall=40080 2021-06-19 05:46:57 | INFO | train_inner | epoch 002: 523 / 3002 loss=2.776, ppl=6.85, wps=5856.4, ups=0.09, wpb=64790, bsz=128, num_updates=3499, lr=9.998e-05, gnorm=2.348, loss_scale=1, train_wall=11, gb_free=2.8, wall=40091 2021-06-19 05:47:08 | INFO | train_inner | epoch 002: 524 / 3002 loss=2.797, ppl=6.95, wps=5943.5, ups=0.09, wpb=64865, bsz=128, num_updates=3500, lr=9.998e-05, gnorm=2.401, loss_scale=1, train_wall=10, gb_free=2.8, wall=40102 2021-06-19 05:47:19 | INFO | train_inner | epoch 002: 525 / 3002 loss=2.827, ppl=7.09, wps=5790.2, ups=0.09, wpb=64744, bsz=128, num_updates=3501, lr=9.998e-05, gnorm=2.329, loss_scale=1, train_wall=11, gb_free=2.8, wall=40113 2021-06-19 05:47:30 | INFO | train_inner | epoch 002: 526 / 3002 loss=2.706, ppl=6.52, wps=5967.3, ups=0.09, wpb=64878, bsz=128, num_updates=3502, lr=9.998e-05, gnorm=2.412, loss_scale=1, train_wall=10, gb_free=2.8, wall=40124 2021-06-19 05:47:41 | INFO | train_inner | epoch 002: 527 / 3002 loss=2.91, ppl=7.51, wps=5878.7, ups=0.09, wpb=64866, bsz=128, num_updates=3503, lr=9.998e-05, gnorm=2.383, loss_scale=1, train_wall=11, gb_free=2.8, wall=40135 2021-06-19 05:47:52 | INFO | train_inner | epoch 002: 528 / 3002 loss=2.828, ppl=7.1, wps=5819.3, ups=0.09, wpb=64847, bsz=128, num_updates=3504, lr=9.998e-05, gnorm=2.336, loss_scale=1, train_wall=11, gb_free=2.8, wall=40147 2021-06-19 05:48:03 | INFO | train_inner | epoch 002: 529 / 3002 loss=2.839, ppl=7.15, wps=5882.8, ups=0.09, wpb=64826, bsz=128, num_updates=3505, lr=9.998e-05, gnorm=2.572, loss_scale=1, train_wall=11, gb_free=2.8, wall=40158 2021-06-19 05:48:14 | INFO | train_inner | epoch 002: 530 / 3002 loss=2.756, ppl=6.76, wps=5748.8, ups=0.09, wpb=64821, bsz=128, num_updates=3506, lr=9.998e-05, gnorm=2.237, loss_scale=1, train_wall=11, gb_free=2.8, wall=40169 2021-06-19 05:48:25 | INFO | train_inner | epoch 002: 531 / 3002 loss=2.796, ppl=6.94, wps=5916.8, ups=0.09, wpb=64885, bsz=128, num_updates=3507, lr=9.99799e-05, gnorm=2.27, loss_scale=1, train_wall=10, gb_free=2.8, wall=40180 2021-06-19 05:48:36 | INFO | train_inner | epoch 002: 532 / 3002 loss=2.804, ppl=6.98, wps=5908.4, ups=0.09, wpb=64856, bsz=128, num_updates=3508, lr=9.99799e-05, gnorm=2.198, loss_scale=1, train_wall=11, gb_free=2.8, wall=40191 2021-06-19 05:48:47 | INFO | train_inner | epoch 002: 533 / 3002 loss=2.81, ppl=7.01, wps=5942.5, ups=0.09, wpb=64932, bsz=128, num_updates=3509, lr=9.99799e-05, gnorm=2.257, loss_scale=1, train_wall=10, gb_free=2.8, wall=40202 2021-06-19 05:48:59 | INFO | train_inner | epoch 002: 534 / 3002 loss=2.795, ppl=6.94, wps=5786.9, ups=0.09, wpb=64832, bsz=128, num_updates=3510, lr=9.99799e-05, gnorm=2.257, loss_scale=1, train_wall=11, gb_free=2.8, wall=40213 2021-06-19 05:49:10 | INFO | train_inner | epoch 002: 535 / 3002 loss=2.928, ppl=7.61, wps=5799.8, ups=0.09, wpb=64919, bsz=128, num_updates=3511, lr=9.99799e-05, gnorm=2.268, loss_scale=1, train_wall=11, gb_free=2.8, wall=40224 2021-06-19 05:49:21 | INFO | train_inner | epoch 002: 536 / 3002 loss=2.665, ppl=6.34, wps=5808.9, ups=0.09, wpb=64854, bsz=128, num_updates=3512, lr=9.99799e-05, gnorm=2.347, loss_scale=1, train_wall=11, gb_free=2.8, wall=40235 2021-06-19 05:49:32 | INFO | train_inner | epoch 002: 537 / 3002 loss=2.755, ppl=6.75, wps=5878.7, ups=0.09, wpb=64770, bsz=128, num_updates=3513, lr=9.99799e-05, gnorm=4.439, loss_scale=1, train_wall=11, gb_free=2.8, wall=40246 2021-06-19 05:49:43 | INFO | train_inner | epoch 002: 538 / 3002 loss=2.775, ppl=6.84, wps=5930.2, ups=0.09, wpb=64868, bsz=128, num_updates=3514, lr=9.99799e-05, gnorm=2.738, loss_scale=1, train_wall=10, gb_free=2.8, wall=40257 2021-06-19 05:49:54 | INFO | train_inner | epoch 002: 539 / 3002 loss=2.83, ppl=7.11, wps=5769.2, ups=0.09, wpb=64836, bsz=128, num_updates=3515, lr=9.99799e-05, gnorm=2.34, loss_scale=1, train_wall=11, gb_free=2.8, wall=40268 2021-06-19 05:50:05 | INFO | train_inner | epoch 002: 540 / 3002 loss=2.825, ppl=7.08, wps=5786.9, ups=0.09, wpb=64755, bsz=128, num_updates=3516, lr=9.99799e-05, gnorm=2.215, loss_scale=1, train_wall=11, gb_free=2.8, wall=40280 2021-06-19 05:50:16 | INFO | train_inner | epoch 002: 541 / 3002 loss=2.966, ppl=7.81, wps=5844.9, ups=0.09, wpb=64947, bsz=128, num_updates=3517, lr=9.99799e-05, gnorm=14.89, loss_scale=1, train_wall=11, gb_free=2.8, wall=40291 2021-06-19 05:50:27 | INFO | train_inner | epoch 002: 542 / 3002 loss=2.917, ppl=7.56, wps=5918.7, ups=0.09, wpb=64808, bsz=128, num_updates=3518, lr=9.99799e-05, gnorm=2.279, loss_scale=1, train_wall=11, gb_free=2.8, wall=40302 2021-06-19 05:50:38 | INFO | train_inner | epoch 002: 543 / 3002 loss=2.623, ppl=6.16, wps=5862.9, ups=0.09, wpb=64850, bsz=128, num_updates=3519, lr=9.99798e-05, gnorm=2.359, loss_scale=1, train_wall=11, gb_free=2.8, wall=40313 2021-06-19 05:50:50 | INFO | train_inner | epoch 002: 544 / 3002 loss=2.773, ppl=6.83, wps=5781.5, ups=0.09, wpb=64819, bsz=128, num_updates=3520, lr=9.99798e-05, gnorm=2.399, loss_scale=2, train_wall=11, gb_free=2.8, wall=40324 2021-06-19 05:51:01 | INFO | train_inner | epoch 002: 545 / 3002 loss=2.765, ppl=6.8, wps=5826.9, ups=0.09, wpb=64909, bsz=128, num_updates=3521, lr=9.99798e-05, gnorm=2.541, loss_scale=2, train_wall=11, gb_free=2.8, wall=40335 2021-06-19 05:51:12 | INFO | train_inner | epoch 002: 546 / 3002 loss=2.75, ppl=6.73, wps=5930.7, ups=0.09, wpb=64880, bsz=128, num_updates=3522, lr=9.99798e-05, gnorm=2.538, loss_scale=2, train_wall=11, gb_free=2.8, wall=40346 2021-06-19 05:51:23 | INFO | train_inner | epoch 002: 547 / 3002 loss=2.882, ppl=7.37, wps=5918.9, ups=0.09, wpb=64885, bsz=128, num_updates=3523, lr=9.99798e-05, gnorm=2.513, loss_scale=2, train_wall=10, gb_free=2.8, wall=40357 2021-06-19 05:51:34 | INFO | train_inner | epoch 002: 548 / 3002 loss=2.727, ppl=6.62, wps=5847.1, ups=0.09, wpb=64825, bsz=128, num_updates=3524, lr=9.99798e-05, gnorm=2.378, loss_scale=2, train_wall=11, gb_free=2.8, wall=40368 2021-06-19 05:51:45 | INFO | train_inner | epoch 002: 549 / 3002 loss=2.785, ppl=6.89, wps=5914.2, ups=0.09, wpb=64817, bsz=128, num_updates=3525, lr=9.99798e-05, gnorm=2.658, loss_scale=2, train_wall=11, gb_free=2.8, wall=40379 2021-06-19 05:51:56 | INFO | train_inner | epoch 002: 550 / 3002 loss=2.791, ppl=6.92, wps=5950.5, ups=0.09, wpb=64799, bsz=128, num_updates=3526, lr=9.99798e-05, gnorm=2.346, loss_scale=2, train_wall=10, gb_free=2.8, wall=40390 2021-06-19 05:52:07 | INFO | train_inner | epoch 002: 551 / 3002 loss=2.949, ppl=7.72, wps=5855.3, ups=0.09, wpb=64847, bsz=128, num_updates=3527, lr=9.99798e-05, gnorm=2.677, loss_scale=2, train_wall=11, gb_free=2.8, wall=40401 2021-06-19 05:52:18 | INFO | train_inner | epoch 002: 552 / 3002 loss=2.766, ppl=6.8, wps=5911.7, ups=0.09, wpb=64806, bsz=128, num_updates=3528, lr=9.99798e-05, gnorm=2.376, loss_scale=2, train_wall=11, gb_free=2.8, wall=40412 2021-06-19 05:52:29 | INFO | train_inner | epoch 002: 553 / 3002 loss=2.833, ppl=7.13, wps=5879.2, ups=0.09, wpb=64861, bsz=128, num_updates=3529, lr=9.99798e-05, gnorm=2.319, loss_scale=2, train_wall=11, gb_free=2.8, wall=40423 2021-06-19 05:52:40 | INFO | train_inner | epoch 002: 554 / 3002 loss=2.701, ppl=6.5, wps=5831.2, ups=0.09, wpb=64814, bsz=128, num_updates=3530, lr=9.99798e-05, gnorm=2.384, loss_scale=2, train_wall=11, gb_free=2.8, wall=40434 2021-06-19 05:52:51 | INFO | train_inner | epoch 002: 555 / 3002 loss=2.881, ppl=7.37, wps=5761.3, ups=0.09, wpb=64836, bsz=128, num_updates=3531, lr=9.99798e-05, gnorm=2.327, loss_scale=2, train_wall=11, gb_free=2.8, wall=40445 2021-06-19 05:53:02 | INFO | train_inner | epoch 002: 556 / 3002 loss=3, ppl=8, wps=5901.4, ups=0.09, wpb=64878, bsz=128, num_updates=3532, lr=9.99797e-05, gnorm=2.239, loss_scale=2, train_wall=11, gb_free=2.8, wall=40456 2021-06-19 05:53:13 | INFO | train_inner | epoch 002: 557 / 3002 loss=2.748, ppl=6.72, wps=5791.9, ups=0.09, wpb=64725, bsz=128, num_updates=3533, lr=9.99797e-05, gnorm=2.349, loss_scale=2, train_wall=11, gb_free=2.8, wall=40468 2021-06-19 05:53:24 | INFO | train_inner | epoch 002: 558 / 3002 loss=2.952, ppl=7.74, wps=5899.2, ups=0.09, wpb=64803, bsz=128, num_updates=3534, lr=9.99797e-05, gnorm=2.264, loss_scale=2, train_wall=11, gb_free=2.8, wall=40479 2021-06-19 05:53:35 | INFO | train_inner | epoch 002: 559 / 3002 loss=2.947, ppl=7.71, wps=5818.2, ups=0.09, wpb=64879, bsz=128, num_updates=3535, lr=9.99797e-05, gnorm=2.455, loss_scale=2, train_wall=11, gb_free=2.8, wall=40490 2021-06-19 05:53:46 | INFO | train_inner | epoch 002: 560 / 3002 loss=2.878, ppl=7.35, wps=5879.4, ups=0.09, wpb=64817, bsz=128, num_updates=3536, lr=9.99797e-05, gnorm=2.433, loss_scale=2, train_wall=11, gb_free=2.8, wall=40501 2021-06-19 05:53:57 | INFO | train_inner | epoch 002: 561 / 3002 loss=2.856, ppl=7.24, wps=5876, ups=0.09, wpb=64853, bsz=128, num_updates=3537, lr=9.99797e-05, gnorm=2.252, loss_scale=2, train_wall=11, gb_free=2.8, wall=40512 2021-06-19 05:54:09 | INFO | train_inner | epoch 002: 562 / 3002 loss=2.801, ppl=6.97, wps=5788.3, ups=0.09, wpb=64864, bsz=128, num_updates=3538, lr=9.99797e-05, gnorm=2.341, loss_scale=2, train_wall=11, gb_free=2.8, wall=40523 2021-06-19 05:54:20 | INFO | train_inner | epoch 002: 563 / 3002 loss=2.814, ppl=7.03, wps=5833.7, ups=0.09, wpb=64868, bsz=128, num_updates=3539, lr=9.99797e-05, gnorm=2.358, loss_scale=2, train_wall=11, gb_free=2.8, wall=40534 2021-06-19 05:54:31 | INFO | train_inner | epoch 002: 564 / 3002 loss=2.926, ppl=7.6, wps=5732.5, ups=0.09, wpb=64791, bsz=128, num_updates=3540, lr=9.99797e-05, gnorm=2.353, loss_scale=2, train_wall=11, gb_free=2.8, wall=40545 2021-06-19 05:54:42 | INFO | train_inner | epoch 002: 565 / 3002 loss=2.792, ppl=6.93, wps=5878.5, ups=0.09, wpb=64866, bsz=128, num_updates=3541, lr=9.99797e-05, gnorm=2.306, loss_scale=2, train_wall=11, gb_free=2.8, wall=40556 2021-06-19 05:54:53 | INFO | train_inner | epoch 002: 566 / 3002 loss=2.764, ppl=6.79, wps=5793.9, ups=0.09, wpb=64784, bsz=128, num_updates=3542, lr=9.99797e-05, gnorm=3.043, loss_scale=2, train_wall=11, gb_free=2.8, wall=40568 2021-06-19 05:55:04 | INFO | train_inner | epoch 002: 567 / 3002 loss=2.827, ppl=7.09, wps=5826.2, ups=0.09, wpb=64863, bsz=128, num_updates=3543, lr=9.99797e-05, gnorm=2.307, loss_scale=2, train_wall=11, gb_free=2.8, wall=40579 2021-06-19 05:55:16 | INFO | train_inner | epoch 002: 568 / 3002 loss=2.818, ppl=7.05, wps=5832.7, ups=0.09, wpb=64804, bsz=128, num_updates=3544, lr=9.99796e-05, gnorm=2.335, loss_scale=2, train_wall=11, gb_free=2.8, wall=40590 2021-06-19 05:55:27 | INFO | train_inner | epoch 002: 569 / 3002 loss=2.718, ppl=6.58, wps=5819.9, ups=0.09, wpb=64771, bsz=128, num_updates=3545, lr=9.99796e-05, gnorm=2.225, loss_scale=2, train_wall=11, gb_free=2.8, wall=40601 2021-06-19 05:55:38 | INFO | train_inner | epoch 002: 570 / 3002 loss=2.703, ppl=6.51, wps=5799.8, ups=0.09, wpb=64892, bsz=128, num_updates=3546, lr=9.99796e-05, gnorm=2.355, loss_scale=2, train_wall=11, gb_free=2.8, wall=40612 2021-06-19 05:55:49 | INFO | train_inner | epoch 002: 571 / 3002 loss=2.627, ppl=6.18, wps=5769.8, ups=0.09, wpb=64792, bsz=128, num_updates=3547, lr=9.99796e-05, gnorm=2.413, loss_scale=2, train_wall=11, gb_free=2.8, wall=40623 2021-06-19 05:56:00 | INFO | train_inner | epoch 002: 572 / 3002 loss=2.662, ppl=6.33, wps=5869.2, ups=0.09, wpb=64793, bsz=128, num_updates=3548, lr=9.99796e-05, gnorm=2.334, loss_scale=2, train_wall=11, gb_free=2.8, wall=40634 2021-06-19 05:56:11 | INFO | train_inner | epoch 002: 573 / 3002 loss=2.893, ppl=7.43, wps=5752.9, ups=0.09, wpb=64788, bsz=128, num_updates=3549, lr=9.99796e-05, gnorm=2.253, loss_scale=2, train_wall=11, gb_free=2.8, wall=40646 2021-06-19 05:56:22 | INFO | train_inner | epoch 002: 574 / 3002 loss=2.764, ppl=6.79, wps=5890.5, ups=0.09, wpb=64869, bsz=128, num_updates=3550, lr=9.99796e-05, gnorm=2.294, loss_scale=2, train_wall=11, gb_free=2.8, wall=40657 2021-06-19 05:56:33 | INFO | train_inner | epoch 002: 575 / 3002 loss=2.816, ppl=7.04, wps=5836, ups=0.09, wpb=64854, bsz=128, num_updates=3551, lr=9.99796e-05, gnorm=2.235, loss_scale=2, train_wall=11, gb_free=2.8, wall=40668 2021-06-19 05:56:44 | INFO | train_inner | epoch 002: 576 / 3002 loss=2.874, ppl=7.33, wps=5906.1, ups=0.09, wpb=64804, bsz=128, num_updates=3552, lr=9.99796e-05, gnorm=2.342, loss_scale=2, train_wall=10, gb_free=2.8, wall=40679 2021-06-19 05:56:55 | INFO | train_inner | epoch 002: 577 / 3002 loss=2.854, ppl=7.23, wps=5870.6, ups=0.09, wpb=64822, bsz=128, num_updates=3553, lr=9.99796e-05, gnorm=2.301, loss_scale=2, train_wall=11, gb_free=2.8, wall=40690 2021-06-19 05:57:06 | INFO | train_inner | epoch 002: 578 / 3002 loss=2.758, ppl=6.76, wps=5935.2, ups=0.09, wpb=64783, bsz=128, num_updates=3554, lr=9.99796e-05, gnorm=2.419, loss_scale=2, train_wall=10, gb_free=2.8, wall=40701 2021-06-19 05:57:17 | INFO | train_inner | epoch 002: 579 / 3002 loss=2.77, ppl=6.82, wps=5906.2, ups=0.09, wpb=64881, bsz=128, num_updates=3555, lr=9.99796e-05, gnorm=2.333, loss_scale=2, train_wall=11, gb_free=2.8, wall=40712 2021-06-19 05:57:28 | INFO | train_inner | epoch 002: 580 / 3002 loss=2.72, ppl=6.59, wps=5961.3, ups=0.09, wpb=64797, bsz=128, num_updates=3556, lr=9.99796e-05, gnorm=2.337, loss_scale=2, train_wall=10, gb_free=2.8, wall=40723 2021-06-19 05:57:39 | INFO | train_inner | epoch 002: 581 / 3002 loss=2.944, ppl=7.7, wps=5811.6, ups=0.09, wpb=64781, bsz=128, num_updates=3557, lr=9.99795e-05, gnorm=2.374, loss_scale=2, train_wall=11, gb_free=2.8, wall=40734 2021-06-19 05:57:51 | INFO | train_inner | epoch 002: 582 / 3002 loss=2.78, ppl=6.87, wps=5804, ups=0.09, wpb=64835, bsz=128, num_updates=3558, lr=9.99795e-05, gnorm=2.425, loss_scale=2, train_wall=11, gb_free=2.8, wall=40745 2021-06-19 05:58:01 | INFO | train_inner | epoch 002: 583 / 3002 loss=2.852, ppl=7.22, wps=5944.9, ups=0.09, wpb=64834, bsz=128, num_updates=3559, lr=9.99795e-05, gnorm=2.492, loss_scale=2, train_wall=10, gb_free=2.8, wall=40756 2021-06-19 05:58:13 | INFO | train_inner | epoch 002: 584 / 3002 loss=2.925, ppl=7.59, wps=5818.5, ups=0.09, wpb=64844, bsz=128, num_updates=3560, lr=9.99795e-05, gnorm=2.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=40767 2021-06-19 05:58:24 | INFO | train_inner | epoch 002: 585 / 3002 loss=2.815, ppl=7.04, wps=5749.8, ups=0.09, wpb=64823, bsz=128, num_updates=3561, lr=9.99795e-05, gnorm=2.611, loss_scale=2, train_wall=11, gb_free=2.8, wall=40778 2021-06-19 05:58:35 | INFO | train_inner | epoch 002: 586 / 3002 loss=2.891, ppl=7.42, wps=5811.8, ups=0.09, wpb=64875, bsz=128, num_updates=3562, lr=9.99795e-05, gnorm=2.254, loss_scale=2, train_wall=11, gb_free=2.8, wall=40789 2021-06-19 05:58:46 | INFO | train_inner | epoch 002: 587 / 3002 loss=2.909, ppl=7.51, wps=5877.7, ups=0.09, wpb=64791, bsz=128, num_updates=3563, lr=9.99795e-05, gnorm=2.39, loss_scale=2, train_wall=11, gb_free=2.8, wall=40800 2021-06-19 05:58:57 | INFO | train_inner | epoch 002: 588 / 3002 loss=2.702, ppl=6.51, wps=5926, ups=0.09, wpb=64770, bsz=128, num_updates=3564, lr=9.99795e-05, gnorm=2.311, loss_scale=2, train_wall=10, gb_free=2.8, wall=40811 2021-06-19 05:59:08 | INFO | train_inner | epoch 002: 589 / 3002 loss=2.817, ppl=7.04, wps=5930.6, ups=0.09, wpb=64858, bsz=128, num_updates=3565, lr=9.99795e-05, gnorm=2.248, loss_scale=2, train_wall=10, gb_free=2.8, wall=40822 2021-06-19 05:59:19 | INFO | train_inner | epoch 002: 590 / 3002 loss=2.719, ppl=6.58, wps=5825.5, ups=0.09, wpb=64854, bsz=128, num_updates=3566, lr=9.99795e-05, gnorm=2.471, loss_scale=2, train_wall=11, gb_free=2.8, wall=40833 2021-06-19 05:59:30 | INFO | train_inner | epoch 002: 591 / 3002 loss=2.806, ppl=6.99, wps=5792.7, ups=0.09, wpb=64882, bsz=128, num_updates=3567, lr=9.99795e-05, gnorm=2.367, loss_scale=2, train_wall=11, gb_free=2.8, wall=40845 2021-06-19 05:59:42 | INFO | train_inner | epoch 002: 592 / 3002 loss=2.714, ppl=6.56, wps=5780.9, ups=0.09, wpb=64863, bsz=128, num_updates=3568, lr=9.99795e-05, gnorm=2.339, loss_scale=2, train_wall=11, gb_free=2.8, wall=40856 2021-06-19 05:59:53 | INFO | train_inner | epoch 002: 593 / 3002 loss=2.863, ppl=7.28, wps=5776.6, ups=0.09, wpb=64764, bsz=128, num_updates=3569, lr=9.99794e-05, gnorm=2.331, loss_scale=2, train_wall=11, gb_free=2.8, wall=40867 2021-06-19 06:00:04 | INFO | train_inner | epoch 002: 594 / 3002 loss=2.777, ppl=6.85, wps=5984, ups=0.09, wpb=64775, bsz=128, num_updates=3570, lr=9.99794e-05, gnorm=2.302, loss_scale=2, train_wall=10, gb_free=2.8, wall=40878 2021-06-19 06:00:15 | INFO | train_inner | epoch 002: 595 / 3002 loss=2.78, ppl=6.87, wps=5886.2, ups=0.09, wpb=64870, bsz=128, num_updates=3571, lr=9.99794e-05, gnorm=2.344, loss_scale=2, train_wall=11, gb_free=2.8, wall=40889 2021-06-19 06:00:26 | INFO | train_inner | epoch 002: 596 / 3002 loss=2.897, ppl=7.45, wps=5921.6, ups=0.09, wpb=64775, bsz=128, num_updates=3572, lr=9.99794e-05, gnorm=2.221, loss_scale=2, train_wall=10, gb_free=2.8, wall=40900 2021-06-19 06:00:37 | INFO | train_inner | epoch 002: 597 / 3002 loss=2.832, ppl=7.12, wps=5796.9, ups=0.09, wpb=64773, bsz=128, num_updates=3573, lr=9.99794e-05, gnorm=2.344, loss_scale=2, train_wall=11, gb_free=2.8, wall=40911 2021-06-19 06:00:48 | INFO | train_inner | epoch 002: 598 / 3002 loss=2.82, ppl=7.06, wps=5840.3, ups=0.09, wpb=64712, bsz=128, num_updates=3574, lr=9.99794e-05, gnorm=2.51, loss_scale=2, train_wall=11, gb_free=2.8, wall=40922 2021-06-19 06:00:59 | INFO | train_inner | epoch 002: 599 / 3002 loss=2.612, ppl=6.11, wps=5895.3, ups=0.09, wpb=64811, bsz=128, num_updates=3575, lr=9.99794e-05, gnorm=2.369, loss_scale=2, train_wall=11, gb_free=2.8, wall=40933 2021-06-19 06:01:10 | INFO | train_inner | epoch 002: 600 / 3002 loss=3.013, ppl=8.07, wps=5858.1, ups=0.09, wpb=64764, bsz=128, num_updates=3576, lr=9.99794e-05, gnorm=2.454, loss_scale=2, train_wall=11, gb_free=2.8, wall=40944 2021-06-19 06:01:21 | INFO | train_inner | epoch 002: 601 / 3002 loss=2.693, ppl=6.47, wps=6009.5, ups=0.09, wpb=64838, bsz=128, num_updates=3577, lr=9.99794e-05, gnorm=2.307, loss_scale=2, train_wall=10, gb_free=2.8, wall=40955 2021-06-19 06:01:32 | INFO | train_inner | epoch 002: 602 / 3002 loss=2.787, ppl=6.9, wps=5812.8, ups=0.09, wpb=64821, bsz=128, num_updates=3578, lr=9.99794e-05, gnorm=2.446, loss_scale=2, train_wall=11, gb_free=2.8, wall=40966 2021-06-19 06:01:43 | INFO | train_inner | epoch 002: 603 / 3002 loss=2.857, ppl=7.24, wps=5906.8, ups=0.09, wpb=64811, bsz=128, num_updates=3579, lr=9.99794e-05, gnorm=2.308, loss_scale=2, train_wall=11, gb_free=2.8, wall=40977 2021-06-19 06:01:54 | INFO | train_inner | epoch 002: 604 / 3002 loss=2.842, ppl=7.17, wps=5879.5, ups=0.09, wpb=64846, bsz=128, num_updates=3580, lr=9.99794e-05, gnorm=2.224, loss_scale=2, train_wall=11, gb_free=2.8, wall=40988 2021-06-19 06:02:05 | INFO | train_inner | epoch 002: 605 / 3002 loss=2.679, ppl=6.41, wps=5945.7, ups=0.09, wpb=64865, bsz=128, num_updates=3581, lr=9.99794e-05, gnorm=2.425, loss_scale=2, train_wall=10, gb_free=2.8, wall=40999 2021-06-19 06:02:16 | INFO | train_inner | epoch 002: 606 / 3002 loss=2.812, ppl=7.02, wps=5959.7, ups=0.09, wpb=64882, bsz=128, num_updates=3582, lr=9.99793e-05, gnorm=2.31, loss_scale=2, train_wall=10, gb_free=2.8, wall=41010 2021-06-19 06:02:27 | INFO | train_inner | epoch 002: 607 / 3002 loss=2.725, ppl=6.61, wps=5863.3, ups=0.09, wpb=64735, bsz=128, num_updates=3583, lr=9.99793e-05, gnorm=2.326, loss_scale=2, train_wall=11, gb_free=2.8, wall=41021 2021-06-19 06:02:38 | INFO | train_inner | epoch 002: 608 / 3002 loss=2.879, ppl=7.35, wps=5860.7, ups=0.09, wpb=64721, bsz=128, num_updates=3584, lr=9.99793e-05, gnorm=2.486, loss_scale=2, train_wall=11, gb_free=2.8, wall=41032 2021-06-19 06:02:48 | INFO | train_inner | epoch 002: 609 / 3002 loss=2.782, ppl=6.88, wps=5967.3, ups=0.09, wpb=64750, bsz=128, num_updates=3585, lr=9.99793e-05, gnorm=2.434, loss_scale=2, train_wall=10, gb_free=2.8, wall=41043 2021-06-19 06:03:00 | INFO | train_inner | epoch 002: 610 / 3002 loss=2.783, ppl=6.88, wps=5817.2, ups=0.09, wpb=64836, bsz=128, num_updates=3586, lr=9.99793e-05, gnorm=2.853, loss_scale=2, train_wall=11, gb_free=2.8, wall=41054 2021-06-19 06:03:11 | INFO | train_inner | epoch 002: 611 / 3002 loss=2.65, ppl=6.28, wps=5831, ups=0.09, wpb=64790, bsz=128, num_updates=3587, lr=9.99793e-05, gnorm=2.252, loss_scale=2, train_wall=11, gb_free=2.8, wall=41065 2021-06-19 06:03:22 | INFO | train_inner | epoch 002: 612 / 3002 loss=2.669, ppl=6.36, wps=5725.6, ups=0.09, wpb=64720, bsz=128, num_updates=3588, lr=9.99793e-05, gnorm=2.266, loss_scale=2, train_wall=11, gb_free=2.8, wall=41076 2021-06-19 06:03:33 | INFO | train_inner | epoch 002: 613 / 3002 loss=2.789, ppl=6.91, wps=5737.3, ups=0.09, wpb=64847, bsz=128, num_updates=3589, lr=9.99793e-05, gnorm=2.281, loss_scale=2, train_wall=11, gb_free=2.8, wall=41088 2021-06-19 06:03:44 | INFO | train_inner | epoch 002: 614 / 3002 loss=2.789, ppl=6.91, wps=5895.8, ups=0.09, wpb=64784, bsz=128, num_updates=3590, lr=9.99793e-05, gnorm=2.393, loss_scale=2, train_wall=10, gb_free=2.8, wall=41099 2021-06-19 06:03:55 | INFO | train_inner | epoch 002: 615 / 3002 loss=2.624, ppl=6.17, wps=5986.6, ups=0.09, wpb=64832, bsz=128, num_updates=3591, lr=9.99793e-05, gnorm=2.403, loss_scale=2, train_wall=10, gb_free=2.8, wall=41110 2021-06-19 06:04:06 | INFO | train_inner | epoch 002: 616 / 3002 loss=2.725, ppl=6.61, wps=5848.3, ups=0.09, wpb=64847, bsz=128, num_updates=3592, lr=9.99793e-05, gnorm=2.37, loss_scale=2, train_wall=11, gb_free=2.8, wall=41121 2021-06-19 06:04:17 | INFO | train_inner | epoch 002: 617 / 3002 loss=2.699, ppl=6.49, wps=5815.6, ups=0.09, wpb=64804, bsz=128, num_updates=3593, lr=9.99793e-05, gnorm=2.303, loss_scale=2, train_wall=11, gb_free=2.8, wall=41132 2021-06-19 06:04:29 | INFO | train_inner | epoch 002: 618 / 3002 loss=2.537, ppl=5.8, wps=5844.6, ups=0.09, wpb=64907, bsz=128, num_updates=3594, lr=9.99792e-05, gnorm=2.303, loss_scale=2, train_wall=11, gb_free=2.8, wall=41143 2021-06-19 06:04:40 | INFO | train_inner | epoch 002: 619 / 3002 loss=2.979, ppl=7.88, wps=5811.1, ups=0.09, wpb=64708, bsz=128, num_updates=3595, lr=9.99792e-05, gnorm=2.23, loss_scale=2, train_wall=11, gb_free=2.8, wall=41154 2021-06-19 06:04:51 | INFO | train_inner | epoch 002: 620 / 3002 loss=2.998, ppl=7.99, wps=5791, ups=0.09, wpb=64737, bsz=128, num_updates=3596, lr=9.99792e-05, gnorm=2.705, loss_scale=2, train_wall=11, gb_free=2.8, wall=41165 2021-06-19 06:05:02 | INFO | train_inner | epoch 002: 621 / 3002 loss=2.824, ppl=7.08, wps=5898.4, ups=0.09, wpb=64810, bsz=128, num_updates=3597, lr=9.99792e-05, gnorm=2.304, loss_scale=2, train_wall=11, gb_free=2.8, wall=41176 2021-06-19 06:05:13 | INFO | train_inner | epoch 002: 622 / 3002 loss=2.761, ppl=6.78, wps=5928.5, ups=0.09, wpb=64799, bsz=128, num_updates=3598, lr=9.99792e-05, gnorm=2.249, loss_scale=2, train_wall=10, gb_free=2.8, wall=41187 2021-06-19 06:05:24 | INFO | train_inner | epoch 002: 623 / 3002 loss=2.77, ppl=6.82, wps=5875.4, ups=0.09, wpb=64823, bsz=128, num_updates=3599, lr=9.99792e-05, gnorm=2.28, loss_scale=2, train_wall=11, gb_free=2.8, wall=41198 2021-06-19 06:05:35 | INFO | train_inner | epoch 002: 624 / 3002 loss=2.807, ppl=7, wps=5986.1, ups=0.09, wpb=64789, bsz=128, num_updates=3600, lr=9.99792e-05, gnorm=2.369, loss_scale=2, train_wall=10, gb_free=2.8, wall=41209 2021-06-19 06:05:46 | INFO | train_inner | epoch 002: 625 / 3002 loss=2.783, ppl=6.88, wps=5870.3, ups=0.09, wpb=64758, bsz=128, num_updates=3601, lr=9.99792e-05, gnorm=2.272, loss_scale=2, train_wall=11, gb_free=2.8, wall=41220 2021-06-19 06:05:57 | INFO | train_inner | epoch 002: 626 / 3002 loss=2.865, ppl=7.29, wps=5881.9, ups=0.09, wpb=64808, bsz=128, num_updates=3602, lr=9.99792e-05, gnorm=2.28, loss_scale=2, train_wall=11, gb_free=2.8, wall=41231 2021-06-19 06:06:08 | INFO | train_inner | epoch 002: 627 / 3002 loss=2.834, ppl=7.13, wps=5797.8, ups=0.09, wpb=64791, bsz=128, num_updates=3603, lr=9.99792e-05, gnorm=2.259, loss_scale=2, train_wall=11, gb_free=2.8, wall=41242 2021-06-19 06:06:19 | INFO | train_inner | epoch 002: 628 / 3002 loss=2.906, ppl=7.49, wps=5804, ups=0.09, wpb=64850, bsz=128, num_updates=3604, lr=9.99792e-05, gnorm=2.385, loss_scale=2, train_wall=11, gb_free=2.8, wall=41253 2021-06-19 06:06:30 | INFO | train_inner | epoch 002: 629 / 3002 loss=2.847, ppl=7.2, wps=5742.2, ups=0.09, wpb=64763, bsz=128, num_updates=3605, lr=9.99792e-05, gnorm=2.234, loss_scale=2, train_wall=11, gb_free=2.8, wall=41265 2021-06-19 06:06:41 | INFO | train_inner | epoch 002: 630 / 3002 loss=2.817, ppl=7.05, wps=5866.4, ups=0.09, wpb=64836, bsz=128, num_updates=3606, lr=9.99792e-05, gnorm=2.434, loss_scale=2, train_wall=11, gb_free=2.8, wall=41276 2021-06-19 06:06:53 | INFO | train_inner | epoch 002: 631 / 3002 loss=2.706, ppl=6.52, wps=5809.9, ups=0.09, wpb=64910, bsz=128, num_updates=3607, lr=9.99791e-05, gnorm=2.437, loss_scale=2, train_wall=11, gb_free=2.8, wall=41287 2021-06-19 06:07:04 | INFO | train_inner | epoch 002: 632 / 3002 loss=2.927, ppl=7.6, wps=5863.8, ups=0.09, wpb=64760, bsz=128, num_updates=3608, lr=9.99791e-05, gnorm=2.664, loss_scale=2, train_wall=11, gb_free=2.8, wall=41298 2021-06-19 06:07:15 | INFO | train_inner | epoch 002: 633 / 3002 loss=2.828, ppl=7.1, wps=5907.2, ups=0.09, wpb=64824, bsz=128, num_updates=3609, lr=9.99791e-05, gnorm=2.323, loss_scale=2, train_wall=11, gb_free=2.8, wall=41309 2021-06-19 06:07:25 | INFO | train_inner | epoch 002: 634 / 3002 loss=2.787, ppl=6.9, wps=5907.2, ups=0.09, wpb=64779, bsz=128, num_updates=3610, lr=9.99791e-05, gnorm=2.383, loss_scale=2, train_wall=10, gb_free=2.8, wall=41320 2021-06-19 06:07:36 | INFO | train_inner | epoch 002: 635 / 3002 loss=2.737, ppl=6.67, wps=5971.6, ups=0.09, wpb=64845, bsz=128, num_updates=3611, lr=9.99791e-05, gnorm=2.205, loss_scale=2, train_wall=10, gb_free=2.8, wall=41331 2021-06-19 06:07:47 | INFO | train_inner | epoch 002: 636 / 3002 loss=2.832, ppl=7.12, wps=5861.7, ups=0.09, wpb=64842, bsz=128, num_updates=3612, lr=9.99791e-05, gnorm=2.31, loss_scale=2, train_wall=11, gb_free=2.8, wall=41342 2021-06-19 06:07:58 | INFO | train_inner | epoch 002: 637 / 3002 loss=2.703, ppl=6.51, wps=5980.1, ups=0.09, wpb=64848, bsz=128, num_updates=3613, lr=9.99791e-05, gnorm=2.266, loss_scale=2, train_wall=10, gb_free=2.8, wall=41353 2021-06-19 06:08:09 | INFO | train_inner | epoch 002: 638 / 3002 loss=2.678, ppl=6.4, wps=5776.6, ups=0.09, wpb=64866, bsz=128, num_updates=3614, lr=9.99791e-05, gnorm=3.891, loss_scale=2, train_wall=11, gb_free=2.8, wall=41364 2021-06-19 06:08:21 | INFO | train_inner | epoch 002: 639 / 3002 loss=2.805, ppl=6.99, wps=5759.1, ups=0.09, wpb=64779, bsz=128, num_updates=3615, lr=9.99791e-05, gnorm=2.402, loss_scale=2, train_wall=11, gb_free=2.8, wall=41375 2021-06-19 06:08:32 | INFO | train_inner | epoch 002: 640 / 3002 loss=2.847, ppl=7.2, wps=5882.4, ups=0.09, wpb=64867, bsz=128, num_updates=3616, lr=9.99791e-05, gnorm=2.536, loss_scale=2, train_wall=11, gb_free=2.8, wall=41386 2021-06-19 06:08:43 | INFO | train_inner | epoch 002: 641 / 3002 loss=2.833, ppl=7.13, wps=5775.5, ups=0.09, wpb=64768, bsz=128, num_updates=3617, lr=9.99791e-05, gnorm=2.304, loss_scale=2, train_wall=11, gb_free=2.8, wall=41397 2021-06-19 06:08:54 | INFO | train_inner | epoch 002: 642 / 3002 loss=2.736, ppl=6.66, wps=5757.2, ups=0.09, wpb=64853, bsz=128, num_updates=3618, lr=9.99791e-05, gnorm=2.254, loss_scale=2, train_wall=11, gb_free=2.8, wall=41409 2021-06-19 06:09:05 | INFO | train_inner | epoch 002: 643 / 3002 loss=3.087, ppl=8.5, wps=5988.5, ups=0.09, wpb=64894, bsz=128, num_updates=3619, lr=9.9979e-05, gnorm=2.509, loss_scale=2, train_wall=10, gb_free=2.8, wall=41419 2021-06-19 06:09:16 | INFO | train_inner | epoch 002: 644 / 3002 loss=2.753, ppl=6.74, wps=5842.8, ups=0.09, wpb=64838, bsz=128, num_updates=3620, lr=9.9979e-05, gnorm=2.247, loss_scale=2, train_wall=11, gb_free=2.8, wall=41431 2021-06-19 06:09:27 | INFO | train_inner | epoch 002: 645 / 3002 loss=2.807, ppl=7, wps=5884.6, ups=0.09, wpb=64856, bsz=128, num_updates=3621, lr=9.9979e-05, gnorm=12.368, loss_scale=2, train_wall=11, gb_free=2.8, wall=41442 2021-06-19 06:09:38 | INFO | train_inner | epoch 002: 646 / 3002 loss=2.774, ppl=6.84, wps=5787.9, ups=0.09, wpb=64793, bsz=128, num_updates=3622, lr=9.9979e-05, gnorm=2.186, loss_scale=2, train_wall=11, gb_free=2.8, wall=41453 2021-06-19 06:09:49 | INFO | train_inner | epoch 002: 647 / 3002 loss=2.729, ppl=6.63, wps=5857.1, ups=0.09, wpb=64867, bsz=128, num_updates=3623, lr=9.9979e-05, gnorm=2.297, loss_scale=2, train_wall=11, gb_free=2.8, wall=41464 2021-06-19 06:10:00 | INFO | train_inner | epoch 002: 648 / 3002 loss=2.817, ppl=7.05, wps=5886.1, ups=0.09, wpb=64810, bsz=128, num_updates=3624, lr=9.9979e-05, gnorm=2.284, loss_scale=2, train_wall=11, gb_free=2.8, wall=41475 2021-06-19 06:10:12 | INFO | train_inner | epoch 002: 649 / 3002 loss=2.849, ppl=7.2, wps=5857.7, ups=0.09, wpb=64876, bsz=128, num_updates=3625, lr=9.9979e-05, gnorm=2.272, loss_scale=2, train_wall=11, gb_free=2.8, wall=41486 2021-06-19 06:10:23 | INFO | train_inner | epoch 002: 650 / 3002 loss=2.871, ppl=7.32, wps=5891.3, ups=0.09, wpb=64790, bsz=128, num_updates=3626, lr=9.9979e-05, gnorm=2.199, loss_scale=2, train_wall=11, gb_free=2.8, wall=41497 2021-06-19 06:10:34 | INFO | train_inner | epoch 002: 651 / 3002 loss=2.791, ppl=6.92, wps=5752.6, ups=0.09, wpb=64820, bsz=128, num_updates=3627, lr=9.9979e-05, gnorm=2.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=41508 2021-06-19 06:10:45 | INFO | train_inner | epoch 002: 652 / 3002 loss=2.73, ppl=6.64, wps=5874.6, ups=0.09, wpb=64907, bsz=128, num_updates=3628, lr=9.9979e-05, gnorm=2.229, loss_scale=2, train_wall=11, gb_free=2.8, wall=41519 2021-06-19 06:10:56 | INFO | train_inner | epoch 002: 653 / 3002 loss=2.714, ppl=6.56, wps=5875.4, ups=0.09, wpb=64805, bsz=128, num_updates=3629, lr=9.9979e-05, gnorm=2.365, loss_scale=2, train_wall=11, gb_free=2.8, wall=41530 2021-06-19 06:11:07 | INFO | train_inner | epoch 002: 654 / 3002 loss=2.804, ppl=6.98, wps=5886.5, ups=0.09, wpb=64846, bsz=128, num_updates=3630, lr=9.9979e-05, gnorm=2.191, loss_scale=2, train_wall=11, gb_free=2.8, wall=41541 2021-06-19 06:11:18 | INFO | train_inner | epoch 002: 655 / 3002 loss=2.669, ppl=6.36, wps=5829.9, ups=0.09, wpb=64905, bsz=128, num_updates=3631, lr=9.9979e-05, gnorm=2.436, loss_scale=2, train_wall=11, gb_free=2.8, wall=41552 2021-06-19 06:11:29 | INFO | train_inner | epoch 002: 656 / 3002 loss=2.849, ppl=7.2, wps=5731.9, ups=0.09, wpb=64815, bsz=128, num_updates=3632, lr=9.99789e-05, gnorm=2.378, loss_scale=2, train_wall=11, gb_free=2.8, wall=41564 2021-06-19 06:11:40 | INFO | train_inner | epoch 002: 657 / 3002 loss=2.785, ppl=6.89, wps=5902.5, ups=0.09, wpb=64839, bsz=128, num_updates=3633, lr=9.99789e-05, gnorm=2.391, loss_scale=2, train_wall=11, gb_free=2.8, wall=41575 2021-06-19 06:11:51 | INFO | train_inner | epoch 002: 658 / 3002 loss=2.858, ppl=7.25, wps=5833.1, ups=0.09, wpb=64834, bsz=128, num_updates=3634, lr=9.99789e-05, gnorm=2.243, loss_scale=2, train_wall=11, gb_free=2.8, wall=41586 2021-06-19 06:12:03 | INFO | train_inner | epoch 002: 659 / 3002 loss=2.859, ppl=7.25, wps=5773.9, ups=0.09, wpb=64816, bsz=128, num_updates=3635, lr=9.99789e-05, gnorm=2.291, loss_scale=2, train_wall=11, gb_free=2.8, wall=41597 2021-06-19 06:12:14 | INFO | train_inner | epoch 002: 660 / 3002 loss=2.809, ppl=7.01, wps=5845.1, ups=0.09, wpb=64850, bsz=128, num_updates=3636, lr=9.99789e-05, gnorm=2.361, loss_scale=2, train_wall=11, gb_free=2.8, wall=41608 2021-06-19 06:12:25 | INFO | train_inner | epoch 002: 661 / 3002 loss=2.737, ppl=6.67, wps=5896.4, ups=0.09, wpb=64895, bsz=128, num_updates=3637, lr=9.99789e-05, gnorm=2.28, loss_scale=2, train_wall=11, gb_free=2.8, wall=41619 2021-06-19 06:12:36 | INFO | train_inner | epoch 002: 662 / 3002 loss=2.773, ppl=6.84, wps=5862.7, ups=0.09, wpb=64772, bsz=128, num_updates=3638, lr=9.99789e-05, gnorm=2.335, loss_scale=2, train_wall=11, gb_free=2.8, wall=41630 2021-06-19 06:12:47 | INFO | train_inner | epoch 002: 663 / 3002 loss=2.779, ppl=6.86, wps=5769.3, ups=0.09, wpb=64806, bsz=128, num_updates=3639, lr=9.99789e-05, gnorm=2.344, loss_scale=2, train_wall=11, gb_free=2.8, wall=41641 2021-06-19 06:12:58 | INFO | train_inner | epoch 002: 664 / 3002 loss=2.851, ppl=7.22, wps=5887.2, ups=0.09, wpb=64835, bsz=128, num_updates=3640, lr=9.99789e-05, gnorm=2.329, loss_scale=2, train_wall=11, gb_free=2.8, wall=41652 2021-06-19 06:13:09 | INFO | train_inner | epoch 002: 665 / 3002 loss=2.943, ppl=7.69, wps=5781.9, ups=0.09, wpb=64826, bsz=128, num_updates=3641, lr=9.99789e-05, gnorm=2.347, loss_scale=2, train_wall=11, gb_free=2.8, wall=41664 2021-06-19 06:13:21 | INFO | train_inner | epoch 002: 666 / 3002 loss=2.777, ppl=6.86, wps=5757.6, ups=0.09, wpb=64867, bsz=128, num_updates=3642, lr=9.99789e-05, gnorm=2.334, loss_scale=2, train_wall=11, gb_free=2.8, wall=41675 2021-06-19 06:13:32 | INFO | train_inner | epoch 002: 667 / 3002 loss=2.752, ppl=6.74, wps=5680.7, ups=0.09, wpb=64879, bsz=128, num_updates=3643, lr=9.99789e-05, gnorm=2.206, loss_scale=2, train_wall=11, gb_free=2.8, wall=41686 2021-06-19 06:13:43 | INFO | train_inner | epoch 002: 668 / 3002 loss=2.785, ppl=6.89, wps=5780.8, ups=0.09, wpb=64860, bsz=128, num_updates=3644, lr=9.99788e-05, gnorm=2.226, loss_scale=2, train_wall=11, gb_free=2.8, wall=41698 2021-06-19 06:13:54 | INFO | train_inner | epoch 002: 669 / 3002 loss=2.673, ppl=6.38, wps=5817.3, ups=0.09, wpb=64839, bsz=128, num_updates=3645, lr=9.99788e-05, gnorm=2.334, loss_scale=2, train_wall=11, gb_free=2.8, wall=41709 2021-06-19 06:14:05 | INFO | train_inner | epoch 002: 670 / 3002 loss=2.751, ppl=6.73, wps=5901.8, ups=0.09, wpb=64849, bsz=128, num_updates=3646, lr=9.99788e-05, gnorm=2.301, loss_scale=2, train_wall=11, gb_free=2.8, wall=41720 2021-06-19 06:14:16 | INFO | train_inner | epoch 002: 671 / 3002 loss=2.781, ppl=6.87, wps=5977, ups=0.09, wpb=64845, bsz=128, num_updates=3647, lr=9.99788e-05, gnorm=2.292, loss_scale=2, train_wall=10, gb_free=2.8, wall=41731 2021-06-19 06:14:27 | INFO | train_inner | epoch 002: 672 / 3002 loss=2.818, ppl=7.05, wps=5789.2, ups=0.09, wpb=64756, bsz=128, num_updates=3648, lr=9.99788e-05, gnorm=2.386, loss_scale=4, train_wall=11, gb_free=2.8, wall=41742 2021-06-19 06:14:39 | INFO | train_inner | epoch 002: 673 / 3002 loss=2.888, ppl=7.4, wps=5769.1, ups=0.09, wpb=64742, bsz=128, num_updates=3649, lr=9.99788e-05, gnorm=2.329, loss_scale=4, train_wall=11, gb_free=2.8, wall=41753 2021-06-19 06:14:50 | INFO | train_inner | epoch 002: 674 / 3002 loss=2.718, ppl=6.58, wps=5926.6, ups=0.09, wpb=64859, bsz=128, num_updates=3650, lr=9.99788e-05, gnorm=2.201, loss_scale=4, train_wall=10, gb_free=2.8, wall=41764 2021-06-19 06:15:01 | INFO | train_inner | epoch 002: 675 / 3002 loss=2.807, ppl=7, wps=5848, ups=0.09, wpb=64879, bsz=128, num_updates=3651, lr=9.99788e-05, gnorm=2.278, loss_scale=4, train_wall=11, gb_free=2.8, wall=41775 2021-06-19 06:15:12 | INFO | train_inner | epoch 002: 676 / 3002 loss=2.905, ppl=7.49, wps=5747.6, ups=0.09, wpb=64754, bsz=128, num_updates=3652, lr=9.99788e-05, gnorm=2.259, loss_scale=4, train_wall=11, gb_free=2.8, wall=41786 2021-06-19 06:15:23 | INFO | train_inner | epoch 002: 677 / 3002 loss=2.83, ppl=7.11, wps=5919, ups=0.09, wpb=64876, bsz=128, num_updates=3653, lr=9.99788e-05, gnorm=2.345, loss_scale=4, train_wall=11, gb_free=2.8, wall=41797 2021-06-19 06:15:34 | INFO | train_inner | epoch 002: 678 / 3002 loss=2.818, ppl=7.05, wps=5833, ups=0.09, wpb=64887, bsz=128, num_updates=3654, lr=9.99788e-05, gnorm=2.259, loss_scale=4, train_wall=11, gb_free=2.8, wall=41808 2021-06-19 06:15:45 | INFO | train_inner | epoch 002: 679 / 3002 loss=2.815, ppl=7.04, wps=5887.4, ups=0.09, wpb=64822, bsz=128, num_updates=3655, lr=9.99788e-05, gnorm=2.321, loss_scale=4, train_wall=11, gb_free=2.8, wall=41819 2021-06-19 06:15:56 | INFO | train_inner | epoch 002: 680 / 3002 loss=2.662, ppl=6.33, wps=5823.4, ups=0.09, wpb=64802, bsz=128, num_updates=3656, lr=9.99788e-05, gnorm=2.263, loss_scale=4, train_wall=11, gb_free=2.8, wall=41830 2021-06-19 06:16:07 | INFO | train_inner | epoch 002: 681 / 3002 loss=2.906, ppl=7.49, wps=5831.2, ups=0.09, wpb=64804, bsz=128, num_updates=3657, lr=9.99787e-05, gnorm=2.238, loss_scale=4, train_wall=11, gb_free=2.8, wall=41842 2021-06-19 06:16:18 | INFO | train_inner | epoch 002: 682 / 3002 loss=2.652, ppl=6.29, wps=5986.5, ups=0.09, wpb=64812, bsz=128, num_updates=3658, lr=9.99787e-05, gnorm=2.253, loss_scale=4, train_wall=10, gb_free=2.8, wall=41852 2021-06-19 06:16:29 | INFO | train_inner | epoch 002: 683 / 3002 loss=2.8, ppl=6.96, wps=5777.3, ups=0.09, wpb=64857, bsz=128, num_updates=3659, lr=9.99787e-05, gnorm=2.309, loss_scale=4, train_wall=11, gb_free=2.8, wall=41864 2021-06-19 06:16:40 | INFO | train_inner | epoch 002: 684 / 3002 loss=2.768, ppl=6.81, wps=5851.4, ups=0.09, wpb=64841, bsz=128, num_updates=3660, lr=9.99787e-05, gnorm=2.307, loss_scale=4, train_wall=11, gb_free=2.8, wall=41875 2021-06-19 06:16:51 | INFO | train_inner | epoch 002: 685 / 3002 loss=2.886, ppl=7.39, wps=5894.1, ups=0.09, wpb=64940, bsz=128, num_updates=3661, lr=9.99787e-05, gnorm=6.311, loss_scale=4, train_wall=11, gb_free=2.8, wall=41886 2021-06-19 06:17:03 | INFO | train_inner | epoch 002: 686 / 3002 loss=2.659, ppl=6.32, wps=5832, ups=0.09, wpb=64834, bsz=128, num_updates=3662, lr=9.99787e-05, gnorm=2.323, loss_scale=4, train_wall=11, gb_free=2.8, wall=41897 2021-06-19 06:17:14 | INFO | train_inner | epoch 002: 687 / 3002 loss=2.604, ppl=6.08, wps=5894.7, ups=0.09, wpb=64874, bsz=128, num_updates=3663, lr=9.99787e-05, gnorm=2.235, loss_scale=4, train_wall=11, gb_free=2.8, wall=41908 2021-06-19 06:17:25 | INFO | train_inner | epoch 002: 688 / 3002 loss=2.795, ppl=6.94, wps=5890.5, ups=0.09, wpb=64880, bsz=128, num_updates=3664, lr=9.99787e-05, gnorm=2.283, loss_scale=4, train_wall=11, gb_free=2.8, wall=41919 2021-06-19 06:17:36 | INFO | train_inner | epoch 002: 689 / 3002 loss=2.803, ppl=6.98, wps=5818, ups=0.09, wpb=64825, bsz=128, num_updates=3665, lr=9.99787e-05, gnorm=2.25, loss_scale=4, train_wall=11, gb_free=2.8, wall=41930 2021-06-19 06:17:47 | INFO | train_inner | epoch 002: 690 / 3002 loss=2.746, ppl=6.71, wps=5815, ups=0.09, wpb=64848, bsz=128, num_updates=3666, lr=9.99787e-05, gnorm=2.302, loss_scale=4, train_wall=11, gb_free=2.8, wall=41941 2021-06-19 06:17:58 | INFO | train_inner | epoch 002: 691 / 3002 loss=2.796, ppl=6.95, wps=5723.5, ups=0.09, wpb=64880, bsz=128, num_updates=3667, lr=9.99787e-05, gnorm=2.34, loss_scale=4, train_wall=11, gb_free=2.8, wall=41952 2021-06-19 06:18:09 | INFO | train_inner | epoch 002: 692 / 3002 loss=2.816, ppl=7.04, wps=5830.7, ups=0.09, wpb=64805, bsz=128, num_updates=3668, lr=9.99787e-05, gnorm=2.351, loss_scale=4, train_wall=11, gb_free=2.8, wall=41964 2021-06-19 06:18:20 | INFO | train_inner | epoch 002: 693 / 3002 loss=3.05, ppl=8.28, wps=5806.4, ups=0.09, wpb=64788, bsz=128, num_updates=3669, lr=9.99786e-05, gnorm=2.294, loss_scale=4, train_wall=11, gb_free=2.8, wall=41975 2021-06-19 06:18:31 | INFO | train_inner | epoch 002: 694 / 3002 loss=2.802, ppl=6.98, wps=5883.8, ups=0.09, wpb=64767, bsz=128, num_updates=3670, lr=9.99786e-05, gnorm=2.27, loss_scale=4, train_wall=11, gb_free=2.8, wall=41986 2021-06-19 06:18:43 | INFO | train_inner | epoch 002: 695 / 3002 loss=2.786, ppl=6.9, wps=5853.4, ups=0.09, wpb=64864, bsz=128, num_updates=3671, lr=9.99786e-05, gnorm=2.289, loss_scale=4, train_wall=11, gb_free=2.8, wall=41997 2021-06-19 06:18:54 | INFO | train_inner | epoch 002: 696 / 3002 loss=2.813, ppl=7.03, wps=5857, ups=0.09, wpb=64785, bsz=128, num_updates=3672, lr=9.99786e-05, gnorm=2.206, loss_scale=4, train_wall=11, gb_free=2.8, wall=42008 2021-06-19 06:19:05 | INFO | train_inner | epoch 002: 697 / 3002 loss=2.851, ppl=7.21, wps=5900.4, ups=0.09, wpb=64698, bsz=128, num_updates=3673, lr=9.99786e-05, gnorm=2.239, loss_scale=4, train_wall=11, gb_free=2.8, wall=42019 2021-06-19 06:19:16 | INFO | train_inner | epoch 002: 698 / 3002 loss=3.025, ppl=8.14, wps=5714, ups=0.09, wpb=64763, bsz=128, num_updates=3674, lr=9.99786e-05, gnorm=2.318, loss_scale=4, train_wall=11, gb_free=2.8, wall=42030 2021-06-19 06:19:27 | INFO | train_inner | epoch 002: 699 / 3002 loss=2.775, ppl=6.84, wps=5862.1, ups=0.09, wpb=64831, bsz=128, num_updates=3675, lr=9.99786e-05, gnorm=2.349, loss_scale=4, train_wall=11, gb_free=2.8, wall=42041 2021-06-19 06:19:38 | INFO | train_inner | epoch 002: 700 / 3002 loss=2.706, ppl=6.52, wps=5978, ups=0.09, wpb=64923, bsz=128, num_updates=3676, lr=9.99786e-05, gnorm=2.428, loss_scale=4, train_wall=10, gb_free=2.8, wall=42052 2021-06-19 06:19:49 | INFO | train_inner | epoch 002: 701 / 3002 loss=2.804, ppl=6.98, wps=5785.3, ups=0.09, wpb=64721, bsz=128, num_updates=3677, lr=9.99786e-05, gnorm=2.277, loss_scale=4, train_wall=11, gb_free=2.8, wall=42063 2021-06-19 06:20:00 | INFO | train_inner | epoch 002: 702 / 3002 loss=2.795, ppl=6.94, wps=5779.9, ups=0.09, wpb=64876, bsz=128, num_updates=3678, lr=9.99786e-05, gnorm=2.337, loss_scale=4, train_wall=11, gb_free=2.8, wall=42075 2021-06-19 06:20:11 | INFO | train_inner | epoch 002: 703 / 3002 loss=2.898, ppl=7.45, wps=5771.1, ups=0.09, wpb=64738, bsz=128, num_updates=3679, lr=9.99786e-05, gnorm=3.324, loss_scale=4, train_wall=11, gb_free=2.8, wall=42086 2021-06-19 06:20:22 | INFO | train_inner | epoch 002: 704 / 3002 loss=2.725, ppl=6.61, wps=5890.2, ups=0.09, wpb=64785, bsz=128, num_updates=3680, lr=9.99786e-05, gnorm=2.335, loss_scale=4, train_wall=11, gb_free=2.8, wall=42097 2021-06-19 06:20:33 | INFO | train_inner | epoch 002: 705 / 3002 loss=2.929, ppl=7.61, wps=6030.1, ups=0.09, wpb=64836, bsz=128, num_updates=3681, lr=9.99786e-05, gnorm=2.373, loss_scale=4, train_wall=10, gb_free=2.8, wall=42108 2021-06-19 06:20:44 | INFO | train_inner | epoch 002: 706 / 3002 loss=2.676, ppl=6.39, wps=5860.1, ups=0.09, wpb=64800, bsz=128, num_updates=3682, lr=9.99785e-05, gnorm=2.432, loss_scale=4, train_wall=11, gb_free=2.8, wall=42119 2021-06-19 06:20:55 | INFO | train_inner | epoch 002: 707 / 3002 loss=2.919, ppl=7.56, wps=5968, ups=0.09, wpb=64790, bsz=128, num_updates=3683, lr=9.99785e-05, gnorm=2.39, loss_scale=4, train_wall=10, gb_free=2.8, wall=42129 2021-06-19 06:21:06 | INFO | train_inner | epoch 002: 708 / 3002 loss=2.863, ppl=7.27, wps=5964.6, ups=0.09, wpb=64812, bsz=128, num_updates=3684, lr=9.99785e-05, gnorm=2.251, loss_scale=4, train_wall=10, gb_free=2.8, wall=42140 2021-06-19 06:21:17 | INFO | train_inner | epoch 002: 709 / 3002 loss=2.801, ppl=6.97, wps=5751.4, ups=0.09, wpb=64861, bsz=128, num_updates=3685, lr=9.99785e-05, gnorm=2.428, loss_scale=4, train_wall=11, gb_free=2.8, wall=42152 2021-06-19 06:21:28 | INFO | train_inner | epoch 002: 710 / 3002 loss=2.975, ppl=7.86, wps=5959.1, ups=0.09, wpb=64752, bsz=128, num_updates=3686, lr=9.99785e-05, gnorm=2.505, loss_scale=4, train_wall=10, gb_free=2.8, wall=42162 2021-06-19 06:21:39 | INFO | train_inner | epoch 002: 711 / 3002 loss=2.787, ppl=6.9, wps=5750.1, ups=0.09, wpb=64806, bsz=128, num_updates=3687, lr=9.99785e-05, gnorm=2.452, loss_scale=4, train_wall=11, gb_free=2.8, wall=42174 2021-06-19 06:21:50 | INFO | train_inner | epoch 002: 712 / 3002 loss=2.759, ppl=6.77, wps=5853.7, ups=0.09, wpb=64818, bsz=128, num_updates=3688, lr=9.99785e-05, gnorm=2.526, loss_scale=4, train_wall=11, gb_free=2.8, wall=42185 2021-06-19 06:22:01 | INFO | train_inner | epoch 002: 713 / 3002 loss=2.79, ppl=6.91, wps=5878.1, ups=0.09, wpb=64797, bsz=128, num_updates=3689, lr=9.99785e-05, gnorm=2.385, loss_scale=4, train_wall=11, gb_free=2.8, wall=42196 2021-06-19 06:22:13 | INFO | train_inner | epoch 002: 714 / 3002 loss=2.828, ppl=7.1, wps=5858.4, ups=0.09, wpb=64796, bsz=128, num_updates=3690, lr=9.99785e-05, gnorm=2.398, loss_scale=4, train_wall=11, gb_free=2.8, wall=42207 2021-06-19 06:22:24 | INFO | train_inner | epoch 002: 715 / 3002 loss=2.836, ppl=7.14, wps=5856.7, ups=0.09, wpb=64861, bsz=128, num_updates=3691, lr=9.99785e-05, gnorm=2.678, loss_scale=4, train_wall=11, gb_free=2.8, wall=42218 2021-06-19 06:22:35 | INFO | train_inner | epoch 002: 716 / 3002 loss=2.691, ppl=6.46, wps=5858.9, ups=0.09, wpb=64809, bsz=128, num_updates=3692, lr=9.99785e-05, gnorm=3.483, loss_scale=4, train_wall=11, gb_free=2.8, wall=42229 2021-06-19 06:22:46 | INFO | train_inner | epoch 002: 717 / 3002 loss=2.906, ppl=7.49, wps=5831.9, ups=0.09, wpb=64739, bsz=128, num_updates=3693, lr=9.99785e-05, gnorm=2.489, loss_scale=4, train_wall=11, gb_free=2.8, wall=42240 2021-06-19 06:22:57 | INFO | train_inner | epoch 002: 718 / 3002 loss=2.843, ppl=7.18, wps=5903.6, ups=0.09, wpb=64798, bsz=128, num_updates=3694, lr=9.99784e-05, gnorm=2.382, loss_scale=4, train_wall=11, gb_free=2.8, wall=42251 2021-06-19 06:23:08 | INFO | train_inner | epoch 002: 719 / 3002 loss=2.784, ppl=6.89, wps=5846.9, ups=0.09, wpb=64831, bsz=128, num_updates=3695, lr=9.99784e-05, gnorm=2.546, loss_scale=4, train_wall=11, gb_free=2.8, wall=42262 2021-06-19 06:23:19 | INFO | train_inner | epoch 002: 720 / 3002 loss=2.71, ppl=6.55, wps=5885.8, ups=0.09, wpb=64874, bsz=128, num_updates=3696, lr=9.99784e-05, gnorm=2.253, loss_scale=4, train_wall=11, gb_free=2.8, wall=42273 2021-06-19 06:23:30 | INFO | train_inner | epoch 002: 721 / 3002 loss=2.809, ppl=7.01, wps=5893.1, ups=0.09, wpb=64799, bsz=128, num_updates=3697, lr=9.99784e-05, gnorm=3.687, loss_scale=4, train_wall=11, gb_free=2.8, wall=42284 2021-06-19 06:23:41 | INFO | train_inner | epoch 002: 722 / 3002 loss=2.738, ppl=6.67, wps=5937.8, ups=0.09, wpb=64854, bsz=128, num_updates=3698, lr=9.99784e-05, gnorm=2.335, loss_scale=4, train_wall=10, gb_free=2.8, wall=42295 2021-06-19 06:23:52 | INFO | train_inner | epoch 002: 723 / 3002 loss=2.779, ppl=6.86, wps=5792.3, ups=0.09, wpb=64844, bsz=128, num_updates=3699, lr=9.99784e-05, gnorm=2.337, loss_scale=4, train_wall=11, gb_free=2.8, wall=42306 2021-06-19 06:24:03 | INFO | train_inner | epoch 002: 724 / 3002 loss=2.834, ppl=7.13, wps=5863.4, ups=0.09, wpb=64869, bsz=128, num_updates=3700, lr=9.99784e-05, gnorm=2.329, loss_scale=4, train_wall=11, gb_free=2.8, wall=42317 2021-06-19 06:24:14 | INFO | train_inner | epoch 002: 725 / 3002 loss=2.676, ppl=6.39, wps=5871.5, ups=0.09, wpb=64837, bsz=128, num_updates=3701, lr=9.99784e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=42328 2021-06-19 06:24:25 | INFO | train_inner | epoch 002: 726 / 3002 loss=2.833, ppl=7.12, wps=5861.1, ups=0.09, wpb=64900, bsz=128, num_updates=3702, lr=9.99784e-05, gnorm=2.238, loss_scale=4, train_wall=11, gb_free=2.8, wall=42339 2021-06-19 06:24:36 | INFO | train_inner | epoch 002: 727 / 3002 loss=2.787, ppl=6.9, wps=5874.2, ups=0.09, wpb=64953, bsz=128, num_updates=3703, lr=9.99784e-05, gnorm=2.249, loss_scale=4, train_wall=11, gb_free=2.8, wall=42351 2021-06-19 06:24:47 | INFO | train_inner | epoch 002: 728 / 3002 loss=2.814, ppl=7.03, wps=6097.2, ups=0.09, wpb=64826, bsz=128, num_updates=3704, lr=9.99784e-05, gnorm=2.267, loss_scale=4, train_wall=10, gb_free=2.8, wall=42361 2021-06-19 06:24:58 | INFO | train_inner | epoch 002: 729 / 3002 loss=2.806, ppl=6.99, wps=5933.9, ups=0.09, wpb=64839, bsz=128, num_updates=3705, lr=9.99784e-05, gnorm=2.318, loss_scale=4, train_wall=10, gb_free=2.8, wall=42372 2021-06-19 06:25:09 | INFO | train_inner | epoch 002: 730 / 3002 loss=2.81, ppl=7.01, wps=5908, ups=0.09, wpb=64947, bsz=128, num_updates=3706, lr=9.99784e-05, gnorm=2.322, loss_scale=4, train_wall=11, gb_free=2.8, wall=42383 2021-06-19 06:25:20 | INFO | train_inner | epoch 002: 731 / 3002 loss=2.877, ppl=7.35, wps=5840.5, ups=0.09, wpb=64886, bsz=128, num_updates=3707, lr=9.99783e-05, gnorm=2.287, loss_scale=4, train_wall=11, gb_free=2.8, wall=42394 2021-06-19 06:25:31 | INFO | train_inner | epoch 002: 732 / 3002 loss=2.702, ppl=6.51, wps=5857.2, ups=0.09, wpb=64812, bsz=128, num_updates=3708, lr=9.99783e-05, gnorm=2.189, loss_scale=4, train_wall=11, gb_free=2.8, wall=42405 2021-06-19 06:25:42 | INFO | train_inner | epoch 002: 733 / 3002 loss=2.812, ppl=7.02, wps=5872.9, ups=0.09, wpb=64824, bsz=128, num_updates=3709, lr=9.99783e-05, gnorm=2.312, loss_scale=4, train_wall=11, gb_free=2.8, wall=42416 2021-06-19 06:25:53 | INFO | train_inner | epoch 002: 734 / 3002 loss=2.861, ppl=7.27, wps=5881, ups=0.09, wpb=64848, bsz=128, num_updates=3710, lr=9.99783e-05, gnorm=2.243, loss_scale=4, train_wall=11, gb_free=2.8, wall=42427 2021-06-19 06:26:04 | INFO | train_inner | epoch 002: 735 / 3002 loss=2.871, ppl=7.32, wps=5793.7, ups=0.09, wpb=64844, bsz=128, num_updates=3711, lr=9.99783e-05, gnorm=2.346, loss_scale=4, train_wall=11, gb_free=2.8, wall=42439 2021-06-19 06:26:15 | INFO | train_inner | epoch 002: 736 / 3002 loss=2.871, ppl=7.31, wps=5821.2, ups=0.09, wpb=64783, bsz=128, num_updates=3712, lr=9.99783e-05, gnorm=2.98, loss_scale=4, train_wall=11, gb_free=2.8, wall=42450 2021-06-19 06:26:26 | INFO | train_inner | epoch 002: 737 / 3002 loss=2.997, ppl=7.98, wps=5890.3, ups=0.09, wpb=64796, bsz=128, num_updates=3713, lr=9.99783e-05, gnorm=2.299, loss_scale=4, train_wall=11, gb_free=2.8, wall=42461 2021-06-19 06:26:37 | INFO | train_inner | epoch 002: 738 / 3002 loss=2.925, ppl=7.6, wps=5932.8, ups=0.09, wpb=64919, bsz=128, num_updates=3714, lr=9.99783e-05, gnorm=2.286, loss_scale=4, train_wall=10, gb_free=2.8, wall=42472 2021-06-19 06:26:48 | INFO | train_inner | epoch 002: 739 / 3002 loss=2.598, ppl=6.05, wps=5798.2, ups=0.09, wpb=64807, bsz=128, num_updates=3715, lr=9.99783e-05, gnorm=2.262, loss_scale=4, train_wall=11, gb_free=2.8, wall=42483 2021-06-19 06:26:59 | INFO | train_inner | epoch 002: 740 / 3002 loss=2.596, ppl=6.04, wps=5872.3, ups=0.09, wpb=64876, bsz=128, num_updates=3716, lr=9.99783e-05, gnorm=2.33, loss_scale=4, train_wall=11, gb_free=2.8, wall=42494 2021-06-19 06:27:11 | INFO | train_inner | epoch 002: 741 / 3002 loss=2.909, ppl=7.51, wps=5821, ups=0.09, wpb=64828, bsz=128, num_updates=3717, lr=9.99783e-05, gnorm=2.326, loss_scale=4, train_wall=11, gb_free=2.8, wall=42505 2021-06-19 06:27:22 | INFO | train_inner | epoch 002: 742 / 3002 loss=2.598, ppl=6.05, wps=5928.3, ups=0.09, wpb=64893, bsz=128, num_updates=3718, lr=9.99783e-05, gnorm=2.285, loss_scale=4, train_wall=10, gb_free=2.8, wall=42516 2021-06-19 06:27:33 | INFO | train_inner | epoch 002: 743 / 3002 loss=2.699, ppl=6.49, wps=5831.3, ups=0.09, wpb=64872, bsz=128, num_updates=3719, lr=9.99782e-05, gnorm=2.274, loss_scale=4, train_wall=11, gb_free=2.8, wall=42527 2021-06-19 06:27:44 | INFO | train_inner | epoch 002: 744 / 3002 loss=2.802, ppl=6.98, wps=5883, ups=0.09, wpb=64878, bsz=128, num_updates=3720, lr=9.99782e-05, gnorm=2.326, loss_scale=4, train_wall=11, gb_free=2.8, wall=42538 2021-06-19 06:27:55 | INFO | train_inner | epoch 002: 745 / 3002 loss=2.618, ppl=6.14, wps=5883, ups=0.09, wpb=64909, bsz=128, num_updates=3721, lr=9.99782e-05, gnorm=2.303, loss_scale=4, train_wall=11, gb_free=2.8, wall=42549 2021-06-19 06:28:06 | INFO | train_inner | epoch 002: 746 / 3002 loss=2.697, ppl=6.48, wps=5780.8, ups=0.09, wpb=64765, bsz=128, num_updates=3722, lr=9.99782e-05, gnorm=2.308, loss_scale=4, train_wall=11, gb_free=2.8, wall=42560 2021-06-19 06:28:17 | INFO | train_inner | epoch 002: 747 / 3002 loss=2.878, ppl=7.35, wps=5799.1, ups=0.09, wpb=64783, bsz=128, num_updates=3723, lr=9.99782e-05, gnorm=2.28, loss_scale=4, train_wall=11, gb_free=2.8, wall=42571 2021-06-19 06:28:28 | INFO | train_inner | epoch 002: 748 / 3002 loss=2.848, ppl=7.2, wps=5847.7, ups=0.09, wpb=64837, bsz=128, num_updates=3724, lr=9.99782e-05, gnorm=2.403, loss_scale=4, train_wall=11, gb_free=2.8, wall=42583 2021-06-19 06:28:39 | INFO | train_inner | epoch 002: 749 / 3002 loss=3.065, ppl=8.37, wps=5905.6, ups=0.09, wpb=64831, bsz=128, num_updates=3725, lr=9.99782e-05, gnorm=2.37, loss_scale=4, train_wall=11, gb_free=2.8, wall=42594 2021-06-19 06:28:50 | INFO | train_inner | epoch 002: 750 / 3002 loss=2.716, ppl=6.57, wps=5820.3, ups=0.09, wpb=64788, bsz=128, num_updates=3726, lr=9.99782e-05, gnorm=2.305, loss_scale=4, train_wall=11, gb_free=2.8, wall=42605 2021-06-19 06:29:01 | INFO | train_inner | epoch 002: 751 / 3002 loss=2.935, ppl=7.65, wps=5884.9, ups=0.09, wpb=64884, bsz=128, num_updates=3727, lr=9.99782e-05, gnorm=2.275, loss_scale=4, train_wall=11, gb_free=2.8, wall=42616 2021-06-19 06:29:12 | INFO | train_inner | epoch 002: 752 / 3002 loss=2.842, ppl=7.17, wps=5829.7, ups=0.09, wpb=64804, bsz=128, num_updates=3728, lr=9.99782e-05, gnorm=2.232, loss_scale=4, train_wall=11, gb_free=2.8, wall=42627 2021-06-19 06:29:24 | INFO | train_inner | epoch 002: 753 / 3002 loss=2.858, ppl=7.25, wps=5845.9, ups=0.09, wpb=64868, bsz=128, num_updates=3729, lr=9.99782e-05, gnorm=2.233, loss_scale=4, train_wall=11, gb_free=2.8, wall=42638 2021-06-19 06:29:34 | INFO | train_inner | epoch 002: 754 / 3002 loss=2.896, ppl=7.45, wps=5982.9, ups=0.09, wpb=64828, bsz=128, num_updates=3730, lr=9.99782e-05, gnorm=2.393, loss_scale=4, train_wall=10, gb_free=2.8, wall=42649 2021-06-19 06:29:46 | INFO | train_inner | epoch 002: 755 / 3002 loss=2.811, ppl=7.02, wps=5834.3, ups=0.09, wpb=64794, bsz=128, num_updates=3731, lr=9.99782e-05, gnorm=2.182, loss_scale=4, train_wall=11, gb_free=2.8, wall=42660 2021-06-19 06:29:57 | INFO | train_inner | epoch 002: 756 / 3002 loss=2.807, ppl=7, wps=5893.3, ups=0.09, wpb=64812, bsz=128, num_updates=3732, lr=9.99781e-05, gnorm=2.259, loss_scale=4, train_wall=11, gb_free=2.8, wall=42671 2021-06-19 06:30:07 | INFO | train_inner | epoch 002: 757 / 3002 loss=2.875, ppl=7.34, wps=5908.4, ups=0.09, wpb=64852, bsz=128, num_updates=3733, lr=9.99781e-05, gnorm=2.423, loss_scale=4, train_wall=11, gb_free=2.8, wall=42682 2021-06-19 06:30:19 | INFO | train_inner | epoch 002: 758 / 3002 loss=2.885, ppl=7.39, wps=5805.8, ups=0.09, wpb=64787, bsz=128, num_updates=3734, lr=9.99781e-05, gnorm=2.212, loss_scale=4, train_wall=11, gb_free=2.8, wall=42693 2021-06-19 06:30:30 | INFO | train_inner | epoch 002: 759 / 3002 loss=2.811, ppl=7.02, wps=5848.2, ups=0.09, wpb=64763, bsz=128, num_updates=3735, lr=9.99781e-05, gnorm=2.25, loss_scale=4, train_wall=11, gb_free=2.8, wall=42704 2021-06-19 06:30:41 | INFO | train_inner | epoch 002: 760 / 3002 loss=2.677, ppl=6.4, wps=5852.5, ups=0.09, wpb=64874, bsz=128, num_updates=3736, lr=9.99781e-05, gnorm=2.166, loss_scale=4, train_wall=11, gb_free=2.8, wall=42715 2021-06-19 06:30:52 | INFO | train_inner | epoch 002: 761 / 3002 loss=2.819, ppl=7.06, wps=5713.1, ups=0.09, wpb=64794, bsz=128, num_updates=3737, lr=9.99781e-05, gnorm=2.313, loss_scale=4, train_wall=11, gb_free=2.8, wall=42726 2021-06-19 06:31:03 | INFO | train_inner | epoch 002: 762 / 3002 loss=2.826, ppl=7.09, wps=5819.7, ups=0.09, wpb=64881, bsz=128, num_updates=3738, lr=9.99781e-05, gnorm=2.27, loss_scale=4, train_wall=11, gb_free=2.8, wall=42738 2021-06-19 06:31:14 | INFO | train_inner | epoch 002: 763 / 3002 loss=2.707, ppl=6.53, wps=5850.5, ups=0.09, wpb=64820, bsz=128, num_updates=3739, lr=9.99781e-05, gnorm=2.229, loss_scale=4, train_wall=11, gb_free=2.8, wall=42749 2021-06-19 06:31:25 | INFO | train_inner | epoch 002: 764 / 3002 loss=2.748, ppl=6.72, wps=5892.6, ups=0.09, wpb=64832, bsz=128, num_updates=3740, lr=9.99781e-05, gnorm=2.232, loss_scale=4, train_wall=11, gb_free=2.8, wall=42760 2021-06-19 06:31:36 | INFO | train_inner | epoch 002: 765 / 3002 loss=2.854, ppl=7.23, wps=5934.7, ups=0.09, wpb=64801, bsz=128, num_updates=3741, lr=9.99781e-05, gnorm=2.419, loss_scale=4, train_wall=10, gb_free=2.8, wall=42771 2021-06-19 06:31:47 | INFO | train_inner | epoch 002: 766 / 3002 loss=2.773, ppl=6.83, wps=5820.6, ups=0.09, wpb=64828, bsz=128, num_updates=3742, lr=9.99781e-05, gnorm=2.484, loss_scale=4, train_wall=11, gb_free=2.8, wall=42782 2021-06-19 06:31:58 | INFO | train_inner | epoch 002: 767 / 3002 loss=2.812, ppl=7.02, wps=5885.5, ups=0.09, wpb=64821, bsz=128, num_updates=3743, lr=9.99781e-05, gnorm=2.309, loss_scale=4, train_wall=11, gb_free=2.8, wall=42793 2021-06-19 06:32:10 | INFO | train_inner | epoch 002: 768 / 3002 loss=2.789, ppl=6.91, wps=5868.5, ups=0.09, wpb=64919, bsz=128, num_updates=3744, lr=9.9978e-05, gnorm=2.439, loss_scale=4, train_wall=11, gb_free=2.8, wall=42804 2021-06-19 06:32:21 | INFO | train_inner | epoch 002: 769 / 3002 loss=2.709, ppl=6.54, wps=5842.3, ups=0.09, wpb=64844, bsz=128, num_updates=3745, lr=9.9978e-05, gnorm=2.221, loss_scale=4, train_wall=11, gb_free=2.8, wall=42815 2021-06-19 06:32:32 | INFO | train_inner | epoch 002: 770 / 3002 loss=2.777, ppl=6.85, wps=5820.9, ups=0.09, wpb=64814, bsz=128, num_updates=3746, lr=9.9978e-05, gnorm=2.18, loss_scale=4, train_wall=11, gb_free=2.8, wall=42826 2021-06-19 06:32:43 | INFO | train_inner | epoch 002: 771 / 3002 loss=2.783, ppl=6.88, wps=5933.6, ups=0.09, wpb=64860, bsz=128, num_updates=3747, lr=9.9978e-05, gnorm=2.375, loss_scale=4, train_wall=10, gb_free=2.8, wall=42837 2021-06-19 06:32:54 | INFO | train_inner | epoch 002: 772 / 3002 loss=2.681, ppl=6.41, wps=5839.1, ups=0.09, wpb=64885, bsz=128, num_updates=3748, lr=9.9978e-05, gnorm=2.269, loss_scale=4, train_wall=11, gb_free=2.8, wall=42848 2021-06-19 06:33:05 | INFO | train_inner | epoch 002: 773 / 3002 loss=2.804, ppl=6.98, wps=5782.9, ups=0.09, wpb=64838, bsz=128, num_updates=3749, lr=9.9978e-05, gnorm=3.044, loss_scale=4, train_wall=11, gb_free=2.8, wall=42859 2021-06-19 06:33:16 | INFO | train_inner | epoch 002: 774 / 3002 loss=2.746, ppl=6.71, wps=5904.8, ups=0.09, wpb=64778, bsz=128, num_updates=3750, lr=9.9978e-05, gnorm=2.427, loss_scale=4, train_wall=11, gb_free=2.8, wall=42870 2021-06-19 06:33:27 | INFO | train_inner | epoch 002: 775 / 3002 loss=2.748, ppl=6.72, wps=5774.2, ups=0.09, wpb=64880, bsz=128, num_updates=3751, lr=9.9978e-05, gnorm=2.265, loss_scale=4, train_wall=11, gb_free=2.8, wall=42882 2021-06-19 06:33:38 | INFO | train_inner | epoch 002: 776 / 3002 loss=2.88, ppl=7.36, wps=5923.4, ups=0.09, wpb=64746, bsz=128, num_updates=3752, lr=9.9978e-05, gnorm=2.232, loss_scale=4, train_wall=10, gb_free=2.8, wall=42892 2021-06-19 06:33:49 | INFO | train_inner | epoch 002: 777 / 3002 loss=2.923, ppl=7.58, wps=5851.3, ups=0.09, wpb=64809, bsz=128, num_updates=3753, lr=9.9978e-05, gnorm=2.246, loss_scale=4, train_wall=11, gb_free=2.8, wall=42904 2021-06-19 06:34:00 | INFO | train_inner | epoch 002: 778 / 3002 loss=2.807, ppl=7, wps=5790.4, ups=0.09, wpb=64806, bsz=128, num_updates=3754, lr=9.9978e-05, gnorm=2.341, loss_scale=4, train_wall=11, gb_free=2.8, wall=42915 2021-06-19 06:34:12 | INFO | train_inner | epoch 002: 779 / 3002 loss=2.95, ppl=7.73, wps=5698.3, ups=0.09, wpb=64749, bsz=128, num_updates=3755, lr=9.9978e-05, gnorm=2.337, loss_scale=4, train_wall=11, gb_free=2.8, wall=42926 2021-06-19 06:34:23 | INFO | train_inner | epoch 002: 780 / 3002 loss=2.557, ppl=5.89, wps=5756.2, ups=0.09, wpb=64846, bsz=128, num_updates=3756, lr=9.9978e-05, gnorm=2.159, loss_scale=4, train_wall=11, gb_free=2.8, wall=42937 2021-06-19 06:34:34 | INFO | train_inner | epoch 002: 781 / 3002 loss=2.758, ppl=6.76, wps=5782.7, ups=0.09, wpb=64871, bsz=128, num_updates=3757, lr=9.99779e-05, gnorm=2.245, loss_scale=4, train_wall=11, gb_free=2.8, wall=42949 2021-06-19 06:34:46 | INFO | train_inner | epoch 002: 782 / 3002 loss=2.939, ppl=7.67, wps=5709, ups=0.09, wpb=64760, bsz=128, num_updates=3758, lr=9.99779e-05, gnorm=2.359, loss_scale=4, train_wall=11, gb_free=2.8, wall=42960 2021-06-19 06:34:57 | INFO | train_inner | epoch 002: 783 / 3002 loss=2.754, ppl=6.74, wps=5890.5, ups=0.09, wpb=64869, bsz=128, num_updates=3759, lr=9.99779e-05, gnorm=3.447, loss_scale=4, train_wall=11, gb_free=2.8, wall=42971 2021-06-19 06:35:08 | INFO | train_inner | epoch 002: 784 / 3002 loss=2.831, ppl=7.12, wps=5927.4, ups=0.09, wpb=64848, bsz=128, num_updates=3760, lr=9.99779e-05, gnorm=2.26, loss_scale=4, train_wall=10, gb_free=2.8, wall=42982 2021-06-19 06:35:19 | INFO | train_inner | epoch 002: 785 / 3002 loss=2.715, ppl=6.56, wps=5877.2, ups=0.09, wpb=64795, bsz=128, num_updates=3761, lr=9.99779e-05, gnorm=2.409, loss_scale=4, train_wall=11, gb_free=2.8, wall=42993 2021-06-19 06:35:30 | INFO | train_inner | epoch 002: 786 / 3002 loss=2.978, ppl=7.88, wps=5808.5, ups=0.09, wpb=64814, bsz=128, num_updates=3762, lr=9.99779e-05, gnorm=2.733, loss_scale=4, train_wall=11, gb_free=2.8, wall=43004 2021-06-19 06:35:41 | INFO | train_inner | epoch 002: 787 / 3002 loss=2.777, ppl=6.86, wps=5878.9, ups=0.09, wpb=64865, bsz=128, num_updates=3763, lr=9.99779e-05, gnorm=2.32, loss_scale=4, train_wall=11, gb_free=2.8, wall=43015 2021-06-19 06:35:52 | INFO | train_inner | epoch 002: 788 / 3002 loss=2.861, ppl=7.26, wps=5808, ups=0.09, wpb=64692, bsz=128, num_updates=3764, lr=9.99779e-05, gnorm=2.277, loss_scale=4, train_wall=11, gb_free=2.8, wall=43026 2021-06-19 06:36:03 | INFO | train_inner | epoch 002: 789 / 3002 loss=2.839, ppl=7.16, wps=5727.3, ups=0.09, wpb=64838, bsz=128, num_updates=3765, lr=9.99779e-05, gnorm=2.226, loss_scale=4, train_wall=11, gb_free=2.8, wall=43038 2021-06-19 06:36:14 | INFO | train_inner | epoch 002: 790 / 3002 loss=2.949, ppl=7.72, wps=5950.8, ups=0.09, wpb=64827, bsz=128, num_updates=3766, lr=9.99779e-05, gnorm=2.346, loss_scale=4, train_wall=10, gb_free=2.8, wall=43048 2021-06-19 06:36:25 | INFO | train_inner | epoch 002: 791 / 3002 loss=2.79, ppl=6.92, wps=5807.6, ups=0.09, wpb=64838, bsz=128, num_updates=3767, lr=9.99779e-05, gnorm=2.344, loss_scale=4, train_wall=11, gb_free=2.8, wall=43060 2021-06-19 06:36:36 | INFO | train_inner | epoch 002: 792 / 3002 loss=2.742, ppl=6.69, wps=5908.2, ups=0.09, wpb=64874, bsz=128, num_updates=3768, lr=9.99779e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=43071 2021-06-19 06:36:47 | INFO | train_inner | epoch 002: 793 / 3002 loss=2.847, ppl=7.19, wps=5954.8, ups=0.09, wpb=64775, bsz=128, num_updates=3769, lr=9.99778e-05, gnorm=2.249, loss_scale=4, train_wall=10, gb_free=2.8, wall=43081 2021-06-19 06:36:58 | INFO | train_inner | epoch 002: 794 / 3002 loss=2.685, ppl=6.43, wps=5873.5, ups=0.09, wpb=64859, bsz=128, num_updates=3770, lr=9.99778e-05, gnorm=2.527, loss_scale=4, train_wall=11, gb_free=2.8, wall=43093 2021-06-19 06:37:09 | INFO | train_inner | epoch 002: 795 / 3002 loss=2.846, ppl=7.19, wps=5844.9, ups=0.09, wpb=64897, bsz=128, num_updates=3771, lr=9.99778e-05, gnorm=2.235, loss_scale=4, train_wall=11, gb_free=2.8, wall=43104 2021-06-19 06:37:20 | INFO | train_inner | epoch 002: 796 / 3002 loss=2.799, ppl=6.96, wps=5847.4, ups=0.09, wpb=64813, bsz=128, num_updates=3772, lr=9.99778e-05, gnorm=2.324, loss_scale=4, train_wall=11, gb_free=2.8, wall=43115 2021-06-19 06:37:31 | INFO | train_inner | epoch 002: 797 / 3002 loss=2.816, ppl=7.04, wps=5978.7, ups=0.09, wpb=64852, bsz=128, num_updates=3773, lr=9.99778e-05, gnorm=2.241, loss_scale=4, train_wall=10, gb_free=2.8, wall=43126 2021-06-19 06:37:42 | INFO | train_inner | epoch 002: 798 / 3002 loss=2.807, ppl=7, wps=5849.4, ups=0.09, wpb=64847, bsz=128, num_updates=3774, lr=9.99778e-05, gnorm=2.193, loss_scale=4, train_wall=11, gb_free=2.8, wall=43137 2021-06-19 06:37:54 | INFO | train_inner | epoch 002: 799 / 3002 loss=2.832, ppl=7.12, wps=5775.3, ups=0.09, wpb=64765, bsz=128, num_updates=3775, lr=9.99778e-05, gnorm=2.294, loss_scale=4, train_wall=11, gb_free=2.8, wall=43148 2021-06-19 06:38:05 | INFO | train_inner | epoch 002: 800 / 3002 loss=2.768, ppl=6.81, wps=5766.4, ups=0.09, wpb=64815, bsz=128, num_updates=3776, lr=9.99778e-05, gnorm=2.182, loss_scale=8, train_wall=11, gb_free=2.8, wall=43159 2021-06-19 06:38:16 | INFO | train_inner | epoch 002: 801 / 3002 loss=2.663, ppl=6.33, wps=5880.5, ups=0.09, wpb=64846, bsz=128, num_updates=3777, lr=9.99778e-05, gnorm=2.511, loss_scale=8, train_wall=11, gb_free=2.8, wall=43170 2021-06-19 06:38:27 | INFO | train_inner | epoch 002: 802 / 3002 loss=2.859, ppl=7.26, wps=5895.3, ups=0.09, wpb=64841, bsz=128, num_updates=3778, lr=9.99778e-05, gnorm=2.257, loss_scale=8, train_wall=11, gb_free=2.8, wall=43181 2021-06-19 06:38:38 | INFO | train_inner | epoch 002: 803 / 3002 loss=2.828, ppl=7.1, wps=5778.4, ups=0.09, wpb=64882, bsz=128, num_updates=3779, lr=9.99778e-05, gnorm=2.211, loss_scale=8, train_wall=11, gb_free=2.8, wall=43192 2021-06-19 06:38:49 | INFO | train_inner | epoch 002: 804 / 3002 loss=2.742, ppl=6.69, wps=5946.9, ups=0.09, wpb=64758, bsz=128, num_updates=3780, lr=9.99778e-05, gnorm=3.387, loss_scale=8, train_wall=10, gb_free=2.8, wall=43203 2021-06-19 06:39:00 | INFO | train_inner | epoch 002: 805 / 3002 loss=2.849, ppl=7.2, wps=5962.8, ups=0.09, wpb=64832, bsz=128, num_updates=3781, lr=9.99778e-05, gnorm=2.17, loss_scale=8, train_wall=10, gb_free=2.8, wall=43214 2021-06-19 06:39:11 | INFO | train_inner | epoch 002: 806 / 3002 loss=2.808, ppl=7.01, wps=5742.2, ups=0.09, wpb=64827, bsz=128, num_updates=3782, lr=9.99777e-05, gnorm=2.224, loss_scale=8, train_wall=11, gb_free=2.8, wall=43225 2021-06-19 06:39:22 | INFO | train_inner | epoch 002: 807 / 3002 loss=2.811, ppl=7.02, wps=5888.3, ups=0.09, wpb=64880, bsz=128, num_updates=3783, lr=9.99777e-05, gnorm=2.177, loss_scale=8, train_wall=11, gb_free=2.8, wall=43236 2021-06-19 06:39:33 | INFO | train_inner | epoch 002: 808 / 3002 loss=2.642, ppl=6.24, wps=5845.9, ups=0.09, wpb=64772, bsz=128, num_updates=3784, lr=9.99777e-05, gnorm=2.344, loss_scale=8, train_wall=11, gb_free=2.8, wall=43248 2021-06-19 06:39:44 | INFO | train_inner | epoch 002: 809 / 3002 loss=2.77, ppl=6.82, wps=5805.4, ups=0.09, wpb=64811, bsz=128, num_updates=3785, lr=9.99777e-05, gnorm=2.328, loss_scale=8, train_wall=11, gb_free=2.8, wall=43259 2021-06-19 06:39:55 | INFO | train_inner | epoch 002: 810 / 3002 loss=2.678, ppl=6.4, wps=5899.8, ups=0.09, wpb=64864, bsz=128, num_updates=3786, lr=9.99777e-05, gnorm=2.348, loss_scale=8, train_wall=11, gb_free=2.8, wall=43270 2021-06-19 06:40:07 | INFO | train_inner | epoch 002: 811 / 3002 loss=2.821, ppl=7.07, wps=5788.7, ups=0.09, wpb=64829, bsz=128, num_updates=3787, lr=9.99777e-05, gnorm=2.377, loss_scale=8, train_wall=11, gb_free=2.8, wall=43281 2021-06-19 06:40:18 | INFO | train_inner | epoch 002: 812 / 3002 loss=2.641, ppl=6.24, wps=5804.8, ups=0.09, wpb=64795, bsz=128, num_updates=3788, lr=9.99777e-05, gnorm=2.218, loss_scale=8, train_wall=11, gb_free=2.8, wall=43292 2021-06-19 06:40:29 | INFO | train_inner | epoch 002: 813 / 3002 loss=2.777, ppl=6.85, wps=5869.9, ups=0.09, wpb=64881, bsz=128, num_updates=3789, lr=9.99777e-05, gnorm=2.197, loss_scale=8, train_wall=11, gb_free=2.8, wall=43303 2021-06-19 06:40:40 | INFO | train_inner | epoch 002: 814 / 3002 loss=2.815, ppl=7.04, wps=5900.5, ups=0.09, wpb=64792, bsz=128, num_updates=3790, lr=9.99777e-05, gnorm=2.564, loss_scale=8, train_wall=11, gb_free=2.8, wall=43314 2021-06-19 06:40:51 | INFO | train_inner | epoch 002: 815 / 3002 loss=2.777, ppl=6.86, wps=5777.4, ups=0.09, wpb=64800, bsz=128, num_updates=3791, lr=9.99777e-05, gnorm=2.384, loss_scale=8, train_wall=11, gb_free=2.8, wall=43325 2021-06-19 06:41:02 | INFO | train_inner | epoch 002: 816 / 3002 loss=2.822, ppl=7.07, wps=5832.8, ups=0.09, wpb=64877, bsz=128, num_updates=3792, lr=9.99777e-05, gnorm=2.261, loss_scale=8, train_wall=11, gb_free=2.8, wall=43336 2021-06-19 06:41:13 | INFO | train_inner | epoch 002: 817 / 3002 loss=2.809, ppl=7.01, wps=5863.7, ups=0.09, wpb=64793, bsz=128, num_updates=3793, lr=9.99777e-05, gnorm=2.252, loss_scale=8, train_wall=11, gb_free=2.8, wall=43347 2021-06-19 06:41:24 | INFO | train_inner | epoch 002: 818 / 3002 loss=2.815, ppl=7.04, wps=5892.5, ups=0.09, wpb=64915, bsz=128, num_updates=3794, lr=9.99776e-05, gnorm=2.351, loss_scale=8, train_wall=11, gb_free=2.8, wall=43358 2021-06-19 06:41:35 | INFO | train_inner | epoch 002: 819 / 3002 loss=2.884, ppl=7.38, wps=5874, ups=0.09, wpb=64824, bsz=128, num_updates=3795, lr=9.99776e-05, gnorm=2.503, loss_scale=8, train_wall=11, gb_free=2.8, wall=43370 2021-06-19 06:41:47 | INFO | train_inner | epoch 002: 820 / 3002 loss=2.71, ppl=6.54, wps=5707, ups=0.09, wpb=64923, bsz=128, num_updates=3796, lr=9.99776e-05, gnorm=2.345, loss_scale=8, train_wall=11, gb_free=2.8, wall=43381 2021-06-19 06:41:58 | INFO | train_inner | epoch 002: 821 / 3002 loss=2.679, ppl=6.41, wps=5837.9, ups=0.09, wpb=64860, bsz=128, num_updates=3797, lr=9.99776e-05, gnorm=2.289, loss_scale=8, train_wall=11, gb_free=2.8, wall=43392 2021-06-19 06:42:09 | INFO | train_inner | epoch 002: 822 / 3002 loss=2.846, ppl=7.19, wps=5828.6, ups=0.09, wpb=64756, bsz=128, num_updates=3798, lr=9.99776e-05, gnorm=2.366, loss_scale=8, train_wall=11, gb_free=2.8, wall=43403 2021-06-19 06:42:20 | INFO | train_inner | epoch 002: 823 / 3002 loss=2.792, ppl=6.93, wps=5799.5, ups=0.09, wpb=64854, bsz=128, num_updates=3799, lr=9.99776e-05, gnorm=2.225, loss_scale=8, train_wall=11, gb_free=2.8, wall=43414 2021-06-19 06:42:31 | INFO | train_inner | epoch 002: 824 / 3002 loss=2.823, ppl=7.08, wps=5790.5, ups=0.09, wpb=64947, bsz=128, num_updates=3800, lr=9.99776e-05, gnorm=2.218, loss_scale=8, train_wall=11, gb_free=2.8, wall=43426 2021-06-19 06:42:42 | INFO | train_inner | epoch 002: 825 / 3002 loss=2.805, ppl=6.99, wps=5778.1, ups=0.09, wpb=64726, bsz=128, num_updates=3801, lr=9.99776e-05, gnorm=2.183, loss_scale=8, train_wall=11, gb_free=2.8, wall=43437 2021-06-19 06:42:53 | INFO | train_inner | epoch 002: 826 / 3002 loss=2.828, ppl=7.1, wps=5845.3, ups=0.09, wpb=64823, bsz=128, num_updates=3802, lr=9.99776e-05, gnorm=2.286, loss_scale=8, train_wall=11, gb_free=2.8, wall=43448 2021-06-19 06:43:05 | INFO | train_inner | epoch 002: 827 / 3002 loss=2.733, ppl=6.65, wps=5782.3, ups=0.09, wpb=64836, bsz=128, num_updates=3803, lr=9.99776e-05, gnorm=2.344, loss_scale=8, train_wall=11, gb_free=2.8, wall=43459 2021-06-19 06:43:16 | INFO | train_inner | epoch 002: 828 / 3002 loss=2.679, ppl=6.4, wps=5845.4, ups=0.09, wpb=64777, bsz=128, num_updates=3804, lr=9.99776e-05, gnorm=2.51, loss_scale=8, train_wall=11, gb_free=2.8, wall=43470 2021-06-19 06:43:27 | INFO | train_inner | epoch 002: 829 / 3002 loss=2.902, ppl=7.48, wps=5806.1, ups=0.09, wpb=64817, bsz=128, num_updates=3805, lr=9.99776e-05, gnorm=2.357, loss_scale=8, train_wall=11, gb_free=2.8, wall=43481 2021-06-19 06:43:38 | INFO | train_inner | epoch 002: 830 / 3002 loss=2.823, ppl=7.07, wps=5790.5, ups=0.09, wpb=64783, bsz=128, num_updates=3806, lr=9.99776e-05, gnorm=2.384, loss_scale=8, train_wall=11, gb_free=2.8, wall=43492 2021-06-19 06:43:49 | INFO | train_inner | epoch 002: 831 / 3002 loss=2.683, ppl=6.42, wps=5869.6, ups=0.09, wpb=64779, bsz=128, num_updates=3807, lr=9.99775e-05, gnorm=2.479, loss_scale=8, train_wall=11, gb_free=2.8, wall=43503 2021-06-19 06:44:00 | INFO | train_inner | epoch 002: 832 / 3002 loss=2.833, ppl=7.13, wps=5835.5, ups=0.09, wpb=64812, bsz=128, num_updates=3808, lr=9.99775e-05, gnorm=2.39, loss_scale=8, train_wall=11, gb_free=2.8, wall=43515 2021-06-19 06:44:11 | INFO | train_inner | epoch 002: 833 / 3002 loss=2.785, ppl=6.89, wps=5858.2, ups=0.09, wpb=64876, bsz=128, num_updates=3809, lr=9.99775e-05, gnorm=2.28, loss_scale=8, train_wall=11, gb_free=2.8, wall=43526 2021-06-19 06:44:22 | INFO | train_inner | epoch 002: 834 / 3002 loss=3.089, ppl=8.51, wps=5941.8, ups=0.09, wpb=64788, bsz=128, num_updates=3810, lr=9.99775e-05, gnorm=2.385, loss_scale=8, train_wall=10, gb_free=2.8, wall=43537 2021-06-19 06:44:33 | INFO | train_inner | epoch 002: 835 / 3002 loss=2.767, ppl=6.81, wps=5831.2, ups=0.09, wpb=64747, bsz=128, num_updates=3811, lr=9.99775e-05, gnorm=2.734, loss_scale=8, train_wall=11, gb_free=2.8, wall=43548 2021-06-19 06:44:44 | INFO | train_inner | epoch 002: 836 / 3002 loss=2.666, ppl=6.35, wps=5866.9, ups=0.09, wpb=64871, bsz=128, num_updates=3812, lr=9.99775e-05, gnorm=2.394, loss_scale=8, train_wall=11, gb_free=2.8, wall=43559 2021-06-19 06:44:56 | INFO | train_inner | epoch 002: 837 / 3002 loss=2.732, ppl=6.64, wps=5756, ups=0.09, wpb=64837, bsz=128, num_updates=3813, lr=9.99775e-05, gnorm=3.624, loss_scale=8, train_wall=11, gb_free=2.8, wall=43570 2021-06-19 06:45:07 | INFO | train_inner | epoch 002: 838 / 3002 loss=2.897, ppl=7.45, wps=5883.2, ups=0.09, wpb=64915, bsz=128, num_updates=3814, lr=9.99775e-05, gnorm=2.382, loss_scale=8, train_wall=11, gb_free=2.8, wall=43581 2021-06-19 06:45:18 | INFO | train_inner | epoch 002: 839 / 3002 loss=3.018, ppl=8.1, wps=5740, ups=0.09, wpb=64833, bsz=128, num_updates=3815, lr=9.99775e-05, gnorm=2.283, loss_scale=8, train_wall=11, gb_free=2.8, wall=43592 2021-06-19 06:45:29 | INFO | train_inner | epoch 002: 840 / 3002 loss=2.706, ppl=6.53, wps=5854.1, ups=0.09, wpb=64925, bsz=128, num_updates=3816, lr=9.99775e-05, gnorm=2.307, loss_scale=8, train_wall=11, gb_free=2.8, wall=43603 2021-06-19 06:45:40 | INFO | train_inner | epoch 002: 841 / 3002 loss=2.834, ppl=7.13, wps=5749.6, ups=0.09, wpb=64737, bsz=128, num_updates=3817, lr=9.99775e-05, gnorm=2.21, loss_scale=8, train_wall=11, gb_free=2.8, wall=43615 2021-06-19 06:45:52 | INFO | train_inner | epoch 002: 842 / 3002 loss=2.82, ppl=7.06, wps=5712.9, ups=0.09, wpb=64816, bsz=128, num_updates=3818, lr=9.99775e-05, gnorm=2.848, loss_scale=8, train_wall=11, gb_free=2.8, wall=43626 2021-06-19 06:46:03 | INFO | train_inner | epoch 002: 843 / 3002 loss=2.756, ppl=6.75, wps=5953.2, ups=0.09, wpb=64903, bsz=128, num_updates=3819, lr=9.99774e-05, gnorm=2.385, loss_scale=8, train_wall=10, gb_free=2.8, wall=43637 2021-06-19 06:46:13 | INFO | train_inner | epoch 002: 844 / 3002 loss=2.685, ppl=6.43, wps=6003.4, ups=0.09, wpb=64756, bsz=128, num_updates=3820, lr=9.99774e-05, gnorm=2.313, loss_scale=8, train_wall=10, gb_free=2.8, wall=43648 2021-06-19 06:46:25 | INFO | train_inner | epoch 002: 845 / 3002 loss=2.743, ppl=6.69, wps=5763.7, ups=0.09, wpb=64827, bsz=128, num_updates=3821, lr=9.99774e-05, gnorm=2.286, loss_scale=8, train_wall=11, gb_free=2.8, wall=43659 2021-06-19 06:46:36 | INFO | train_inner | epoch 002: 846 / 3002 loss=2.729, ppl=6.63, wps=5861.5, ups=0.09, wpb=64823, bsz=128, num_updates=3822, lr=9.99774e-05, gnorm=2.566, loss_scale=8, train_wall=11, gb_free=2.8, wall=43670 2021-06-19 06:46:47 | INFO | train_inner | epoch 002: 847 / 3002 loss=2.625, ppl=6.17, wps=5904.2, ups=0.09, wpb=64866, bsz=128, num_updates=3823, lr=9.99774e-05, gnorm=2.192, loss_scale=8, train_wall=11, gb_free=2.8, wall=43681 2021-06-19 06:46:58 | INFO | train_inner | epoch 002: 848 / 3002 loss=2.709, ppl=6.54, wps=5895.1, ups=0.09, wpb=64898, bsz=128, num_updates=3824, lr=9.99774e-05, gnorm=2.286, loss_scale=8, train_wall=11, gb_free=2.8, wall=43692 2021-06-19 06:47:09 | INFO | train_inner | epoch 002: 849 / 3002 loss=2.763, ppl=6.79, wps=5778.3, ups=0.09, wpb=64849, bsz=128, num_updates=3825, lr=9.99774e-05, gnorm=2.277, loss_scale=8, train_wall=11, gb_free=2.8, wall=43703 2021-06-19 06:47:20 | INFO | train_inner | epoch 002: 850 / 3002 loss=2.694, ppl=6.47, wps=5793.3, ups=0.09, wpb=64856, bsz=128, num_updates=3826, lr=9.99774e-05, gnorm=2.536, loss_scale=8, train_wall=11, gb_free=2.8, wall=43714 2021-06-19 06:47:31 | INFO | train_inner | epoch 002: 851 / 3002 loss=2.794, ppl=6.93, wps=5727.5, ups=0.09, wpb=64779, bsz=128, num_updates=3827, lr=9.99774e-05, gnorm=2.334, loss_scale=8, train_wall=11, gb_free=2.8, wall=43726 2021-06-19 06:47:43 | INFO | train_inner | epoch 002: 852 / 3002 loss=2.924, ppl=7.59, wps=5830.2, ups=0.09, wpb=64845, bsz=128, num_updates=3828, lr=9.99774e-05, gnorm=2.322, loss_scale=8, train_wall=11, gb_free=2.8, wall=43737 2021-06-19 06:47:54 | INFO | train_inner | epoch 002: 853 / 3002 loss=2.97, ppl=7.83, wps=5865, ups=0.09, wpb=64840, bsz=128, num_updates=3829, lr=9.99774e-05, gnorm=2.353, loss_scale=8, train_wall=11, gb_free=2.8, wall=43748 2021-06-19 06:48:05 | INFO | train_inner | epoch 002: 854 / 3002 loss=2.857, ppl=7.25, wps=5760.3, ups=0.09, wpb=64807, bsz=128, num_updates=3830, lr=9.99774e-05, gnorm=2.365, loss_scale=8, train_wall=11, gb_free=2.8, wall=43759 2021-06-19 06:48:16 | INFO | train_inner | epoch 002: 855 / 3002 loss=2.754, ppl=6.75, wps=5829.6, ups=0.09, wpb=64829, bsz=128, num_updates=3831, lr=9.99774e-05, gnorm=2.584, loss_scale=8, train_wall=11, gb_free=2.8, wall=43770 2021-06-19 06:48:27 | INFO | train_inner | epoch 002: 856 / 3002 loss=2.977, ppl=7.88, wps=5883, ups=0.09, wpb=64895, bsz=128, num_updates=3832, lr=9.99773e-05, gnorm=2.86, loss_scale=8, train_wall=11, gb_free=2.8, wall=43781 2021-06-19 06:48:38 | INFO | train_inner | epoch 002: 857 / 3002 loss=2.87, ppl=7.31, wps=5841.1, ups=0.09, wpb=64809, bsz=128, num_updates=3833, lr=9.99773e-05, gnorm=2.3, loss_scale=8, train_wall=11, gb_free=2.8, wall=43792 2021-06-19 06:48:49 | INFO | train_inner | epoch 002: 858 / 3002 loss=2.808, ppl=7, wps=5853.6, ups=0.09, wpb=64817, bsz=128, num_updates=3834, lr=9.99773e-05, gnorm=2.295, loss_scale=8, train_wall=11, gb_free=2.8, wall=43803 2021-06-19 06:49:00 | INFO | train_inner | epoch 002: 859 / 3002 loss=2.839, ppl=7.16, wps=5967, ups=0.09, wpb=64911, bsz=128, num_updates=3835, lr=9.99773e-05, gnorm=2.241, loss_scale=8, train_wall=10, gb_free=2.8, wall=43814 2021-06-19 06:49:11 | INFO | train_inner | epoch 002: 860 / 3002 loss=2.847, ppl=7.19, wps=5688.8, ups=0.09, wpb=64856, bsz=128, num_updates=3836, lr=9.99773e-05, gnorm=2.328, loss_scale=8, train_wall=11, gb_free=2.8, wall=43826 2021-06-19 06:49:23 | INFO | train_inner | epoch 002: 861 / 3002 loss=2.702, ppl=6.51, wps=5827.7, ups=0.09, wpb=64885, bsz=128, num_updates=3837, lr=9.99773e-05, gnorm=2.477, loss_scale=8, train_wall=11, gb_free=2.8, wall=43837 2021-06-19 06:49:34 | INFO | train_inner | epoch 002: 862 / 3002 loss=2.862, ppl=7.27, wps=5742.4, ups=0.09, wpb=64873, bsz=128, num_updates=3838, lr=9.99773e-05, gnorm=2.342, loss_scale=8, train_wall=11, gb_free=2.8, wall=43848 2021-06-19 06:49:45 | INFO | train_inner | epoch 002: 863 / 3002 loss=2.808, ppl=7, wps=5890.3, ups=0.09, wpb=64906, bsz=128, num_updates=3839, lr=9.99773e-05, gnorm=2.806, loss_scale=8, train_wall=11, gb_free=2.8, wall=43859 2021-06-19 06:49:56 | INFO | train_inner | epoch 002: 864 / 3002 loss=2.812, ppl=7.02, wps=5781.2, ups=0.09, wpb=64756, bsz=128, num_updates=3840, lr=9.99773e-05, gnorm=2.354, loss_scale=8, train_wall=11, gb_free=2.8, wall=43870 2021-06-19 06:50:07 | INFO | train_inner | epoch 002: 865 / 3002 loss=2.798, ppl=6.96, wps=5706.7, ups=0.09, wpb=64797, bsz=128, num_updates=3841, lr=9.99773e-05, gnorm=2.263, loss_scale=8, train_wall=11, gb_free=2.8, wall=43882 2021-06-19 06:50:18 | INFO | train_inner | epoch 002: 866 / 3002 loss=2.906, ppl=7.49, wps=5901.4, ups=0.09, wpb=64891, bsz=128, num_updates=3842, lr=9.99773e-05, gnorm=2.264, loss_scale=8, train_wall=11, gb_free=2.8, wall=43893 2021-06-19 06:50:30 | INFO | train_inner | epoch 002: 867 / 3002 loss=2.752, ppl=6.74, wps=5767.7, ups=0.09, wpb=64835, bsz=128, num_updates=3843, lr=9.99773e-05, gnorm=2.306, loss_scale=8, train_wall=11, gb_free=2.8, wall=43904 2021-06-19 06:50:41 | INFO | train_inner | epoch 002: 868 / 3002 loss=2.786, ppl=6.9, wps=5749.7, ups=0.09, wpb=64806, bsz=128, num_updates=3844, lr=9.99772e-05, gnorm=2.248, loss_scale=8, train_wall=11, gb_free=2.8, wall=43915 2021-06-19 06:50:52 | INFO | train_inner | epoch 002: 869 / 3002 loss=2.732, ppl=6.64, wps=5860.7, ups=0.09, wpb=64810, bsz=128, num_updates=3845, lr=9.99772e-05, gnorm=2.287, loss_scale=8, train_wall=11, gb_free=2.8, wall=43926 2021-06-19 06:51:03 | INFO | train_inner | epoch 002: 870 / 3002 loss=2.705, ppl=6.52, wps=5801.9, ups=0.09, wpb=64836, bsz=128, num_updates=3846, lr=9.99772e-05, gnorm=2.368, loss_scale=8, train_wall=11, gb_free=2.8, wall=43938 2021-06-19 06:51:14 | INFO | train_inner | epoch 002: 871 / 3002 loss=2.915, ppl=7.54, wps=5926.8, ups=0.09, wpb=64757, bsz=128, num_updates=3847, lr=9.99772e-05, gnorm=2.437, loss_scale=8, train_wall=10, gb_free=2.8, wall=43948 2021-06-19 06:51:25 | INFO | train_inner | epoch 002: 872 / 3002 loss=2.881, ppl=7.37, wps=5830.5, ups=0.09, wpb=64862, bsz=128, num_updates=3848, lr=9.99772e-05, gnorm=2.385, loss_scale=8, train_wall=11, gb_free=2.8, wall=43960 2021-06-19 06:51:36 | INFO | train_inner | epoch 002: 873 / 3002 loss=2.774, ppl=6.84, wps=5801.4, ups=0.09, wpb=64740, bsz=128, num_updates=3849, lr=9.99772e-05, gnorm=2.334, loss_scale=8, train_wall=11, gb_free=2.8, wall=43971 2021-06-19 06:51:48 | INFO | train_inner | epoch 002: 874 / 3002 loss=2.767, ppl=6.81, wps=5748.4, ups=0.09, wpb=64796, bsz=128, num_updates=3850, lr=9.99772e-05, gnorm=2.289, loss_scale=8, train_wall=11, gb_free=2.8, wall=43982 2021-06-19 06:51:59 | INFO | train_inner | epoch 002: 875 / 3002 loss=2.784, ppl=6.89, wps=5988, ups=0.09, wpb=64901, bsz=128, num_updates=3851, lr=9.99772e-05, gnorm=2.289, loss_scale=8, train_wall=10, gb_free=2.8, wall=43993 2021-06-19 06:52:10 | INFO | train_inner | epoch 002: 876 / 3002 loss=2.77, ppl=6.82, wps=5749.3, ups=0.09, wpb=64788, bsz=128, num_updates=3852, lr=9.99772e-05, gnorm=2.293, loss_scale=8, train_wall=11, gb_free=2.8, wall=44004 2021-06-19 06:52:21 | INFO | train_inner | epoch 002: 877 / 3002 loss=2.757, ppl=6.76, wps=5898.1, ups=0.09, wpb=64851, bsz=128, num_updates=3853, lr=9.99772e-05, gnorm=2.29, loss_scale=8, train_wall=11, gb_free=2.8, wall=44015 2021-06-19 06:52:32 | INFO | train_inner | epoch 002: 878 / 3002 loss=2.637, ppl=6.22, wps=5945.3, ups=0.09, wpb=64853, bsz=128, num_updates=3854, lr=9.99772e-05, gnorm=2.39, loss_scale=8, train_wall=10, gb_free=2.8, wall=44026 2021-06-19 06:52:43 | INFO | train_inner | epoch 002: 879 / 3002 loss=2.845, ppl=7.19, wps=5925, ups=0.09, wpb=64771, bsz=128, num_updates=3855, lr=9.99772e-05, gnorm=4.433, loss_scale=8, train_wall=10, gb_free=2.8, wall=44037 2021-06-19 06:52:54 | INFO | train_inner | epoch 002: 880 / 3002 loss=2.703, ppl=6.51, wps=5936.1, ups=0.09, wpb=64826, bsz=128, num_updates=3856, lr=9.99772e-05, gnorm=2.316, loss_scale=8, train_wall=10, gb_free=2.8, wall=44048 2021-06-19 06:53:05 | INFO | train_inner | epoch 002: 881 / 3002 loss=2.714, ppl=6.56, wps=5868.6, ups=0.09, wpb=64872, bsz=128, num_updates=3857, lr=9.99771e-05, gnorm=2.209, loss_scale=8, train_wall=11, gb_free=2.8, wall=44059 2021-06-19 06:53:16 | INFO | train_inner | epoch 002: 882 / 3002 loss=2.765, ppl=6.8, wps=5837.1, ups=0.09, wpb=64809, bsz=128, num_updates=3858, lr=9.99771e-05, gnorm=2.335, loss_scale=8, train_wall=11, gb_free=2.8, wall=44070 2021-06-19 06:53:27 | INFO | train_inner | epoch 002: 883 / 3002 loss=2.771, ppl=6.83, wps=5812.5, ups=0.09, wpb=64800, bsz=128, num_updates=3859, lr=9.99771e-05, gnorm=2.273, loss_scale=8, train_wall=11, gb_free=2.8, wall=44081 2021-06-19 06:53:38 | INFO | train_inner | epoch 002: 884 / 3002 loss=2.847, ppl=7.2, wps=5890.9, ups=0.09, wpb=64888, bsz=128, num_updates=3860, lr=9.99771e-05, gnorm=2.369, loss_scale=8, train_wall=11, gb_free=2.8, wall=44092 2021-06-19 06:53:49 | INFO | train_inner | epoch 002: 885 / 3002 loss=2.813, ppl=7.03, wps=5821.1, ups=0.09, wpb=64770, bsz=128, num_updates=3861, lr=9.99771e-05, gnorm=2.276, loss_scale=8, train_wall=11, gb_free=2.8, wall=44103 2021-06-19 06:54:00 | INFO | train_inner | epoch 002: 886 / 3002 loss=2.701, ppl=6.5, wps=5850.2, ups=0.09, wpb=64843, bsz=128, num_updates=3862, lr=9.99771e-05, gnorm=2.276, loss_scale=8, train_wall=11, gb_free=2.8, wall=44114 2021-06-19 06:54:11 | INFO | train_inner | epoch 002: 887 / 3002 loss=2.756, ppl=6.75, wps=5784.3, ups=0.09, wpb=64802, bsz=128, num_updates=3863, lr=9.99771e-05, gnorm=2.675, loss_scale=8, train_wall=11, gb_free=2.8, wall=44126 2021-06-19 06:54:23 | INFO | train_inner | epoch 002: 888 / 3002 loss=2.665, ppl=6.34, wps=5744.8, ups=0.09, wpb=64871, bsz=128, num_updates=3864, lr=9.99771e-05, gnorm=2.194, loss_scale=8, train_wall=11, gb_free=2.8, wall=44137 2021-06-19 06:54:34 | INFO | train_inner | epoch 002: 889 / 3002 loss=3.107, ppl=8.62, wps=5821.9, ups=0.09, wpb=64781, bsz=128, num_updates=3865, lr=9.99771e-05, gnorm=2.29, loss_scale=8, train_wall=11, gb_free=2.8, wall=44148 2021-06-19 06:54:45 | INFO | train_inner | epoch 002: 890 / 3002 loss=2.914, ppl=7.54, wps=5801.5, ups=0.09, wpb=64863, bsz=128, num_updates=3866, lr=9.99771e-05, gnorm=2.394, loss_scale=8, train_wall=11, gb_free=2.8, wall=44159 2021-06-19 06:54:56 | INFO | train_inner | epoch 002: 891 / 3002 loss=2.755, ppl=6.75, wps=5857.8, ups=0.09, wpb=64865, bsz=128, num_updates=3867, lr=9.99771e-05, gnorm=2.39, loss_scale=8, train_wall=11, gb_free=2.8, wall=44170 2021-06-19 06:55:07 | INFO | train_inner | epoch 002: 892 / 3002 loss=2.819, ppl=7.06, wps=5915.2, ups=0.09, wpb=64826, bsz=128, num_updates=3868, lr=9.99771e-05, gnorm=2.312, loss_scale=8, train_wall=10, gb_free=2.8, wall=44181 2021-06-19 06:55:18 | INFO | train_inner | epoch 002: 893 / 3002 loss=2.652, ppl=6.29, wps=5844.6, ups=0.09, wpb=64879, bsz=128, num_updates=3869, lr=9.9977e-05, gnorm=2.217, loss_scale=8, train_wall=11, gb_free=2.8, wall=44192 2021-06-19 06:55:29 | INFO | train_inner | epoch 002: 894 / 3002 loss=2.717, ppl=6.57, wps=5827.9, ups=0.09, wpb=64882, bsz=128, num_updates=3870, lr=9.9977e-05, gnorm=2.313, loss_scale=8, train_wall=11, gb_free=2.8, wall=44203 2021-06-19 06:55:40 | INFO | train_inner | epoch 002: 895 / 3002 loss=2.796, ppl=6.94, wps=5938.3, ups=0.09, wpb=64871, bsz=128, num_updates=3871, lr=9.9977e-05, gnorm=2.282, loss_scale=8, train_wall=10, gb_free=2.8, wall=44214 2021-06-19 06:55:51 | INFO | train_inner | epoch 002: 896 / 3002 loss=2.967, ppl=7.82, wps=5868.8, ups=0.09, wpb=64892, bsz=128, num_updates=3872, lr=9.9977e-05, gnorm=2.334, loss_scale=8, train_wall=11, gb_free=2.8, wall=44225 2021-06-19 06:56:02 | INFO | train_inner | epoch 002: 897 / 3002 loss=2.752, ppl=6.74, wps=5787.5, ups=0.09, wpb=64736, bsz=128, num_updates=3873, lr=9.9977e-05, gnorm=2.262, loss_scale=8, train_wall=11, gb_free=2.8, wall=44237 2021-06-19 06:56:13 | INFO | train_inner | epoch 002: 898 / 3002 loss=2.658, ppl=6.31, wps=5849.1, ups=0.09, wpb=64921, bsz=128, num_updates=3874, lr=9.9977e-05, gnorm=2.496, loss_scale=8, train_wall=11, gb_free=2.8, wall=44248 2021-06-19 06:56:24 | INFO | train_inner | epoch 002: 899 / 3002 loss=2.947, ppl=7.71, wps=5881.7, ups=0.09, wpb=64895, bsz=128, num_updates=3875, lr=9.9977e-05, gnorm=2.32, loss_scale=8, train_wall=11, gb_free=2.8, wall=44259 2021-06-19 06:56:35 | INFO | train_inner | epoch 002: 900 / 3002 loss=2.795, ppl=6.94, wps=5886, ups=0.09, wpb=64766, bsz=128, num_updates=3876, lr=9.9977e-05, gnorm=2.314, loss_scale=8, train_wall=11, gb_free=2.8, wall=44270 2021-06-19 06:56:47 | INFO | train_inner | epoch 002: 901 / 3002 loss=2.716, ppl=6.57, wps=5812.1, ups=0.09, wpb=64799, bsz=128, num_updates=3877, lr=9.9977e-05, gnorm=2.404, loss_scale=8, train_wall=11, gb_free=2.8, wall=44281 2021-06-19 06:56:57 | INFO | train_inner | epoch 002: 902 / 3002 loss=2.725, ppl=6.61, wps=6023.2, ups=0.09, wpb=64818, bsz=128, num_updates=3878, lr=9.9977e-05, gnorm=2.293, loss_scale=8, train_wall=10, gb_free=2.8, wall=44292 2021-06-19 06:57:08 | INFO | train_inner | epoch 002: 903 / 3002 loss=2.849, ppl=7.2, wps=5894.4, ups=0.09, wpb=64818, bsz=128, num_updates=3879, lr=9.9977e-05, gnorm=2.398, loss_scale=8, train_wall=11, gb_free=2.8, wall=44303 2021-06-19 06:57:20 | INFO | train_inner | epoch 002: 904 / 3002 loss=2.863, ppl=7.27, wps=5816.1, ups=0.09, wpb=64849, bsz=128, num_updates=3880, lr=9.9977e-05, gnorm=2.343, loss_scale=8, train_wall=11, gb_free=2.8, wall=44314 2021-06-19 06:57:31 | INFO | train_inner | epoch 002: 905 / 3002 loss=2.779, ppl=6.86, wps=5833.2, ups=0.09, wpb=64804, bsz=128, num_updates=3881, lr=9.9977e-05, gnorm=2.286, loss_scale=8, train_wall=11, gb_free=2.8, wall=44325 2021-06-19 06:57:42 | INFO | train_inner | epoch 002: 906 / 3002 loss=2.949, ppl=7.72, wps=5787.7, ups=0.09, wpb=64761, bsz=128, num_updates=3882, lr=9.99769e-05, gnorm=2.298, loss_scale=8, train_wall=11, gb_free=2.8, wall=44336 2021-06-19 06:57:53 | INFO | train_inner | epoch 002: 907 / 3002 loss=2.725, ppl=6.61, wps=5790.3, ups=0.09, wpb=64891, bsz=128, num_updates=3883, lr=9.99769e-05, gnorm=2.297, loss_scale=8, train_wall=11, gb_free=2.8, wall=44347 2021-06-19 06:58:04 | INFO | train_inner | epoch 002: 908 / 3002 loss=2.917, ppl=7.55, wps=5813.8, ups=0.09, wpb=64861, bsz=128, num_updates=3884, lr=9.99769e-05, gnorm=2.47, loss_scale=8, train_wall=11, gb_free=2.8, wall=44359 2021-06-19 06:58:15 | INFO | train_inner | epoch 002: 909 / 3002 loss=2.627, ppl=6.18, wps=5855.6, ups=0.09, wpb=64860, bsz=128, num_updates=3885, lr=9.99769e-05, gnorm=2.274, loss_scale=8, train_wall=11, gb_free=2.8, wall=44370 2021-06-19 06:58:26 | INFO | train_inner | epoch 002: 910 / 3002 loss=2.732, ppl=6.64, wps=6000.3, ups=0.09, wpb=64907, bsz=128, num_updates=3886, lr=9.99769e-05, gnorm=2.324, loss_scale=8, train_wall=10, gb_free=2.8, wall=44380 2021-06-19 06:58:37 | INFO | train_inner | epoch 002: 911 / 3002 loss=2.566, ppl=5.92, wps=5762.1, ups=0.09, wpb=64820, bsz=128, num_updates=3887, lr=9.99769e-05, gnorm=2.184, loss_scale=8, train_wall=11, gb_free=2.8, wall=44392 2021-06-19 06:58:48 | INFO | train_inner | epoch 002: 912 / 3002 loss=2.691, ppl=6.46, wps=5867.9, ups=0.09, wpb=64761, bsz=128, num_updates=3888, lr=9.99769e-05, gnorm=2.359, loss_scale=8, train_wall=11, gb_free=2.8, wall=44403 2021-06-19 06:58:59 | INFO | train_inner | epoch 002: 913 / 3002 loss=2.904, ppl=7.49, wps=5836.7, ups=0.09, wpb=64826, bsz=128, num_updates=3889, lr=9.99769e-05, gnorm=2.496, loss_scale=8, train_wall=11, gb_free=2.8, wall=44414 2021-06-19 06:59:10 | INFO | train_inner | epoch 002: 914 / 3002 loss=2.797, ppl=6.95, wps=5921.1, ups=0.09, wpb=64908, bsz=128, num_updates=3890, lr=9.99769e-05, gnorm=2.339, loss_scale=8, train_wall=10, gb_free=2.8, wall=44425 2021-06-19 06:59:21 | INFO | train_inner | epoch 002: 915 / 3002 loss=2.891, ppl=7.42, wps=5923.4, ups=0.09, wpb=64861, bsz=128, num_updates=3891, lr=9.99769e-05, gnorm=2.292, loss_scale=8, train_wall=10, gb_free=2.8, wall=44436 2021-06-19 06:59:33 | INFO | train_inner | epoch 002: 916 / 3002 loss=2.765, ppl=6.8, wps=5790.3, ups=0.09, wpb=64889, bsz=128, num_updates=3892, lr=9.99769e-05, gnorm=2.351, loss_scale=8, train_wall=11, gb_free=2.8, wall=44447 2021-06-19 06:59:44 | INFO | train_inner | epoch 002: 917 / 3002 loss=2.943, ppl=7.69, wps=5714.7, ups=0.09, wpb=64874, bsz=128, num_updates=3893, lr=9.99769e-05, gnorm=2.345, loss_scale=8, train_wall=11, gb_free=2.8, wall=44458 2021-06-19 06:59:55 | INFO | train_inner | epoch 002: 918 / 3002 loss=2.728, ppl=6.62, wps=5898.5, ups=0.09, wpb=64902, bsz=128, num_updates=3894, lr=9.99768e-05, gnorm=2.408, loss_scale=8, train_wall=11, gb_free=2.8, wall=44469 2021-06-19 07:00:06 | INFO | train_inner | epoch 002: 919 / 3002 loss=2.849, ppl=7.2, wps=5903.4, ups=0.09, wpb=64883, bsz=128, num_updates=3895, lr=9.99768e-05, gnorm=2.257, loss_scale=8, train_wall=11, gb_free=2.8, wall=44480 2021-06-19 07:00:17 | INFO | train_inner | epoch 002: 920 / 3002 loss=2.751, ppl=6.73, wps=5807.4, ups=0.09, wpb=64867, bsz=128, num_updates=3896, lr=9.99768e-05, gnorm=2.329, loss_scale=8, train_wall=11, gb_free=2.8, wall=44491 2021-06-19 07:00:28 | INFO | train_inner | epoch 002: 921 / 3002 loss=2.973, ppl=7.85, wps=5767.8, ups=0.09, wpb=64853, bsz=128, num_updates=3897, lr=9.99768e-05, gnorm=2.314, loss_scale=8, train_wall=11, gb_free=2.8, wall=44503 2021-06-19 07:00:40 | INFO | train_inner | epoch 002: 922 / 3002 loss=2.949, ppl=7.72, wps=5701.6, ups=0.09, wpb=64814, bsz=128, num_updates=3898, lr=9.99768e-05, gnorm=2.292, loss_scale=8, train_wall=11, gb_free=2.8, wall=44514 2021-06-19 07:00:51 | INFO | train_inner | epoch 002: 923 / 3002 loss=2.787, ppl=6.9, wps=5840.1, ups=0.09, wpb=64774, bsz=128, num_updates=3899, lr=9.99768e-05, gnorm=2.295, loss_scale=8, train_wall=11, gb_free=2.8, wall=44525 2021-06-19 07:01:02 | INFO | train_inner | epoch 002: 924 / 3002 loss=2.704, ppl=6.52, wps=5869.1, ups=0.09, wpb=64901, bsz=128, num_updates=3900, lr=9.99768e-05, gnorm=2.378, loss_scale=8, train_wall=11, gb_free=2.8, wall=44536 2021-06-19 07:01:13 | INFO | train_inner | epoch 002: 925 / 3002 loss=2.797, ppl=6.95, wps=5778.3, ups=0.09, wpb=64762, bsz=128, num_updates=3901, lr=9.99768e-05, gnorm=2.24, loss_scale=8, train_wall=11, gb_free=2.8, wall=44547 2021-06-19 07:01:24 | INFO | train_inner | epoch 002: 926 / 3002 loss=2.626, ppl=6.17, wps=5834, ups=0.09, wpb=64831, bsz=128, num_updates=3902, lr=9.99768e-05, gnorm=2.211, loss_scale=8, train_wall=11, gb_free=2.8, wall=44559 2021-06-19 07:01:35 | INFO | train_inner | epoch 002: 927 / 3002 loss=2.873, ppl=7.32, wps=5814.5, ups=0.09, wpb=64740, bsz=128, num_updates=3903, lr=9.99768e-05, gnorm=2.31, loss_scale=8, train_wall=11, gb_free=2.8, wall=44570 2021-06-19 07:01:46 | INFO | train_inner | epoch 002: 928 / 3002 loss=2.705, ppl=6.52, wps=5891, ups=0.09, wpb=64941, bsz=128, num_updates=3904, lr=9.99768e-05, gnorm=2.338, loss_scale=16, train_wall=11, gb_free=2.8, wall=44581 2021-06-19 07:01:57 | INFO | train_inner | epoch 002: 929 / 3002 loss=2.742, ppl=6.69, wps=5904.3, ups=0.09, wpb=64879, bsz=128, num_updates=3905, lr=9.99768e-05, gnorm=2.374, loss_scale=16, train_wall=11, gb_free=2.8, wall=44592 2021-06-19 07:02:09 | INFO | train_inner | epoch 002: 930 / 3002 loss=2.78, ppl=6.87, wps=5787.6, ups=0.09, wpb=64773, bsz=128, num_updates=3906, lr=9.99768e-05, gnorm=2.336, loss_scale=16, train_wall=11, gb_free=2.8, wall=44603 2021-06-19 07:02:20 | INFO | train_inner | epoch 002: 931 / 3002 loss=2.821, ppl=7.07, wps=5814.7, ups=0.09, wpb=64831, bsz=128, num_updates=3907, lr=9.99767e-05, gnorm=2.62, loss_scale=16, train_wall=11, gb_free=2.8, wall=44614 2021-06-19 07:02:31 | INFO | train_inner | epoch 002: 932 / 3002 loss=2.944, ppl=7.69, wps=5775.5, ups=0.09, wpb=64845, bsz=128, num_updates=3908, lr=9.99767e-05, gnorm=2.317, loss_scale=16, train_wall=11, gb_free=2.8, wall=44625 2021-06-19 07:02:42 | INFO | train_inner | epoch 002: 933 / 3002 loss=2.79, ppl=6.92, wps=5887.7, ups=0.09, wpb=64785, bsz=128, num_updates=3909, lr=9.99767e-05, gnorm=2.34, loss_scale=16, train_wall=11, gb_free=2.8, wall=44636 2021-06-19 07:02:53 | INFO | train_inner | epoch 002: 934 / 3002 loss=2.734, ppl=6.65, wps=5978.5, ups=0.09, wpb=64828, bsz=128, num_updates=3910, lr=9.99767e-05, gnorm=2.238, loss_scale=16, train_wall=10, gb_free=2.8, wall=44647 2021-06-19 07:03:04 | INFO | train_inner | epoch 002: 935 / 3002 loss=2.869, ppl=7.31, wps=5767.3, ups=0.09, wpb=64823, bsz=128, num_updates=3911, lr=9.99767e-05, gnorm=2.172, loss_scale=16, train_wall=11, gb_free=2.8, wall=44658 2021-06-19 07:03:15 | INFO | train_inner | epoch 002: 936 / 3002 loss=2.918, ppl=7.56, wps=5877.5, ups=0.09, wpb=64891, bsz=128, num_updates=3912, lr=9.99767e-05, gnorm=2.311, loss_scale=16, train_wall=11, gb_free=2.8, wall=44669 2021-06-19 07:03:26 | INFO | train_inner | epoch 002: 937 / 3002 loss=2.852, ppl=7.22, wps=5853.8, ups=0.09, wpb=64817, bsz=128, num_updates=3913, lr=9.99767e-05, gnorm=2.224, loss_scale=16, train_wall=11, gb_free=2.8, wall=44680 2021-06-19 07:03:37 | INFO | train_inner | epoch 002: 938 / 3002 loss=2.772, ppl=6.83, wps=5791.1, ups=0.09, wpb=64786, bsz=128, num_updates=3914, lr=9.99767e-05, gnorm=2.268, loss_scale=16, train_wall=11, gb_free=2.8, wall=44692 2021-06-19 07:03:48 | INFO | train_inner | epoch 002: 939 / 3002 loss=2.569, ppl=5.93, wps=5804.5, ups=0.09, wpb=64796, bsz=128, num_updates=3915, lr=9.99767e-05, gnorm=2.308, loss_scale=16, train_wall=11, gb_free=2.8, wall=44703 2021-06-19 07:03:59 | INFO | train_inner | epoch 002: 940 / 3002 loss=2.841, ppl=7.16, wps=5871.4, ups=0.09, wpb=64779, bsz=128, num_updates=3916, lr=9.99767e-05, gnorm=2.282, loss_scale=16, train_wall=11, gb_free=2.8, wall=44714 2021-06-19 07:04:10 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-19 07:04:21 | INFO | train_inner | epoch 002: 942 / 3002 loss=2.91, ppl=7.52, wps=2961, ups=0.05, wpb=64821, bsz=128, num_updates=3917, lr=9.99767e-05, gnorm=2.284, loss_scale=8, train_wall=21, gb_free=2.8, wall=44736 2021-06-19 07:04:33 | INFO | train_inner | epoch 002: 943 / 3002 loss=2.88, ppl=7.36, wps=5756.8, ups=0.09, wpb=64836, bsz=128, num_updates=3918, lr=9.99767e-05, gnorm=2.35, loss_scale=8, train_wall=11, gb_free=2.8, wall=44747 2021-06-19 07:04:44 | INFO | train_inner | epoch 002: 944 / 3002 loss=2.82, ppl=7.06, wps=5946.6, ups=0.09, wpb=64846, bsz=128, num_updates=3919, lr=9.99766e-05, gnorm=2.343, loss_scale=8, train_wall=10, gb_free=2.8, wall=44758 2021-06-19 07:04:55 | INFO | train_inner | epoch 002: 945 / 3002 loss=2.687, ppl=6.44, wps=5823, ups=0.09, wpb=64822, bsz=128, num_updates=3920, lr=9.99766e-05, gnorm=2.192, loss_scale=8, train_wall=11, gb_free=2.8, wall=44769 2021-06-19 07:05:06 | INFO | train_inner | epoch 002: 946 / 3002 loss=2.791, ppl=6.92, wps=5782.2, ups=0.09, wpb=64775, bsz=128, num_updates=3921, lr=9.99766e-05, gnorm=2.312, loss_scale=8, train_wall=11, gb_free=2.8, wall=44780 2021-06-19 07:05:17 | INFO | train_inner | epoch 002: 947 / 3002 loss=2.7, ppl=6.5, wps=5832.6, ups=0.09, wpb=64827, bsz=128, num_updates=3922, lr=9.99766e-05, gnorm=2.255, loss_scale=8, train_wall=11, gb_free=2.8, wall=44791 2021-06-19 07:05:28 | INFO | train_inner | epoch 002: 948 / 3002 loss=2.681, ppl=6.41, wps=5884.5, ups=0.09, wpb=64858, bsz=128, num_updates=3923, lr=9.99766e-05, gnorm=2.376, loss_scale=8, train_wall=11, gb_free=2.8, wall=44802 2021-06-19 07:05:39 | INFO | train_inner | epoch 002: 949 / 3002 loss=2.754, ppl=6.74, wps=5941, ups=0.09, wpb=64857, bsz=128, num_updates=3924, lr=9.99766e-05, gnorm=2.276, loss_scale=8, train_wall=10, gb_free=2.8, wall=44813 2021-06-19 07:05:50 | INFO | train_inner | epoch 002: 950 / 3002 loss=2.716, ppl=6.57, wps=5705.2, ups=0.09, wpb=64873, bsz=128, num_updates=3925, lr=9.99766e-05, gnorm=2.448, loss_scale=8, train_wall=11, gb_free=2.8, wall=44825 2021-06-19 07:06:02 | INFO | train_inner | epoch 002: 951 / 3002 loss=2.663, ppl=6.33, wps=5788.5, ups=0.09, wpb=64910, bsz=128, num_updates=3926, lr=9.99766e-05, gnorm=2.36, loss_scale=8, train_wall=11, gb_free=2.8, wall=44836 2021-06-19 07:06:12 | INFO | train_inner | epoch 002: 952 / 3002 loss=2.718, ppl=6.58, wps=5904.3, ups=0.09, wpb=64845, bsz=128, num_updates=3927, lr=9.99766e-05, gnorm=2.255, loss_scale=8, train_wall=11, gb_free=2.8, wall=44847 2021-06-19 07:06:24 | INFO | train_inner | epoch 002: 953 / 3002 loss=2.869, ppl=7.31, wps=5745.8, ups=0.09, wpb=64808, bsz=128, num_updates=3928, lr=9.99766e-05, gnorm=2.332, loss_scale=8, train_wall=11, gb_free=2.8, wall=44858 2021-06-19 07:06:35 | INFO | train_inner | epoch 002: 954 / 3002 loss=2.879, ppl=7.36, wps=5802.9, ups=0.09, wpb=64844, bsz=128, num_updates=3929, lr=9.99766e-05, gnorm=2.288, loss_scale=8, train_wall=11, gb_free=2.8, wall=44869 2021-06-19 07:06:46 | INFO | train_inner | epoch 002: 955 / 3002 loss=2.899, ppl=7.46, wps=5846.8, ups=0.09, wpb=64813, bsz=128, num_updates=3930, lr=9.99766e-05, gnorm=2.27, loss_scale=8, train_wall=11, gb_free=2.8, wall=44880 2021-06-19 07:06:57 | INFO | train_inner | epoch 002: 956 / 3002 loss=2.763, ppl=6.79, wps=5806.3, ups=0.09, wpb=64854, bsz=128, num_updates=3931, lr=9.99766e-05, gnorm=2.504, loss_scale=8, train_wall=11, gb_free=2.8, wall=44892 2021-06-19 07:07:08 | INFO | train_inner | epoch 002: 957 / 3002 loss=2.704, ppl=6.52, wps=5968.8, ups=0.09, wpb=64912, bsz=128, num_updates=3932, lr=9.99765e-05, gnorm=2.346, loss_scale=8, train_wall=10, gb_free=2.8, wall=44902 2021-06-19 07:07:19 | INFO | train_inner | epoch 002: 958 / 3002 loss=2.758, ppl=6.77, wps=5826.5, ups=0.09, wpb=64838, bsz=128, num_updates=3933, lr=9.99765e-05, gnorm=2.391, loss_scale=8, train_wall=11, gb_free=2.8, wall=44914 2021-06-19 07:07:30 | INFO | train_inner | epoch 002: 959 / 3002 loss=2.837, ppl=7.15, wps=5800.2, ups=0.09, wpb=64841, bsz=128, num_updates=3934, lr=9.99765e-05, gnorm=2.264, loss_scale=8, train_wall=11, gb_free=2.8, wall=44925 2021-06-19 07:07:41 | INFO | train_inner | epoch 002: 960 / 3002 loss=2.827, ppl=7.1, wps=5868.5, ups=0.09, wpb=64847, bsz=128, num_updates=3935, lr=9.99765e-05, gnorm=2.342, loss_scale=8, train_wall=11, gb_free=2.8, wall=44936 2021-06-19 07:07:53 | INFO | train_inner | epoch 002: 961 / 3002 loss=2.697, ppl=6.48, wps=5825.6, ups=0.09, wpb=64757, bsz=128, num_updates=3936, lr=9.99765e-05, gnorm=2.276, loss_scale=8, train_wall=11, gb_free=2.8, wall=44947 2021-06-19 07:08:04 | INFO | train_inner | epoch 002: 962 / 3002 loss=2.636, ppl=6.21, wps=5803.9, ups=0.09, wpb=64790, bsz=128, num_updates=3937, lr=9.99765e-05, gnorm=2.392, loss_scale=8, train_wall=11, gb_free=2.8, wall=44958 2021-06-19 07:08:15 | INFO | train_inner | epoch 002: 963 / 3002 loss=2.779, ppl=6.86, wps=5791.8, ups=0.09, wpb=64814, bsz=128, num_updates=3938, lr=9.99765e-05, gnorm=2.397, loss_scale=8, train_wall=11, gb_free=2.8, wall=44969 2021-06-19 07:08:26 | INFO | train_inner | epoch 002: 964 / 3002 loss=2.814, ppl=7.03, wps=5805, ups=0.09, wpb=64772, bsz=128, num_updates=3939, lr=9.99765e-05, gnorm=2.338, loss_scale=8, train_wall=11, gb_free=2.8, wall=44980 2021-06-19 07:08:37 | INFO | train_inner | epoch 002: 965 / 3002 loss=2.797, ppl=6.95, wps=5890.2, ups=0.09, wpb=64771, bsz=128, num_updates=3940, lr=9.99765e-05, gnorm=2.271, loss_scale=8, train_wall=11, gb_free=2.8, wall=44991 2021-06-19 07:08:48 | INFO | train_inner | epoch 002: 966 / 3002 loss=2.749, ppl=6.72, wps=5917.8, ups=0.09, wpb=64888, bsz=128, num_updates=3941, lr=9.99765e-05, gnorm=2.273, loss_scale=8, train_wall=11, gb_free=2.8, wall=45002 2021-06-19 07:08:59 | INFO | train_inner | epoch 002: 967 / 3002 loss=2.858, ppl=7.25, wps=5892.4, ups=0.09, wpb=64781, bsz=128, num_updates=3942, lr=9.99765e-05, gnorm=2.518, loss_scale=8, train_wall=11, gb_free=2.8, wall=45013 2021-06-19 07:09:10 | INFO | train_inner | epoch 002: 968 / 3002 loss=2.835, ppl=7.14, wps=5916.4, ups=0.09, wpb=64914, bsz=128, num_updates=3943, lr=9.99765e-05, gnorm=2.27, loss_scale=8, train_wall=11, gb_free=2.8, wall=45024 2021-06-19 07:09:21 | INFO | train_inner | epoch 002: 969 / 3002 loss=2.777, ppl=6.85, wps=5800.3, ups=0.09, wpb=64820, bsz=128, num_updates=3944, lr=9.99764e-05, gnorm=2.194, loss_scale=8, train_wall=11, gb_free=2.8, wall=45036 2021-06-19 07:09:32 | INFO | train_inner | epoch 002: 970 / 3002 loss=2.678, ppl=6.4, wps=5901.5, ups=0.09, wpb=64790, bsz=128, num_updates=3945, lr=9.99764e-05, gnorm=2.265, loss_scale=8, train_wall=11, gb_free=2.8, wall=45046 2021-06-19 07:09:43 | INFO | train_inner | epoch 002: 971 / 3002 loss=2.749, ppl=6.72, wps=5846.6, ups=0.09, wpb=64829, bsz=128, num_updates=3946, lr=9.99764e-05, gnorm=2.232, loss_scale=8, train_wall=11, gb_free=2.8, wall=45058 2021-06-19 07:09:54 | INFO | train_inner | epoch 002: 972 / 3002 loss=2.729, ppl=6.63, wps=5918.6, ups=0.09, wpb=64818, bsz=128, num_updates=3947, lr=9.99764e-05, gnorm=2.242, loss_scale=8, train_wall=11, gb_free=2.8, wall=45069 2021-06-19 07:10:05 | INFO | train_inner | epoch 002: 973 / 3002 loss=2.748, ppl=6.72, wps=5759.8, ups=0.09, wpb=64848, bsz=128, num_updates=3948, lr=9.99764e-05, gnorm=2.554, loss_scale=8, train_wall=11, gb_free=2.8, wall=45080 2021-06-19 07:10:16 | INFO | train_inner | epoch 002: 974 / 3002 loss=2.737, ppl=6.67, wps=5892.6, ups=0.09, wpb=64868, bsz=128, num_updates=3949, lr=9.99764e-05, gnorm=2.393, loss_scale=8, train_wall=11, gb_free=2.8, wall=45091 2021-06-19 07:10:28 | INFO | train_inner | epoch 002: 975 / 3002 loss=2.844, ppl=7.18, wps=5830.7, ups=0.09, wpb=64871, bsz=128, num_updates=3950, lr=9.99764e-05, gnorm=2.262, loss_scale=8, train_wall=11, gb_free=2.8, wall=45102 2021-06-19 07:10:39 | INFO | train_inner | epoch 002: 976 / 3002 loss=2.688, ppl=6.44, wps=5721.7, ups=0.09, wpb=64825, bsz=128, num_updates=3951, lr=9.99764e-05, gnorm=2.347, loss_scale=8, train_wall=11, gb_free=2.8, wall=45113 2021-06-19 07:10:50 | INFO | train_inner | epoch 002: 977 / 3002 loss=2.609, ppl=6.1, wps=5902.8, ups=0.09, wpb=64787, bsz=128, num_updates=3952, lr=9.99764e-05, gnorm=2.359, loss_scale=8, train_wall=11, gb_free=2.8, wall=45124 2021-06-19 07:11:01 | INFO | train_inner | epoch 002: 978 / 3002 loss=2.765, ppl=6.8, wps=5908.3, ups=0.09, wpb=64808, bsz=128, num_updates=3953, lr=9.99764e-05, gnorm=2.416, loss_scale=8, train_wall=10, gb_free=2.8, wall=45135 2021-06-19 07:11:12 | INFO | train_inner | epoch 002: 979 / 3002 loss=2.779, ppl=6.86, wps=5805.9, ups=0.09, wpb=64899, bsz=128, num_updates=3954, lr=9.99764e-05, gnorm=2.269, loss_scale=8, train_wall=11, gb_free=2.8, wall=45146 2021-06-19 07:11:23 | INFO | train_inner | epoch 002: 980 / 3002 loss=2.685, ppl=6.43, wps=5881.4, ups=0.09, wpb=64786, bsz=128, num_updates=3955, lr=9.99764e-05, gnorm=2.742, loss_scale=8, train_wall=11, gb_free=2.8, wall=45157 2021-06-19 07:11:34 | INFO | train_inner | epoch 002: 981 / 3002 loss=2.795, ppl=6.94, wps=5819.4, ups=0.09, wpb=64842, bsz=128, num_updates=3956, lr=9.99764e-05, gnorm=2.528, loss_scale=8, train_wall=11, gb_free=2.8, wall=45169 2021-06-19 07:11:45 | INFO | train_inner | epoch 002: 982 / 3002 loss=2.797, ppl=6.95, wps=5786.4, ups=0.09, wpb=64745, bsz=128, num_updates=3957, lr=9.99763e-05, gnorm=3.612, loss_scale=8, train_wall=11, gb_free=2.8, wall=45180 2021-06-19 07:11:56 | INFO | train_inner | epoch 002: 983 / 3002 loss=2.956, ppl=7.76, wps=5973.5, ups=0.09, wpb=64819, bsz=128, num_updates=3958, lr=9.99763e-05, gnorm=2.357, loss_scale=8, train_wall=10, gb_free=2.8, wall=45191 2021-06-19 07:12:07 | INFO | train_inner | epoch 002: 984 / 3002 loss=2.734, ppl=6.65, wps=5948, ups=0.09, wpb=64769, bsz=128, num_updates=3959, lr=9.99763e-05, gnorm=2.261, loss_scale=8, train_wall=10, gb_free=2.8, wall=45201 2021-06-19 07:12:18 | INFO | train_inner | epoch 002: 985 / 3002 loss=2.825, ppl=7.09, wps=5879.2, ups=0.09, wpb=64780, bsz=128, num_updates=3960, lr=9.99763e-05, gnorm=2.199, loss_scale=8, train_wall=11, gb_free=2.8, wall=45212 2021-06-19 07:12:29 | INFO | train_inner | epoch 002: 986 / 3002 loss=2.946, ppl=7.71, wps=5972.9, ups=0.09, wpb=64804, bsz=128, num_updates=3961, lr=9.99763e-05, gnorm=2.916, loss_scale=8, train_wall=10, gb_free=2.8, wall=45223 2021-06-19 07:12:40 | INFO | train_inner | epoch 002: 987 / 3002 loss=2.915, ppl=7.54, wps=5778.3, ups=0.09, wpb=64713, bsz=128, num_updates=3962, lr=9.99763e-05, gnorm=2.183, loss_scale=8, train_wall=11, gb_free=2.8, wall=45235 2021-06-19 07:12:51 | INFO | train_inner | epoch 002: 988 / 3002 loss=2.772, ppl=6.83, wps=5845.3, ups=0.09, wpb=64890, bsz=128, num_updates=3963, lr=9.99763e-05, gnorm=2.335, loss_scale=8, train_wall=11, gb_free=2.8, wall=45246 2021-06-19 07:13:02 | INFO | train_inner | epoch 002: 989 / 3002 loss=2.829, ppl=7.11, wps=5824.3, ups=0.09, wpb=64791, bsz=128, num_updates=3964, lr=9.99763e-05, gnorm=2.229, loss_scale=8, train_wall=11, gb_free=2.8, wall=45257 2021-06-19 07:13:14 | INFO | train_inner | epoch 002: 990 / 3002 loss=2.763, ppl=6.79, wps=5839.3, ups=0.09, wpb=64893, bsz=128, num_updates=3965, lr=9.99763e-05, gnorm=2.244, loss_scale=8, train_wall=11, gb_free=2.8, wall=45268 2021-06-19 07:13:25 | INFO | train_inner | epoch 002: 991 / 3002 loss=2.707, ppl=6.53, wps=5831.8, ups=0.09, wpb=64832, bsz=128, num_updates=3966, lr=9.99763e-05, gnorm=2.187, loss_scale=8, train_wall=11, gb_free=2.8, wall=45279 2021-06-19 07:13:36 | INFO | train_inner | epoch 002: 992 / 3002 loss=2.786, ppl=6.9, wps=5912.2, ups=0.09, wpb=64847, bsz=128, num_updates=3967, lr=9.99763e-05, gnorm=2.268, loss_scale=8, train_wall=11, gb_free=2.8, wall=45290 2021-06-19 07:13:47 | INFO | train_inner | epoch 002: 993 / 3002 loss=2.921, ppl=7.58, wps=5729.9, ups=0.09, wpb=64851, bsz=128, num_updates=3968, lr=9.99763e-05, gnorm=2.293, loss_scale=8, train_wall=11, gb_free=2.8, wall=45301 2021-06-19 07:13:58 | INFO | train_inner | epoch 002: 994 / 3002 loss=2.865, ppl=7.29, wps=5788.6, ups=0.09, wpb=64820, bsz=128, num_updates=3969, lr=9.99762e-05, gnorm=2.27, loss_scale=8, train_wall=11, gb_free=2.8, wall=45312 2021-06-19 07:14:09 | INFO | train_inner | epoch 002: 995 / 3002 loss=2.719, ppl=6.58, wps=5910.9, ups=0.09, wpb=64850, bsz=128, num_updates=3970, lr=9.99762e-05, gnorm=2.258, loss_scale=8, train_wall=11, gb_free=2.8, wall=45323 2021-06-19 07:14:20 | INFO | train_inner | epoch 002: 996 / 3002 loss=2.631, ppl=6.19, wps=5837.4, ups=0.09, wpb=64779, bsz=128, num_updates=3971, lr=9.99762e-05, gnorm=2.425, loss_scale=8, train_wall=11, gb_free=2.8, wall=45335 2021-06-19 07:14:31 | INFO | train_inner | epoch 002: 997 / 3002 loss=2.887, ppl=7.4, wps=6000.1, ups=0.09, wpb=64883, bsz=128, num_updates=3972, lr=9.99762e-05, gnorm=2.299, loss_scale=8, train_wall=10, gb_free=2.8, wall=45345 2021-06-19 07:14:42 | INFO | train_inner | epoch 002: 998 / 3002 loss=2.808, ppl=7, wps=5911.1, ups=0.09, wpb=64920, bsz=128, num_updates=3973, lr=9.99762e-05, gnorm=2.312, loss_scale=8, train_wall=11, gb_free=2.8, wall=45356 2021-06-19 07:14:53 | INFO | train_inner | epoch 002: 999 / 3002 loss=2.727, ppl=6.62, wps=5868.5, ups=0.09, wpb=64863, bsz=128, num_updates=3974, lr=9.99762e-05, gnorm=2.288, loss_scale=8, train_wall=11, gb_free=2.8, wall=45367 2021-06-19 07:15:04 | INFO | train_inner | epoch 002: 1000 / 3002 loss=2.996, ppl=7.98, wps=5801.7, ups=0.09, wpb=64793, bsz=128, num_updates=3975, lr=9.99762e-05, gnorm=2.289, loss_scale=8, train_wall=11, gb_free=2.8, wall=45379 2021-06-19 07:15:15 | INFO | train_inner | epoch 002: 1001 / 3002 loss=2.713, ppl=6.56, wps=5768.5, ups=0.09, wpb=64879, bsz=128, num_updates=3976, lr=9.99762e-05, gnorm=2.579, loss_scale=8, train_wall=11, gb_free=2.8, wall=45390 2021-06-19 07:15:27 | INFO | train_inner | epoch 002: 1002 / 3002 loss=2.811, ppl=7.02, wps=5768.9, ups=0.09, wpb=64817, bsz=128, num_updates=3977, lr=9.99762e-05, gnorm=2.316, loss_scale=8, train_wall=11, gb_free=2.8, wall=45401 2021-06-19 07:15:38 | INFO | train_inner | epoch 002: 1003 / 3002 loss=2.832, ppl=7.12, wps=5913.2, ups=0.09, wpb=64884, bsz=128, num_updates=3978, lr=9.99762e-05, gnorm=2.291, loss_scale=8, train_wall=11, gb_free=2.8, wall=45412 2021-06-19 07:15:49 | INFO | train_inner | epoch 002: 1004 / 3002 loss=2.836, ppl=7.14, wps=5875.6, ups=0.09, wpb=64894, bsz=128, num_updates=3979, lr=9.99762e-05, gnorm=2.301, loss_scale=8, train_wall=11, gb_free=2.8, wall=45423 2021-06-19 07:16:00 | INFO | train_inner | epoch 002: 1005 / 3002 loss=2.779, ppl=6.87, wps=5871.5, ups=0.09, wpb=64865, bsz=128, num_updates=3980, lr=9.99762e-05, gnorm=2.155, loss_scale=8, train_wall=11, gb_free=2.8, wall=45434 2021-06-19 07:16:11 | INFO | train_inner | epoch 002: 1006 / 3002 loss=2.745, ppl=6.7, wps=5877.7, ups=0.09, wpb=64831, bsz=128, num_updates=3981, lr=9.99762e-05, gnorm=2.293, loss_scale=8, train_wall=11, gb_free=2.8, wall=45445 2021-06-19 07:16:22 | INFO | train_inner | epoch 002: 1007 / 3002 loss=2.712, ppl=6.55, wps=5736, ups=0.09, wpb=64891, bsz=128, num_updates=3982, lr=9.99761e-05, gnorm=2.238, loss_scale=8, train_wall=11, gb_free=2.8, wall=45456 2021-06-19 07:16:33 | INFO | train_inner | epoch 002: 1008 / 3002 loss=2.749, ppl=6.72, wps=5895.9, ups=0.09, wpb=64862, bsz=128, num_updates=3983, lr=9.99761e-05, gnorm=2.249, loss_scale=8, train_wall=11, gb_free=2.8, wall=45467 2021-06-19 07:16:44 | INFO | train_inner | epoch 002: 1009 / 3002 loss=2.917, ppl=7.55, wps=5819.6, ups=0.09, wpb=64710, bsz=128, num_updates=3984, lr=9.99761e-05, gnorm=2.294, loss_scale=8, train_wall=11, gb_free=2.8, wall=45479 2021-06-19 07:16:55 | INFO | train_inner | epoch 002: 1010 / 3002 loss=2.702, ppl=6.51, wps=5946.3, ups=0.09, wpb=64865, bsz=128, num_updates=3985, lr=9.99761e-05, gnorm=2.281, loss_scale=8, train_wall=10, gb_free=2.8, wall=45489 2021-06-19 07:17:06 | INFO | train_inner | epoch 002: 1011 / 3002 loss=2.669, ppl=6.36, wps=5891, ups=0.09, wpb=64798, bsz=128, num_updates=3986, lr=9.99761e-05, gnorm=2.267, loss_scale=8, train_wall=11, gb_free=2.8, wall=45500 2021-06-19 07:17:17 | INFO | train_inner | epoch 002: 1012 / 3002 loss=2.74, ppl=6.68, wps=5812.7, ups=0.09, wpb=64816, bsz=128, num_updates=3987, lr=9.99761e-05, gnorm=2.306, loss_scale=8, train_wall=11, gb_free=2.8, wall=45512 2021-06-19 07:17:28 | INFO | train_inner | epoch 002: 1013 / 3002 loss=2.718, ppl=6.58, wps=5873.5, ups=0.09, wpb=64742, bsz=128, num_updates=3988, lr=9.99761e-05, gnorm=2.258, loss_scale=8, train_wall=11, gb_free=2.8, wall=45523 2021-06-19 07:17:39 | INFO | train_inner | epoch 002: 1014 / 3002 loss=2.904, ppl=7.48, wps=5880.5, ups=0.09, wpb=64792, bsz=128, num_updates=3989, lr=9.99761e-05, gnorm=2.235, loss_scale=8, train_wall=11, gb_free=2.8, wall=45534 2021-06-19 07:17:50 | INFO | train_inner | epoch 002: 1015 / 3002 loss=2.816, ppl=7.04, wps=5845.9, ups=0.09, wpb=64810, bsz=128, num_updates=3990, lr=9.99761e-05, gnorm=2.24, loss_scale=8, train_wall=11, gb_free=2.8, wall=45545 2021-06-19 07:18:02 | INFO | train_inner | epoch 002: 1016 / 3002 loss=2.606, ppl=6.09, wps=5848, ups=0.09, wpb=64869, bsz=128, num_updates=3991, lr=9.99761e-05, gnorm=2.294, loss_scale=8, train_wall=11, gb_free=2.8, wall=45556 2021-06-19 07:18:12 | INFO | train_inner | epoch 002: 1017 / 3002 loss=2.779, ppl=6.86, wps=5908.6, ups=0.09, wpb=64832, bsz=128, num_updates=3992, lr=9.99761e-05, gnorm=2.317, loss_scale=8, train_wall=11, gb_free=2.8, wall=45567 2021-06-19 07:18:24 | INFO | train_inner | epoch 002: 1018 / 3002 loss=2.907, ppl=7.5, wps=5832.6, ups=0.09, wpb=64876, bsz=128, num_updates=3993, lr=9.99761e-05, gnorm=2.239, loss_scale=8, train_wall=11, gb_free=2.8, wall=45578 2021-06-19 07:18:35 | INFO | train_inner | epoch 002: 1019 / 3002 loss=2.761, ppl=6.78, wps=5867.1, ups=0.09, wpb=64854, bsz=128, num_updates=3994, lr=9.9976e-05, gnorm=2.288, loss_scale=8, train_wall=11, gb_free=2.8, wall=45589 2021-06-19 07:18:46 | INFO | train_inner | epoch 002: 1020 / 3002 loss=2.768, ppl=6.81, wps=5882.4, ups=0.09, wpb=64906, bsz=128, num_updates=3995, lr=9.9976e-05, gnorm=2.298, loss_scale=8, train_wall=11, gb_free=2.8, wall=45600 2021-06-19 07:18:57 | INFO | train_inner | epoch 002: 1021 / 3002 loss=2.808, ppl=7, wps=5830.4, ups=0.09, wpb=64785, bsz=128, num_updates=3996, lr=9.9976e-05, gnorm=2.179, loss_scale=8, train_wall=11, gb_free=2.8, wall=45611 2021-06-19 07:19:08 | INFO | train_inner | epoch 002: 1022 / 3002 loss=2.879, ppl=7.36, wps=5881.4, ups=0.09, wpb=64848, bsz=128, num_updates=3997, lr=9.9976e-05, gnorm=2.38, loss_scale=8, train_wall=11, gb_free=2.8, wall=45622 2021-06-19 07:19:19 | INFO | train_inner | epoch 002: 1023 / 3002 loss=2.94, ppl=7.67, wps=5793.8, ups=0.09, wpb=64857, bsz=128, num_updates=3998, lr=9.9976e-05, gnorm=2.209, loss_scale=8, train_wall=11, gb_free=2.8, wall=45633 2021-06-19 07:19:30 | INFO | train_inner | epoch 002: 1024 / 3002 loss=2.759, ppl=6.77, wps=5880.8, ups=0.09, wpb=64847, bsz=128, num_updates=3999, lr=9.9976e-05, gnorm=2.286, loss_scale=8, train_wall=11, gb_free=2.8, wall=45644 2021-06-19 07:19:41 | INFO | train_inner | epoch 002: 1025 / 3002 loss=2.799, ppl=6.96, wps=5906.4, ups=0.09, wpb=64820, bsz=128, num_updates=4000, lr=9.9976e-05, gnorm=2.238, loss_scale=8, train_wall=11, gb_free=2.8, wall=45655 2021-06-19 07:19:52 | INFO | train_inner | epoch 002: 1026 / 3002 loss=2.761, ppl=6.78, wps=5782.1, ups=0.09, wpb=64881, bsz=128, num_updates=4001, lr=9.9976e-05, gnorm=2.361, loss_scale=8, train_wall=11, gb_free=2.8, wall=45667 2021-06-19 07:20:04 | INFO | train_inner | epoch 002: 1027 / 3002 loss=2.789, ppl=6.91, wps=5696, ups=0.09, wpb=64774, bsz=128, num_updates=4002, lr=9.9976e-05, gnorm=2.499, loss_scale=8, train_wall=11, gb_free=2.8, wall=45678 2021-06-19 07:20:15 | INFO | train_inner | epoch 002: 1028 / 3002 loss=2.844, ppl=7.18, wps=5872.3, ups=0.09, wpb=64825, bsz=128, num_updates=4003, lr=9.9976e-05, gnorm=2.221, loss_scale=8, train_wall=11, gb_free=2.8, wall=45689 2021-06-19 07:20:26 | INFO | train_inner | epoch 002: 1029 / 3002 loss=2.884, ppl=7.38, wps=5888, ups=0.09, wpb=64776, bsz=128, num_updates=4004, lr=9.9976e-05, gnorm=2.282, loss_scale=8, train_wall=11, gb_free=2.8, wall=45700 2021-06-19 07:20:37 | INFO | train_inner | epoch 002: 1030 / 3002 loss=2.823, ppl=7.08, wps=5827.7, ups=0.09, wpb=64875, bsz=128, num_updates=4005, lr=9.9976e-05, gnorm=2.291, loss_scale=8, train_wall=11, gb_free=2.8, wall=45711 2021-06-19 07:20:48 | INFO | train_inner | epoch 002: 1031 / 3002 loss=2.841, ppl=7.17, wps=5790, ups=0.09, wpb=64901, bsz=128, num_updates=4006, lr=9.9976e-05, gnorm=2.217, loss_scale=8, train_wall=11, gb_free=2.8, wall=45722 2021-06-19 07:20:59 | INFO | train_inner | epoch 002: 1032 / 3002 loss=2.922, ppl=7.58, wps=5790.4, ups=0.09, wpb=64755, bsz=128, num_updates=4007, lr=9.99759e-05, gnorm=2.328, loss_scale=8, train_wall=11, gb_free=2.8, wall=45734 2021-06-19 07:21:10 | INFO | train_inner | epoch 002: 1033 / 3002 loss=2.905, ppl=7.49, wps=5774.3, ups=0.09, wpb=64783, bsz=128, num_updates=4008, lr=9.99759e-05, gnorm=2.276, loss_scale=8, train_wall=11, gb_free=2.8, wall=45745 2021-06-19 07:21:22 | INFO | train_inner | epoch 002: 1034 / 3002 loss=2.648, ppl=6.27, wps=5823.2, ups=0.09, wpb=64885, bsz=128, num_updates=4009, lr=9.99759e-05, gnorm=2.2, loss_scale=8, train_wall=11, gb_free=2.8, wall=45756 2021-06-19 07:21:33 | INFO | train_inner | epoch 002: 1035 / 3002 loss=2.682, ppl=6.42, wps=5797.4, ups=0.09, wpb=64769, bsz=128, num_updates=4010, lr=9.99759e-05, gnorm=2.245, loss_scale=8, train_wall=11, gb_free=2.8, wall=45767 2021-06-19 07:21:44 | INFO | train_inner | epoch 002: 1036 / 3002 loss=2.924, ppl=7.59, wps=5838, ups=0.09, wpb=64840, bsz=128, num_updates=4011, lr=9.99759e-05, gnorm=2.217, loss_scale=8, train_wall=11, gb_free=2.8, wall=45778 2021-06-19 07:21:55 | INFO | train_inner | epoch 002: 1037 / 3002 loss=2.79, ppl=6.92, wps=5924.7, ups=0.09, wpb=64829, bsz=128, num_updates=4012, lr=9.99759e-05, gnorm=2.417, loss_scale=8, train_wall=10, gb_free=2.8, wall=45789 2021-06-19 07:22:06 | INFO | train_inner | epoch 002: 1038 / 3002 loss=2.762, ppl=6.78, wps=5779.3, ups=0.09, wpb=64864, bsz=128, num_updates=4013, lr=9.99759e-05, gnorm=2.265, loss_scale=8, train_wall=11, gb_free=2.8, wall=45800 2021-06-19 07:22:17 | INFO | train_inner | epoch 002: 1039 / 3002 loss=2.965, ppl=7.81, wps=5852.4, ups=0.09, wpb=64763, bsz=128, num_updates=4014, lr=9.99759e-05, gnorm=3.184, loss_scale=8, train_wall=11, gb_free=2.8, wall=45811 2021-06-19 07:22:28 | INFO | train_inner | epoch 002: 1040 / 3002 loss=2.727, ppl=6.62, wps=5862.6, ups=0.09, wpb=64860, bsz=128, num_updates=4015, lr=9.99759e-05, gnorm=2.274, loss_scale=8, train_wall=11, gb_free=2.8, wall=45822 2021-06-19 07:22:39 | INFO | train_inner | epoch 002: 1041 / 3002 loss=2.858, ppl=7.25, wps=5787.6, ups=0.09, wpb=64771, bsz=128, num_updates=4016, lr=9.99759e-05, gnorm=2.264, loss_scale=8, train_wall=11, gb_free=2.8, wall=45834 2021-06-19 07:22:50 | INFO | train_inner | epoch 002: 1042 / 3002 loss=2.602, ppl=6.07, wps=5946.5, ups=0.09, wpb=64791, bsz=128, num_updates=4017, lr=9.99759e-05, gnorm=2.75, loss_scale=8, train_wall=10, gb_free=2.8, wall=45845 2021-06-19 07:23:01 | INFO | train_inner | epoch 002: 1043 / 3002 loss=2.761, ppl=6.78, wps=5836.2, ups=0.09, wpb=64800, bsz=128, num_updates=4018, lr=9.99759e-05, gnorm=2.205, loss_scale=8, train_wall=11, gb_free=2.8, wall=45856 2021-06-19 07:23:12 | INFO | train_inner | epoch 002: 1044 / 3002 loss=2.738, ppl=6.67, wps=5851.3, ups=0.09, wpb=64840, bsz=128, num_updates=4019, lr=9.99758e-05, gnorm=2.23, loss_scale=8, train_wall=11, gb_free=2.8, wall=45867 2021-06-19 07:23:24 | INFO | train_inner | epoch 002: 1045 / 3002 loss=2.709, ppl=6.54, wps=5785.7, ups=0.09, wpb=64895, bsz=128, num_updates=4020, lr=9.99758e-05, gnorm=2.271, loss_scale=8, train_wall=11, gb_free=2.8, wall=45878 2021-06-19 07:23:35 | INFO | train_inner | epoch 002: 1046 / 3002 loss=2.825, ppl=7.08, wps=5848.8, ups=0.09, wpb=64815, bsz=128, num_updates=4021, lr=9.99758e-05, gnorm=2.217, loss_scale=8, train_wall=11, gb_free=2.8, wall=45889 2021-06-19 07:23:46 | INFO | train_inner | epoch 002: 1047 / 3002 loss=2.944, ppl=7.7, wps=5873.4, ups=0.09, wpb=64795, bsz=128, num_updates=4022, lr=9.99758e-05, gnorm=7.503, loss_scale=8, train_wall=11, gb_free=2.8, wall=45900 2021-06-19 07:23:57 | INFO | train_inner | epoch 002: 1048 / 3002 loss=2.799, ppl=6.96, wps=5853.1, ups=0.09, wpb=64861, bsz=128, num_updates=4023, lr=9.99758e-05, gnorm=2.446, loss_scale=8, train_wall=11, gb_free=2.8, wall=45911 2021-06-19 07:24:08 | INFO | train_inner | epoch 002: 1049 / 3002 loss=2.822, ppl=7.07, wps=5822, ups=0.09, wpb=64813, bsz=128, num_updates=4024, lr=9.99758e-05, gnorm=2.461, loss_scale=8, train_wall=11, gb_free=2.8, wall=45922 2021-06-19 07:24:19 | INFO | train_inner | epoch 002: 1050 / 3002 loss=2.616, ppl=6.13, wps=5706.8, ups=0.09, wpb=64743, bsz=128, num_updates=4025, lr=9.99758e-05, gnorm=2.329, loss_scale=8, train_wall=11, gb_free=2.8, wall=45934 2021-06-19 07:24:31 | INFO | train_inner | epoch 002: 1051 / 3002 loss=2.884, ppl=7.38, wps=5778.4, ups=0.09, wpb=64828, bsz=128, num_updates=4026, lr=9.99758e-05, gnorm=3.113, loss_scale=8, train_wall=11, gb_free=2.8, wall=45945 2021-06-19 07:24:41 | INFO | train_inner | epoch 002: 1052 / 3002 loss=2.854, ppl=7.23, wps=6004.6, ups=0.09, wpb=64841, bsz=128, num_updates=4027, lr=9.99758e-05, gnorm=2.764, loss_scale=8, train_wall=10, gb_free=2.8, wall=45956 2021-06-19 07:24:52 | INFO | train_inner | epoch 002: 1053 / 3002 loss=2.72, ppl=6.59, wps=5821.5, ups=0.09, wpb=64823, bsz=128, num_updates=4028, lr=9.99758e-05, gnorm=2.509, loss_scale=8, train_wall=11, gb_free=2.8, wall=45967 2021-06-19 07:25:03 | INFO | train_inner | epoch 002: 1054 / 3002 loss=2.686, ppl=6.43, wps=5942, ups=0.09, wpb=64871, bsz=128, num_updates=4029, lr=9.99758e-05, gnorm=2.505, loss_scale=8, train_wall=10, gb_free=2.8, wall=45978 2021-06-19 07:25:14 | INFO | train_inner | epoch 002: 1055 / 3002 loss=2.75, ppl=6.73, wps=5849.8, ups=0.09, wpb=64864, bsz=128, num_updates=4030, lr=9.99758e-05, gnorm=2.694, loss_scale=8, train_wall=11, gb_free=2.8, wall=45989 2021-06-19 07:25:26 | INFO | train_inner | epoch 002: 1056 / 3002 loss=2.913, ppl=7.53, wps=5806.8, ups=0.09, wpb=64801, bsz=128, num_updates=4031, lr=9.99758e-05, gnorm=2.438, loss_scale=8, train_wall=11, gb_free=2.8, wall=46000 2021-06-19 07:25:37 | INFO | train_inner | epoch 002: 1057 / 3002 loss=2.787, ppl=6.9, wps=5829.3, ups=0.09, wpb=64938, bsz=128, num_updates=4032, lr=9.99757e-05, gnorm=2.319, loss_scale=8, train_wall=11, gb_free=2.8, wall=46011 2021-06-19 07:25:48 | INFO | train_inner | epoch 002: 1058 / 3002 loss=2.639, ppl=6.23, wps=5957.2, ups=0.09, wpb=64837, bsz=128, num_updates=4033, lr=9.99757e-05, gnorm=2.295, loss_scale=8, train_wall=10, gb_free=2.8, wall=46022 2021-06-19 07:25:59 | INFO | train_inner | epoch 002: 1059 / 3002 loss=2.7, ppl=6.5, wps=5875.6, ups=0.09, wpb=64816, bsz=128, num_updates=4034, lr=9.99757e-05, gnorm=2.427, loss_scale=8, train_wall=11, gb_free=2.8, wall=46033 2021-06-19 07:26:10 | INFO | train_inner | epoch 002: 1060 / 3002 loss=2.988, ppl=7.93, wps=5755.5, ups=0.09, wpb=64728, bsz=128, num_updates=4035, lr=9.99757e-05, gnorm=2.57, loss_scale=8, train_wall=11, gb_free=2.8, wall=46044 2021-06-19 07:26:21 | INFO | train_inner | epoch 002: 1061 / 3002 loss=2.681, ppl=6.41, wps=5759.4, ups=0.09, wpb=64695, bsz=128, num_updates=4036, lr=9.99757e-05, gnorm=2.68, loss_scale=8, train_wall=11, gb_free=2.8, wall=46055 2021-06-19 07:26:33 | INFO | train_inner | epoch 002: 1062 / 3002 loss=2.728, ppl=6.63, wps=5677, ups=0.09, wpb=64684, bsz=128, num_updates=4037, lr=9.99757e-05, gnorm=2.258, loss_scale=8, train_wall=11, gb_free=2.8, wall=46067 2021-06-19 07:26:44 | INFO | train_inner | epoch 002: 1063 / 3002 loss=2.751, ppl=6.73, wps=5849.3, ups=0.09, wpb=64940, bsz=128, num_updates=4038, lr=9.99757e-05, gnorm=3.687, loss_scale=8, train_wall=11, gb_free=2.8, wall=46078 2021-06-19 07:26:55 | INFO | train_inner | epoch 002: 1064 / 3002 loss=2.781, ppl=6.87, wps=5818, ups=0.09, wpb=64794, bsz=128, num_updates=4039, lr=9.99757e-05, gnorm=2.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=46089 2021-06-19 07:27:06 | INFO | train_inner | epoch 002: 1065 / 3002 loss=2.728, ppl=6.63, wps=5754, ups=0.09, wpb=64774, bsz=128, num_updates=4040, lr=9.99757e-05, gnorm=2.678, loss_scale=8, train_wall=11, gb_free=2.8, wall=46100 2021-06-19 07:27:17 | INFO | train_inner | epoch 002: 1066 / 3002 loss=2.697, ppl=6.48, wps=5847.8, ups=0.09, wpb=64783, bsz=128, num_updates=4041, lr=9.99757e-05, gnorm=2.406, loss_scale=8, train_wall=11, gb_free=2.8, wall=46111 2021-06-19 07:27:28 | INFO | train_inner | epoch 002: 1067 / 3002 loss=2.877, ppl=7.35, wps=5778.3, ups=0.09, wpb=64842, bsz=128, num_updates=4042, lr=9.99757e-05, gnorm=2.373, loss_scale=8, train_wall=11, gb_free=2.8, wall=46123 2021-06-19 07:27:39 | INFO | train_inner | epoch 002: 1068 / 3002 loss=2.786, ppl=6.89, wps=5834.2, ups=0.09, wpb=64813, bsz=128, num_updates=4043, lr=9.99757e-05, gnorm=4.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=46134 2021-06-19 07:27:50 | INFO | train_inner | epoch 002: 1069 / 3002 loss=2.764, ppl=6.79, wps=5890.7, ups=0.09, wpb=64813, bsz=128, num_updates=4044, lr=9.99756e-05, gnorm=2.516, loss_scale=16, train_wall=11, gb_free=2.8, wall=46145 2021-06-19 07:28:02 | INFO | train_inner | epoch 002: 1070 / 3002 loss=2.742, ppl=6.69, wps=5841.3, ups=0.09, wpb=64797, bsz=128, num_updates=4045, lr=9.99756e-05, gnorm=2.499, loss_scale=16, train_wall=11, gb_free=2.8, wall=46156 2021-06-19 07:28:12 | INFO | train_inner | epoch 002: 1071 / 3002 loss=2.715, ppl=6.57, wps=5935.9, ups=0.09, wpb=64863, bsz=128, num_updates=4046, lr=9.99756e-05, gnorm=2.411, loss_scale=16, train_wall=10, gb_free=2.8, wall=46167 2021-06-19 07:28:24 | INFO | train_inner | epoch 002: 1072 / 3002 loss=2.771, ppl=6.83, wps=5812.2, ups=0.09, wpb=64808, bsz=128, num_updates=4047, lr=9.99756e-05, gnorm=2.489, loss_scale=16, train_wall=11, gb_free=2.8, wall=46178 2021-06-19 07:28:35 | INFO | train_inner | epoch 002: 1073 / 3002 loss=2.911, ppl=7.52, wps=5922, ups=0.09, wpb=64800, bsz=128, num_updates=4048, lr=9.99756e-05, gnorm=2.47, loss_scale=16, train_wall=10, gb_free=2.8, wall=46189 2021-06-19 07:28:45 | INFO | train_inner | epoch 002: 1074 / 3002 loss=2.889, ppl=7.41, wps=6028.8, ups=0.09, wpb=64873, bsz=128, num_updates=4049, lr=9.99756e-05, gnorm=2.297, loss_scale=16, train_wall=10, gb_free=2.8, wall=46200 2021-06-19 07:28:56 | INFO | train_inner | epoch 002: 1075 / 3002 loss=2.819, ppl=7.06, wps=5813.6, ups=0.09, wpb=64813, bsz=128, num_updates=4050, lr=9.99756e-05, gnorm=2.253, loss_scale=16, train_wall=11, gb_free=2.8, wall=46211 2021-06-19 07:29:08 | INFO | train_inner | epoch 002: 1076 / 3002 loss=2.647, ppl=6.26, wps=5853.1, ups=0.09, wpb=64741, bsz=128, num_updates=4051, lr=9.99756e-05, gnorm=2.313, loss_scale=16, train_wall=11, gb_free=2.8, wall=46222 2021-06-19 07:29:19 | INFO | train_inner | epoch 002: 1077 / 3002 loss=2.789, ppl=6.91, wps=5862, ups=0.09, wpb=64813, bsz=128, num_updates=4052, lr=9.99756e-05, gnorm=2.259, loss_scale=16, train_wall=11, gb_free=2.8, wall=46233 2021-06-19 07:29:30 | INFO | train_inner | epoch 002: 1078 / 3002 loss=2.709, ppl=6.54, wps=5802.1, ups=0.09, wpb=64779, bsz=128, num_updates=4053, lr=9.99756e-05, gnorm=2.205, loss_scale=16, train_wall=11, gb_free=2.8, wall=46244 2021-06-19 07:29:41 | INFO | train_inner | epoch 002: 1079 / 3002 loss=2.835, ppl=7.14, wps=5975.6, ups=0.09, wpb=64866, bsz=128, num_updates=4054, lr=9.99756e-05, gnorm=2.225, loss_scale=16, train_wall=10, gb_free=2.8, wall=46255 2021-06-19 07:29:52 | INFO | train_inner | epoch 002: 1080 / 3002 loss=2.784, ppl=6.89, wps=5870.8, ups=0.09, wpb=64853, bsz=128, num_updates=4055, lr=9.99756e-05, gnorm=2.342, loss_scale=16, train_wall=11, gb_free=2.8, wall=46266 2021-06-19 07:30:03 | INFO | train_inner | epoch 002: 1081 / 3002 loss=2.754, ppl=6.74, wps=5805.3, ups=0.09, wpb=64782, bsz=128, num_updates=4056, lr=9.99756e-05, gnorm=2.482, loss_scale=16, train_wall=11, gb_free=2.8, wall=46277 2021-06-19 07:30:14 | INFO | train_inner | epoch 002: 1082 / 3002 loss=2.719, ppl=6.59, wps=5847.7, ups=0.09, wpb=64733, bsz=128, num_updates=4057, lr=9.99755e-05, gnorm=2.338, loss_scale=16, train_wall=11, gb_free=2.8, wall=46288 2021-06-19 07:30:25 | INFO | train_inner | epoch 002: 1083 / 3002 loss=2.707, ppl=6.53, wps=5821.3, ups=0.09, wpb=64845, bsz=128, num_updates=4058, lr=9.99755e-05, gnorm=3.172, loss_scale=16, train_wall=11, gb_free=2.8, wall=46299 2021-06-19 07:30:36 | INFO | train_inner | epoch 002: 1084 / 3002 loss=2.766, ppl=6.8, wps=5896.9, ups=0.09, wpb=64782, bsz=128, num_updates=4059, lr=9.99755e-05, gnorm=2.466, loss_scale=16, train_wall=11, gb_free=2.8, wall=46310 2021-06-19 07:30:47 | INFO | train_inner | epoch 002: 1085 / 3002 loss=2.817, ppl=7.05, wps=5698.8, ups=0.09, wpb=64836, bsz=128, num_updates=4060, lr=9.99755e-05, gnorm=2.576, loss_scale=16, train_wall=11, gb_free=2.8, wall=46322 2021-06-19 07:30:59 | INFO | train_inner | epoch 002: 1086 / 3002 loss=2.804, ppl=6.98, wps=5791.2, ups=0.09, wpb=64843, bsz=128, num_updates=4061, lr=9.99755e-05, gnorm=2.467, loss_scale=16, train_wall=11, gb_free=2.8, wall=46333 2021-06-19 07:31:10 | INFO | train_inner | epoch 002: 1087 / 3002 loss=2.883, ppl=7.38, wps=5862, ups=0.09, wpb=64884, bsz=128, num_updates=4062, lr=9.99755e-05, gnorm=2.594, loss_scale=16, train_wall=11, gb_free=2.8, wall=46344 2021-06-19 07:31:21 | INFO | train_inner | epoch 002: 1088 / 3002 loss=2.843, ppl=7.18, wps=5697.4, ups=0.09, wpb=64789, bsz=128, num_updates=4063, lr=9.99755e-05, gnorm=2.358, loss_scale=16, train_wall=11, gb_free=2.8, wall=46355 2021-06-19 07:31:32 | INFO | train_inner | epoch 002: 1089 / 3002 loss=2.847, ppl=7.19, wps=5862.2, ups=0.09, wpb=64828, bsz=128, num_updates=4064, lr=9.99755e-05, gnorm=2.217, loss_scale=16, train_wall=11, gb_free=2.8, wall=46366 2021-06-19 07:31:43 | INFO | train_inner | epoch 002: 1090 / 3002 loss=2.822, ppl=7.07, wps=5765.1, ups=0.09, wpb=64890, bsz=128, num_updates=4065, lr=9.99755e-05, gnorm=2.364, loss_scale=16, train_wall=11, gb_free=2.8, wall=46378 2021-06-19 07:31:54 | INFO | train_inner | epoch 002: 1091 / 3002 loss=2.812, ppl=7.02, wps=5871.1, ups=0.09, wpb=64894, bsz=128, num_updates=4066, lr=9.99755e-05, gnorm=2.358, loss_scale=16, train_wall=11, gb_free=2.8, wall=46389 2021-06-19 07:32:05 | INFO | train_inner | epoch 002: 1092 / 3002 loss=2.825, ppl=7.09, wps=5846.6, ups=0.09, wpb=64799, bsz=128, num_updates=4067, lr=9.99755e-05, gnorm=3.747, loss_scale=16, train_wall=11, gb_free=2.8, wall=46400 2021-06-19 07:32:17 | INFO | train_inner | epoch 002: 1093 / 3002 loss=2.601, ppl=6.07, wps=5849.7, ups=0.09, wpb=64768, bsz=128, num_updates=4068, lr=9.99755e-05, gnorm=2.689, loss_scale=16, train_wall=11, gb_free=2.8, wall=46411 2021-06-19 07:32:28 | INFO | train_inner | epoch 002: 1094 / 3002 loss=2.736, ppl=6.66, wps=5823.2, ups=0.09, wpb=64820, bsz=128, num_updates=4069, lr=9.99754e-05, gnorm=2.21, loss_scale=16, train_wall=11, gb_free=2.8, wall=46422 2021-06-19 07:32:39 | INFO | train_inner | epoch 002: 1095 / 3002 loss=2.876, ppl=7.34, wps=5887.4, ups=0.09, wpb=64913, bsz=128, num_updates=4070, lr=9.99754e-05, gnorm=2.723, loss_scale=16, train_wall=11, gb_free=2.8, wall=46433 2021-06-19 07:32:50 | INFO | train_inner | epoch 002: 1096 / 3002 loss=2.846, ppl=7.19, wps=5724.7, ups=0.09, wpb=64821, bsz=128, num_updates=4071, lr=9.99754e-05, gnorm=2.669, loss_scale=16, train_wall=11, gb_free=2.8, wall=46444 2021-06-19 07:33:01 | INFO | train_inner | epoch 002: 1097 / 3002 loss=2.756, ppl=6.76, wps=5732.6, ups=0.09, wpb=64860, bsz=128, num_updates=4072, lr=9.99754e-05, gnorm=2.477, loss_scale=16, train_wall=11, gb_free=2.8, wall=46456 2021-06-19 07:33:12 | INFO | train_inner | epoch 002: 1098 / 3002 loss=2.751, ppl=6.73, wps=5827.9, ups=0.09, wpb=64802, bsz=128, num_updates=4073, lr=9.99754e-05, gnorm=2.217, loss_scale=16, train_wall=11, gb_free=2.8, wall=46467 2021-06-19 07:33:23 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-19 07:33:35 | INFO | train_inner | epoch 002: 1100 / 3002 loss=2.719, ppl=6.58, wps=2932.4, ups=0.05, wpb=64770, bsz=128, num_updates=4074, lr=9.99754e-05, gnorm=2.312, loss_scale=8, train_wall=21, gb_free=2.8, wall=46489 2021-06-19 07:33:46 | INFO | train_inner | epoch 002: 1101 / 3002 loss=2.618, ppl=6.14, wps=5902.8, ups=0.09, wpb=64837, bsz=128, num_updates=4075, lr=9.99754e-05, gnorm=2.225, loss_scale=8, train_wall=11, gb_free=2.8, wall=46500 2021-06-19 07:33:57 | INFO | train_inner | epoch 002: 1102 / 3002 loss=2.605, ppl=6.08, wps=5852.1, ups=0.09, wpb=64819, bsz=128, num_updates=4076, lr=9.99754e-05, gnorm=2.51, loss_scale=8, train_wall=11, gb_free=2.8, wall=46511 2021-06-19 07:34:08 | INFO | train_inner | epoch 002: 1103 / 3002 loss=2.743, ppl=6.69, wps=5833.1, ups=0.09, wpb=64832, bsz=128, num_updates=4077, lr=9.99754e-05, gnorm=2.28, loss_scale=8, train_wall=11, gb_free=2.8, wall=46522 2021-06-19 07:34:19 | INFO | train_inner | epoch 002: 1104 / 3002 loss=2.816, ppl=7.04, wps=5824.9, ups=0.09, wpb=64799, bsz=128, num_updates=4078, lr=9.99754e-05, gnorm=2.278, loss_scale=8, train_wall=11, gb_free=2.8, wall=46533 2021-06-19 07:34:30 | INFO | train_inner | epoch 002: 1105 / 3002 loss=2.792, ppl=6.93, wps=5777.8, ups=0.09, wpb=64732, bsz=128, num_updates=4079, lr=9.99754e-05, gnorm=2.285, loss_scale=8, train_wall=11, gb_free=2.8, wall=46544 2021-06-19 07:34:41 | INFO | train_inner | epoch 002: 1106 / 3002 loss=2.857, ppl=7.25, wps=5836.9, ups=0.09, wpb=64780, bsz=128, num_updates=4080, lr=9.99754e-05, gnorm=2.148, loss_scale=8, train_wall=11, gb_free=2.8, wall=46556 2021-06-19 07:34:52 | INFO | train_inner | epoch 002: 1107 / 3002 loss=2.857, ppl=7.25, wps=5793.8, ups=0.09, wpb=64821, bsz=128, num_updates=4081, lr=9.99754e-05, gnorm=2.191, loss_scale=8, train_wall=11, gb_free=2.8, wall=46567 2021-06-19 07:35:03 | INFO | train_inner | epoch 002: 1108 / 3002 loss=2.848, ppl=7.2, wps=5832.6, ups=0.09, wpb=64913, bsz=128, num_updates=4082, lr=9.99753e-05, gnorm=2.81, loss_scale=8, train_wall=11, gb_free=2.8, wall=46578 2021-06-19 07:35:15 | INFO | train_inner | epoch 002: 1109 / 3002 loss=2.708, ppl=6.53, wps=5872.9, ups=0.09, wpb=64849, bsz=128, num_updates=4083, lr=9.99753e-05, gnorm=2.183, loss_scale=8, train_wall=11, gb_free=2.8, wall=46589 2021-06-19 07:35:25 | INFO | train_inner | epoch 002: 1110 / 3002 loss=2.85, ppl=7.21, wps=5933.3, ups=0.09, wpb=64865, bsz=128, num_updates=4084, lr=9.99753e-05, gnorm=2.312, loss_scale=8, train_wall=10, gb_free=2.8, wall=46600 2021-06-19 07:35:36 | INFO | train_inner | epoch 002: 1111 / 3002 loss=2.694, ppl=6.47, wps=6014.9, ups=0.09, wpb=64792, bsz=128, num_updates=4085, lr=9.99753e-05, gnorm=2.244, loss_scale=8, train_wall=10, gb_free=2.8, wall=46611 2021-06-19 07:35:47 | INFO | train_inner | epoch 002: 1112 / 3002 loss=2.705, ppl=6.52, wps=5805.1, ups=0.09, wpb=64806, bsz=128, num_updates=4086, lr=9.99753e-05, gnorm=2.223, loss_scale=8, train_wall=11, gb_free=2.8, wall=46622 2021-06-19 07:35:59 | INFO | train_inner | epoch 002: 1113 / 3002 loss=2.715, ppl=6.57, wps=5819.1, ups=0.09, wpb=64778, bsz=128, num_updates=4087, lr=9.99753e-05, gnorm=2.229, loss_scale=8, train_wall=11, gb_free=2.8, wall=46633 2021-06-19 07:36:10 | INFO | train_inner | epoch 002: 1114 / 3002 loss=2.963, ppl=7.8, wps=5860.5, ups=0.09, wpb=64813, bsz=128, num_updates=4088, lr=9.99753e-05, gnorm=2.309, loss_scale=8, train_wall=11, gb_free=2.8, wall=46644 2021-06-19 07:36:21 | INFO | train_inner | epoch 002: 1115 / 3002 loss=2.858, ppl=7.25, wps=5754.7, ups=0.09, wpb=64846, bsz=128, num_updates=4089, lr=9.99753e-05, gnorm=2.317, loss_scale=8, train_wall=11, gb_free=2.8, wall=46655 2021-06-19 07:36:32 | INFO | train_inner | epoch 002: 1116 / 3002 loss=2.768, ppl=6.81, wps=5831.4, ups=0.09, wpb=64876, bsz=128, num_updates=4090, lr=9.99753e-05, gnorm=2.288, loss_scale=8, train_wall=11, gb_free=2.8, wall=46666 2021-06-19 07:36:43 | INFO | train_inner | epoch 002: 1117 / 3002 loss=2.747, ppl=6.71, wps=5830.8, ups=0.09, wpb=64770, bsz=128, num_updates=4091, lr=9.99753e-05, gnorm=2.345, loss_scale=8, train_wall=11, gb_free=2.8, wall=46677 2021-06-19 07:36:54 | INFO | train_inner | epoch 002: 1118 / 3002 loss=2.855, ppl=7.24, wps=5772, ups=0.09, wpb=64822, bsz=128, num_updates=4092, lr=9.99753e-05, gnorm=2.483, loss_scale=8, train_wall=11, gb_free=2.8, wall=46689 2021-06-19 07:37:05 | INFO | train_inner | epoch 002: 1119 / 3002 loss=2.875, ppl=7.34, wps=5887.8, ups=0.09, wpb=64808, bsz=128, num_updates=4093, lr=9.99753e-05, gnorm=2.34, loss_scale=8, train_wall=11, gb_free=2.8, wall=46700 2021-06-19 07:37:16 | INFO | train_inner | epoch 002: 1120 / 3002 loss=2.654, ppl=6.29, wps=5907.6, ups=0.09, wpb=64755, bsz=128, num_updates=4094, lr=9.99752e-05, gnorm=2.379, loss_scale=8, train_wall=10, gb_free=2.8, wall=46711 2021-06-19 07:37:27 | INFO | train_inner | epoch 002: 1121 / 3002 loss=2.829, ppl=7.11, wps=5899.9, ups=0.09, wpb=64818, bsz=128, num_updates=4095, lr=9.99752e-05, gnorm=2.355, loss_scale=8, train_wall=11, gb_free=2.8, wall=46722 2021-06-19 07:37:38 | INFO | train_inner | epoch 002: 1122 / 3002 loss=2.803, ppl=6.98, wps=5919.2, ups=0.09, wpb=64840, bsz=128, num_updates=4096, lr=9.99752e-05, gnorm=2.327, loss_scale=8, train_wall=11, gb_free=2.8, wall=46733 2021-06-19 07:37:49 | INFO | train_inner | epoch 002: 1123 / 3002 loss=2.881, ppl=7.37, wps=5854.6, ups=0.09, wpb=64838, bsz=128, num_updates=4097, lr=9.99752e-05, gnorm=2.364, loss_scale=8, train_wall=11, gb_free=2.8, wall=46744 2021-06-19 07:38:00 | INFO | train_inner | epoch 002: 1124 / 3002 loss=2.706, ppl=6.52, wps=5832.6, ups=0.09, wpb=64915, bsz=128, num_updates=4098, lr=9.99752e-05, gnorm=2.31, loss_scale=8, train_wall=11, gb_free=2.8, wall=46755 2021-06-19 07:38:12 | INFO | train_inner | epoch 002: 1125 / 3002 loss=2.718, ppl=6.58, wps=5843.3, ups=0.09, wpb=64870, bsz=128, num_updates=4099, lr=9.99752e-05, gnorm=2.228, loss_scale=8, train_wall=11, gb_free=2.8, wall=46766 2021-06-19 07:38:22 | INFO | train_inner | epoch 002: 1126 / 3002 loss=2.708, ppl=6.53, wps=5927.2, ups=0.09, wpb=64826, bsz=128, num_updates=4100, lr=9.99752e-05, gnorm=2.314, loss_scale=8, train_wall=10, gb_free=2.8, wall=46777 2021-06-19 07:38:34 | INFO | train_inner | epoch 002: 1127 / 3002 loss=2.817, ppl=7.05, wps=5864.6, ups=0.09, wpb=64857, bsz=128, num_updates=4101, lr=9.99752e-05, gnorm=2.32, loss_scale=8, train_wall=11, gb_free=2.8, wall=46788 2021-06-19 07:38:45 | INFO | train_inner | epoch 002: 1128 / 3002 loss=2.724, ppl=6.61, wps=5830.9, ups=0.09, wpb=64830, bsz=128, num_updates=4102, lr=9.99752e-05, gnorm=2.229, loss_scale=8, train_wall=11, gb_free=2.8, wall=46799 2021-06-19 07:38:56 | INFO | train_inner | epoch 002: 1129 / 3002 loss=2.894, ppl=7.43, wps=5805.2, ups=0.09, wpb=64780, bsz=128, num_updates=4103, lr=9.99752e-05, gnorm=2.228, loss_scale=8, train_wall=11, gb_free=2.8, wall=46810 2021-06-19 07:39:07 | INFO | train_inner | epoch 002: 1130 / 3002 loss=2.77, ppl=6.82, wps=5776.5, ups=0.09, wpb=64855, bsz=128, num_updates=4104, lr=9.99752e-05, gnorm=2.298, loss_scale=8, train_wall=11, gb_free=2.8, wall=46821 2021-06-19 07:39:18 | INFO | train_inner | epoch 002: 1131 / 3002 loss=2.636, ppl=6.22, wps=5812.3, ups=0.09, wpb=64898, bsz=128, num_updates=4105, lr=9.99752e-05, gnorm=2.232, loss_scale=8, train_wall=11, gb_free=2.8, wall=46833 2021-06-19 07:39:29 | INFO | train_inner | epoch 002: 1132 / 3002 loss=2.82, ppl=7.06, wps=5873.8, ups=0.09, wpb=64788, bsz=128, num_updates=4106, lr=9.99752e-05, gnorm=2.21, loss_scale=8, train_wall=11, gb_free=2.8, wall=46844 2021-06-19 07:39:40 | INFO | train_inner | epoch 002: 1133 / 3002 loss=2.879, ppl=7.36, wps=5772.1, ups=0.09, wpb=64787, bsz=128, num_updates=4107, lr=9.99751e-05, gnorm=2.382, loss_scale=8, train_wall=11, gb_free=2.8, wall=46855 2021-06-19 07:39:52 | INFO | train_inner | epoch 002: 1134 / 3002 loss=3.042, ppl=8.23, wps=5806.2, ups=0.09, wpb=64756, bsz=128, num_updates=4108, lr=9.99751e-05, gnorm=2.35, loss_scale=8, train_wall=11, gb_free=2.8, wall=46866 2021-06-19 07:40:03 | INFO | train_inner | epoch 002: 1135 / 3002 loss=2.854, ppl=7.23, wps=5797.3, ups=0.09, wpb=64818, bsz=128, num_updates=4109, lr=9.99751e-05, gnorm=2.362, loss_scale=8, train_wall=11, gb_free=2.8, wall=46877 2021-06-19 07:40:14 | INFO | train_inner | epoch 002: 1136 / 3002 loss=2.799, ppl=6.96, wps=5867.8, ups=0.09, wpb=64886, bsz=128, num_updates=4110, lr=9.99751e-05, gnorm=2.295, loss_scale=8, train_wall=11, gb_free=2.8, wall=46888 2021-06-19 07:40:25 | INFO | train_inner | epoch 002: 1137 / 3002 loss=2.753, ppl=6.74, wps=5918.7, ups=0.09, wpb=64796, bsz=128, num_updates=4111, lr=9.99751e-05, gnorm=2.54, loss_scale=8, train_wall=10, gb_free=2.8, wall=46899 2021-06-19 07:40:36 | INFO | train_inner | epoch 002: 1138 / 3002 loss=2.843, ppl=7.17, wps=5888.5, ups=0.09, wpb=64816, bsz=128, num_updates=4112, lr=9.99751e-05, gnorm=2.465, loss_scale=8, train_wall=11, gb_free=2.8, wall=46910 2021-06-19 07:40:47 | INFO | train_inner | epoch 002: 1139 / 3002 loss=2.693, ppl=6.47, wps=5859.7, ups=0.09, wpb=64810, bsz=128, num_updates=4113, lr=9.99751e-05, gnorm=2.117, loss_scale=8, train_wall=11, gb_free=2.8, wall=46921 2021-06-19 07:40:58 | INFO | train_inner | epoch 002: 1140 / 3002 loss=2.678, ppl=6.4, wps=5886.3, ups=0.09, wpb=64810, bsz=128, num_updates=4114, lr=9.99751e-05, gnorm=2.231, loss_scale=8, train_wall=11, gb_free=2.8, wall=46932 2021-06-19 07:41:09 | INFO | train_inner | epoch 002: 1141 / 3002 loss=2.634, ppl=6.21, wps=5825.7, ups=0.09, wpb=64899, bsz=128, num_updates=4115, lr=9.99751e-05, gnorm=2.183, loss_scale=8, train_wall=11, gb_free=2.8, wall=46943 2021-06-19 07:41:20 | INFO | train_inner | epoch 002: 1142 / 3002 loss=2.77, ppl=6.82, wps=5889.6, ups=0.09, wpb=64876, bsz=128, num_updates=4116, lr=9.99751e-05, gnorm=2.17, loss_scale=8, train_wall=11, gb_free=2.8, wall=46954 2021-06-19 07:41:31 | INFO | train_inner | epoch 002: 1143 / 3002 loss=2.852, ppl=7.22, wps=5817.4, ups=0.09, wpb=64849, bsz=128, num_updates=4117, lr=9.99751e-05, gnorm=2.329, loss_scale=8, train_wall=11, gb_free=2.8, wall=46966 2021-06-19 07:41:42 | INFO | train_inner | epoch 002: 1144 / 3002 loss=2.778, ppl=6.86, wps=5778.3, ups=0.09, wpb=64903, bsz=128, num_updates=4118, lr=9.99751e-05, gnorm=2.487, loss_scale=8, train_wall=11, gb_free=2.8, wall=46977 2021-06-19 07:41:54 | INFO | train_inner | epoch 002: 1145 / 3002 loss=2.709, ppl=6.54, wps=5805.8, ups=0.09, wpb=64849, bsz=128, num_updates=4119, lr=9.9975e-05, gnorm=2.275, loss_scale=8, train_wall=11, gb_free=2.8, wall=46988 2021-06-19 07:42:05 | INFO | train_inner | epoch 002: 1146 / 3002 loss=2.729, ppl=6.63, wps=5910.2, ups=0.09, wpb=64860, bsz=128, num_updates=4120, lr=9.9975e-05, gnorm=2.122, loss_scale=8, train_wall=11, gb_free=2.8, wall=46999 2021-06-19 07:42:16 | INFO | train_inner | epoch 002: 1147 / 3002 loss=2.817, ppl=7.05, wps=5847.6, ups=0.09, wpb=64833, bsz=128, num_updates=4121, lr=9.9975e-05, gnorm=2.168, loss_scale=8, train_wall=11, gb_free=2.8, wall=47010 2021-06-19 07:42:27 | INFO | train_inner | epoch 002: 1148 / 3002 loss=2.751, ppl=6.73, wps=5781.1, ups=0.09, wpb=64846, bsz=128, num_updates=4122, lr=9.9975e-05, gnorm=2.254, loss_scale=8, train_wall=11, gb_free=2.8, wall=47021 2021-06-19 07:42:38 | INFO | train_inner | epoch 002: 1149 / 3002 loss=2.604, ppl=6.08, wps=5826.7, ups=0.09, wpb=64812, bsz=128, num_updates=4123, lr=9.9975e-05, gnorm=2.152, loss_scale=8, train_wall=11, gb_free=2.8, wall=47032 2021-06-19 07:42:49 | INFO | train_inner | epoch 002: 1150 / 3002 loss=2.774, ppl=6.84, wps=5772.8, ups=0.09, wpb=64875, bsz=128, num_updates=4124, lr=9.9975e-05, gnorm=2.172, loss_scale=8, train_wall=11, gb_free=2.8, wall=47044 2021-06-19 07:43:00 | INFO | train_inner | epoch 002: 1151 / 3002 loss=2.708, ppl=6.54, wps=5910.5, ups=0.09, wpb=64824, bsz=128, num_updates=4125, lr=9.9975e-05, gnorm=2.236, loss_scale=8, train_wall=11, gb_free=2.8, wall=47055 2021-06-19 07:43:11 | INFO | train_inner | epoch 002: 1152 / 3002 loss=2.685, ppl=6.43, wps=5896.4, ups=0.09, wpb=64807, bsz=128, num_updates=4126, lr=9.9975e-05, gnorm=2.191, loss_scale=8, train_wall=11, gb_free=2.8, wall=47066 2021-06-19 07:43:22 | INFO | train_inner | epoch 002: 1153 / 3002 loss=2.807, ppl=7, wps=5908.5, ups=0.09, wpb=64805, bsz=128, num_updates=4127, lr=9.9975e-05, gnorm=2.263, loss_scale=8, train_wall=11, gb_free=2.8, wall=47076 2021-06-19 07:43:33 | INFO | train_inner | epoch 002: 1154 / 3002 loss=2.791, ppl=6.92, wps=5945.5, ups=0.09, wpb=64781, bsz=128, num_updates=4128, lr=9.9975e-05, gnorm=2.356, loss_scale=8, train_wall=10, gb_free=2.8, wall=47087 2021-06-19 07:43:44 | INFO | train_inner | epoch 002: 1155 / 3002 loss=2.815, ppl=7.03, wps=5972.5, ups=0.09, wpb=64852, bsz=128, num_updates=4129, lr=9.9975e-05, gnorm=2.312, loss_scale=8, train_wall=10, gb_free=2.8, wall=47098 2021-06-19 07:43:55 | INFO | train_inner | epoch 002: 1156 / 3002 loss=2.675, ppl=6.39, wps=5947, ups=0.09, wpb=64838, bsz=128, num_updates=4130, lr=9.9975e-05, gnorm=2.362, loss_scale=8, train_wall=10, gb_free=2.8, wall=47109 2021-06-19 07:44:06 | INFO | train_inner | epoch 002: 1157 / 3002 loss=2.883, ppl=7.37, wps=5832.8, ups=0.09, wpb=64786, bsz=128, num_updates=4131, lr=9.99749e-05, gnorm=2.311, loss_scale=8, train_wall=11, gb_free=2.8, wall=47120 2021-06-19 07:44:17 | INFO | train_inner | epoch 002: 1158 / 3002 loss=2.732, ppl=6.64, wps=5935.1, ups=0.09, wpb=64937, bsz=128, num_updates=4132, lr=9.99749e-05, gnorm=2.189, loss_scale=8, train_wall=11, gb_free=2.8, wall=47131 2021-06-19 07:44:28 | INFO | train_inner | epoch 002: 1159 / 3002 loss=2.771, ppl=6.82, wps=5850.7, ups=0.09, wpb=64823, bsz=128, num_updates=4133, lr=9.99749e-05, gnorm=2.172, loss_scale=8, train_wall=11, gb_free=2.8, wall=47142 2021-06-19 07:44:39 | INFO | train_inner | epoch 002: 1160 / 3002 loss=2.762, ppl=6.78, wps=5912.2, ups=0.09, wpb=64817, bsz=128, num_updates=4134, lr=9.99749e-05, gnorm=2.257, loss_scale=8, train_wall=11, gb_free=2.8, wall=47153 2021-06-19 07:44:50 | INFO | train_inner | epoch 002: 1161 / 3002 loss=2.808, ppl=7, wps=5849.2, ups=0.09, wpb=64782, bsz=128, num_updates=4135, lr=9.99749e-05, gnorm=2.24, loss_scale=8, train_wall=11, gb_free=2.8, wall=47164 2021-06-19 07:45:01 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 2021-06-19 07:45:12 | INFO | train_inner | epoch 002: 1163 / 3002 loss=2.849, ppl=7.2, wps=2971.6, ups=0.05, wpb=64865, bsz=128, num_updates=4136, lr=9.99749e-05, gnorm=2.266, loss_scale=4, train_wall=21, gb_free=2.8, wall=47186 2021-06-19 07:45:23 | INFO | train_inner | epoch 002: 1164 / 3002 loss=2.744, ppl=6.7, wps=5869.2, ups=0.09, wpb=64836, bsz=128, num_updates=4137, lr=9.99749e-05, gnorm=2.286, loss_scale=4, train_wall=11, gb_free=2.8, wall=47197 2021-06-19 07:45:34 | INFO | train_inner | epoch 002: 1165 / 3002 loss=2.838, ppl=7.15, wps=5820.6, ups=0.09, wpb=64833, bsz=128, num_updates=4138, lr=9.99749e-05, gnorm=2.245, loss_scale=4, train_wall=11, gb_free=2.8, wall=47208 2021-06-19 07:45:45 | INFO | train_inner | epoch 002: 1166 / 3002 loss=2.785, ppl=6.89, wps=5844.1, ups=0.09, wpb=64839, bsz=128, num_updates=4139, lr=9.99749e-05, gnorm=2.223, loss_scale=4, train_wall=11, gb_free=2.8, wall=47219 2021-06-19 07:45:56 | INFO | train_inner | epoch 002: 1167 / 3002 loss=2.809, ppl=7.01, wps=5823.7, ups=0.09, wpb=64816, bsz=128, num_updates=4140, lr=9.99749e-05, gnorm=2.172, loss_scale=4, train_wall=11, gb_free=2.8, wall=47231 2021-06-19 07:46:07 | INFO | train_inner | epoch 002: 1168 / 3002 loss=2.906, ppl=7.5, wps=5850, ups=0.09, wpb=64856, bsz=128, num_updates=4141, lr=9.99749e-05, gnorm=2.127, loss_scale=4, train_wall=11, gb_free=2.8, wall=47242 2021-06-19 07:46:18 | INFO | train_inner | epoch 002: 1169 / 3002 loss=2.797, ppl=6.95, wps=5933.1, ups=0.09, wpb=64844, bsz=128, num_updates=4142, lr=9.99749e-05, gnorm=2.312, loss_scale=4, train_wall=10, gb_free=2.8, wall=47253 2021-06-19 07:46:29 | INFO | train_inner | epoch 002: 1170 / 3002 loss=2.765, ppl=6.8, wps=5929.6, ups=0.09, wpb=64855, bsz=128, num_updates=4143, lr=9.99749e-05, gnorm=2.258, loss_scale=4, train_wall=11, gb_free=2.8, wall=47264 2021-06-19 07:46:40 | INFO | train_inner | epoch 002: 1171 / 3002 loss=2.817, ppl=7.05, wps=5774.8, ups=0.09, wpb=64812, bsz=128, num_updates=4144, lr=9.99748e-05, gnorm=2.255, loss_scale=4, train_wall=11, gb_free=2.8, wall=47275 2021-06-19 07:46:51 | INFO | train_inner | epoch 002: 1172 / 3002 loss=2.671, ppl=6.37, wps=5870.7, ups=0.09, wpb=64893, bsz=128, num_updates=4145, lr=9.99748e-05, gnorm=2.258, loss_scale=4, train_wall=11, gb_free=2.8, wall=47286 2021-06-19 07:47:03 | INFO | train_inner | epoch 002: 1173 / 3002 loss=2.736, ppl=6.66, wps=5848, ups=0.09, wpb=64867, bsz=128, num_updates=4146, lr=9.99748e-05, gnorm=2.284, loss_scale=4, train_wall=11, gb_free=2.8, wall=47297 2021-06-19 07:47:14 | INFO | train_inner | epoch 002: 1174 / 3002 loss=2.907, ppl=7.5, wps=5808.2, ups=0.09, wpb=64794, bsz=128, num_updates=4147, lr=9.99748e-05, gnorm=2.987, loss_scale=4, train_wall=11, gb_free=2.8, wall=47308 2021-06-19 07:47:25 | INFO | train_inner | epoch 002: 1175 / 3002 loss=2.709, ppl=6.54, wps=5837.1, ups=0.09, wpb=64810, bsz=128, num_updates=4148, lr=9.99748e-05, gnorm=2.274, loss_scale=4, train_wall=11, gb_free=2.8, wall=47319 2021-06-19 07:47:36 | INFO | train_inner | epoch 002: 1176 / 3002 loss=2.749, ppl=6.72, wps=5872.1, ups=0.09, wpb=64845, bsz=128, num_updates=4149, lr=9.99748e-05, gnorm=2.214, loss_scale=4, train_wall=11, gb_free=2.8, wall=47330 2021-06-19 07:47:47 | INFO | train_inner | epoch 002: 1177 / 3002 loss=2.726, ppl=6.62, wps=5882, ups=0.09, wpb=64834, bsz=128, num_updates=4150, lr=9.99748e-05, gnorm=2.268, loss_scale=4, train_wall=11, gb_free=2.8, wall=47341 2021-06-19 07:47:58 | INFO | train_inner | epoch 002: 1178 / 3002 loss=2.728, ppl=6.63, wps=5928.8, ups=0.09, wpb=64832, bsz=128, num_updates=4151, lr=9.99748e-05, gnorm=2.268, loss_scale=4, train_wall=10, gb_free=2.8, wall=47352 2021-06-19 07:48:09 | INFO | train_inner | epoch 002: 1179 / 3002 loss=2.768, ppl=6.81, wps=5844.4, ups=0.09, wpb=64871, bsz=128, num_updates=4152, lr=9.99748e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=47363 2021-06-19 07:48:20 | INFO | train_inner | epoch 002: 1180 / 3002 loss=2.968, ppl=7.82, wps=5850.4, ups=0.09, wpb=64736, bsz=128, num_updates=4153, lr=9.99748e-05, gnorm=2.331, loss_scale=4, train_wall=11, gb_free=2.8, wall=47374 2021-06-19 07:48:31 | INFO | train_inner | epoch 002: 1181 / 3002 loss=2.691, ppl=6.46, wps=5781.3, ups=0.09, wpb=64831, bsz=128, num_updates=4154, lr=9.99748e-05, gnorm=2.307, loss_scale=4, train_wall=11, gb_free=2.8, wall=47386 2021-06-19 07:48:42 | INFO | train_inner | epoch 002: 1182 / 3002 loss=2.561, ppl=5.9, wps=5837.9, ups=0.09, wpb=64763, bsz=128, num_updates=4155, lr=9.99748e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=47397 2021-06-19 07:48:53 | INFO | train_inner | epoch 002: 1183 / 3002 loss=2.859, ppl=7.26, wps=5821.1, ups=0.09, wpb=64900, bsz=128, num_updates=4156, lr=9.99747e-05, gnorm=2.262, loss_scale=4, train_wall=11, gb_free=2.8, wall=47408 2021-06-19 07:49:05 | INFO | train_inner | epoch 002: 1184 / 3002 loss=2.66, ppl=6.32, wps=5819.5, ups=0.09, wpb=64793, bsz=128, num_updates=4157, lr=9.99747e-05, gnorm=2.169, loss_scale=4, train_wall=11, gb_free=2.8, wall=47419 2021-06-19 07:49:16 | INFO | train_inner | epoch 002: 1185 / 3002 loss=2.816, ppl=7.04, wps=5865.3, ups=0.09, wpb=64804, bsz=128, num_updates=4158, lr=9.99747e-05, gnorm=2.259, loss_scale=4, train_wall=11, gb_free=2.8, wall=47430 2021-06-19 07:49:27 | INFO | train_inner | epoch 002: 1186 / 3002 loss=2.871, ppl=7.32, wps=5885.5, ups=0.09, wpb=64871, bsz=128, num_updates=4159, lr=9.99747e-05, gnorm=2.204, loss_scale=4, train_wall=11, gb_free=2.8, wall=47441 2021-06-19 07:49:38 | INFO | train_inner | epoch 002: 1187 / 3002 loss=2.722, ppl=6.6, wps=5869.9, ups=0.09, wpb=64746, bsz=128, num_updates=4160, lr=9.99747e-05, gnorm=2.268, loss_scale=4, train_wall=11, gb_free=2.8, wall=47452 2021-06-19 07:49:49 | INFO | train_inner | epoch 002: 1188 / 3002 loss=2.873, ppl=7.32, wps=5883.1, ups=0.09, wpb=64891, bsz=128, num_updates=4161, lr=9.99747e-05, gnorm=3.063, loss_scale=4, train_wall=11, gb_free=2.8, wall=47463 2021-06-19 07:50:00 | INFO | train_inner | epoch 002: 1189 / 3002 loss=2.757, ppl=6.76, wps=5963, ups=0.09, wpb=64905, bsz=128, num_updates=4162, lr=9.99747e-05, gnorm=2.207, loss_scale=4, train_wall=10, gb_free=2.8, wall=47474 2021-06-19 07:50:11 | INFO | train_inner | epoch 002: 1190 / 3002 loss=2.737, ppl=6.67, wps=5815.7, ups=0.09, wpb=64838, bsz=128, num_updates=4163, lr=9.99747e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=47485 2021-06-19 07:50:22 | INFO | train_inner | epoch 002: 1191 / 3002 loss=2.741, ppl=6.69, wps=5935.6, ups=0.09, wpb=64856, bsz=128, num_updates=4164, lr=9.99747e-05, gnorm=2.254, loss_scale=4, train_wall=10, gb_free=2.8, wall=47496 2021-06-19 07:50:33 | INFO | train_inner | epoch 002: 1192 / 3002 loss=2.742, ppl=6.69, wps=5884.2, ups=0.09, wpb=64806, bsz=128, num_updates=4165, lr=9.99747e-05, gnorm=2.189, loss_scale=4, train_wall=11, gb_free=2.8, wall=47507 2021-06-19 07:50:44 | INFO | train_inner | epoch 002: 1193 / 3002 loss=2.783, ppl=6.88, wps=5881.2, ups=0.09, wpb=64780, bsz=128, num_updates=4166, lr=9.99747e-05, gnorm=2.226, loss_scale=4, train_wall=11, gb_free=2.8, wall=47518 2021-06-19 07:50:55 | INFO | train_inner | epoch 002: 1194 / 3002 loss=2.7, ppl=6.5, wps=5874.4, ups=0.09, wpb=64871, bsz=128, num_updates=4167, lr=9.99747e-05, gnorm=2.404, loss_scale=4, train_wall=11, gb_free=2.8, wall=47529 2021-06-19 07:51:06 | INFO | train_inner | epoch 002: 1195 / 3002 loss=2.672, ppl=6.37, wps=5854.6, ups=0.09, wpb=64858, bsz=128, num_updates=4168, lr=9.99747e-05, gnorm=2.29, loss_scale=4, train_wall=11, gb_free=2.8, wall=47540 2021-06-19 07:51:17 | INFO | train_inner | epoch 002: 1196 / 3002 loss=2.849, ppl=7.2, wps=5949.5, ups=0.09, wpb=64955, bsz=128, num_updates=4169, lr=9.99746e-05, gnorm=2.327, loss_scale=4, train_wall=10, gb_free=2.8, wall=47551 2021-06-19 07:51:28 | INFO | train_inner | epoch 002: 1197 / 3002 loss=2.899, ppl=7.46, wps=5817, ups=0.09, wpb=64808, bsz=128, num_updates=4170, lr=9.99746e-05, gnorm=2.202, loss_scale=4, train_wall=11, gb_free=2.8, wall=47562 2021-06-19 07:51:39 | INFO | train_inner | epoch 002: 1198 / 3002 loss=2.858, ppl=7.25, wps=5806.1, ups=0.09, wpb=64916, bsz=128, num_updates=4171, lr=9.99746e-05, gnorm=2.206, loss_scale=4, train_wall=11, gb_free=2.8, wall=47573 2021-06-19 07:51:50 | INFO | train_inner | epoch 002: 1199 / 3002 loss=2.78, ppl=6.87, wps=5818.2, ups=0.09, wpb=64822, bsz=128, num_updates=4172, lr=9.99746e-05, gnorm=2.372, loss_scale=4, train_wall=11, gb_free=2.8, wall=47585 2021-06-19 07:52:01 | INFO | train_inner | epoch 002: 1200 / 3002 loss=2.855, ppl=7.24, wps=5815.3, ups=0.09, wpb=64709, bsz=128, num_updates=4173, lr=9.99746e-05, gnorm=2.759, loss_scale=4, train_wall=11, gb_free=2.8, wall=47596 2021-06-19 07:52:13 | INFO | train_inner | epoch 002: 1201 / 3002 loss=2.75, ppl=6.73, wps=5774.6, ups=0.09, wpb=64805, bsz=128, num_updates=4174, lr=9.99746e-05, gnorm=2.273, loss_scale=4, train_wall=11, gb_free=2.8, wall=47607 2021-06-19 07:52:24 | INFO | train_inner | epoch 002: 1202 / 3002 loss=2.899, ppl=7.46, wps=5795.7, ups=0.09, wpb=64846, bsz=128, num_updates=4175, lr=9.99746e-05, gnorm=2.325, loss_scale=4, train_wall=11, gb_free=2.8, wall=47618 2021-06-19 07:52:35 | INFO | train_inner | epoch 002: 1203 / 3002 loss=2.831, ppl=7.12, wps=5779.4, ups=0.09, wpb=64785, bsz=128, num_updates=4176, lr=9.99746e-05, gnorm=2.262, loss_scale=4, train_wall=11, gb_free=2.8, wall=47629 2021-06-19 07:52:46 | INFO | train_inner | epoch 002: 1204 / 3002 loss=2.885, ppl=7.39, wps=5774.2, ups=0.09, wpb=64783, bsz=128, num_updates=4177, lr=9.99746e-05, gnorm=2.247, loss_scale=4, train_wall=11, gb_free=2.8, wall=47640 2021-06-19 07:52:57 | INFO | train_inner | epoch 002: 1205 / 3002 loss=2.808, ppl=7, wps=5863.1, ups=0.09, wpb=64777, bsz=128, num_updates=4178, lr=9.99746e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=47652 2021-06-19 07:53:08 | INFO | train_inner | epoch 002: 1206 / 3002 loss=2.67, ppl=6.37, wps=5835.4, ups=0.09, wpb=64868, bsz=128, num_updates=4179, lr=9.99746e-05, gnorm=2.296, loss_scale=4, train_wall=11, gb_free=2.8, wall=47663 2021-06-19 07:53:19 | INFO | train_inner | epoch 002: 1207 / 3002 loss=2.814, ppl=7.03, wps=5864.7, ups=0.09, wpb=64760, bsz=128, num_updates=4180, lr=9.99746e-05, gnorm=2.248, loss_scale=4, train_wall=11, gb_free=2.8, wall=47674 2021-06-19 07:53:30 | INFO | train_inner | epoch 002: 1208 / 3002 loss=2.862, ppl=7.27, wps=5885.8, ups=0.09, wpb=64864, bsz=128, num_updates=4181, lr=9.99745e-05, gnorm=2.467, loss_scale=4, train_wall=11, gb_free=2.8, wall=47685 2021-06-19 07:53:41 | INFO | train_inner | epoch 002: 1209 / 3002 loss=2.871, ppl=7.31, wps=5927.5, ups=0.09, wpb=64823, bsz=128, num_updates=4182, lr=9.99745e-05, gnorm=2.267, loss_scale=4, train_wall=10, gb_free=2.8, wall=47696 2021-06-19 07:53:52 | INFO | train_inner | epoch 002: 1210 / 3002 loss=2.598, ppl=6.06, wps=5960.7, ups=0.09, wpb=64823, bsz=128, num_updates=4183, lr=9.99745e-05, gnorm=2.105, loss_scale=4, train_wall=10, gb_free=2.8, wall=47707 2021-06-19 07:54:03 | INFO | train_inner | epoch 002: 1211 / 3002 loss=2.816, ppl=7.04, wps=5914.8, ups=0.09, wpb=64846, bsz=128, num_updates=4184, lr=9.99745e-05, gnorm=2.236, loss_scale=4, train_wall=11, gb_free=2.8, wall=47718 2021-06-19 07:54:14 | INFO | train_inner | epoch 002: 1212 / 3002 loss=2.827, ppl=7.09, wps=5918.9, ups=0.09, wpb=64838, bsz=128, num_updates=4185, lr=9.99745e-05, gnorm=2.242, loss_scale=4, train_wall=10, gb_free=2.8, wall=47728 2021-06-19 07:54:25 | INFO | train_inner | epoch 002: 1213 / 3002 loss=2.946, ppl=7.71, wps=5908.6, ups=0.09, wpb=64770, bsz=128, num_updates=4186, lr=9.99745e-05, gnorm=2.269, loss_scale=4, train_wall=11, gb_free=2.8, wall=47739 2021-06-19 07:54:36 | INFO | train_inner | epoch 002: 1214 / 3002 loss=2.605, ppl=6.09, wps=5843.1, ups=0.09, wpb=64877, bsz=128, num_updates=4187, lr=9.99745e-05, gnorm=2.238, loss_scale=4, train_wall=11, gb_free=2.8, wall=47751 2021-06-19 07:54:47 | INFO | train_inner | epoch 002: 1215 / 3002 loss=2.873, ppl=7.33, wps=5908.9, ups=0.09, wpb=64859, bsz=128, num_updates=4188, lr=9.99745e-05, gnorm=2.272, loss_scale=4, train_wall=11, gb_free=2.8, wall=47761 2021-06-19 07:54:58 | INFO | train_inner | epoch 002: 1216 / 3002 loss=2.674, ppl=6.38, wps=5800.4, ups=0.09, wpb=64827, bsz=128, num_updates=4189, lr=9.99745e-05, gnorm=2.538, loss_scale=4, train_wall=11, gb_free=2.8, wall=47773 2021-06-19 07:55:10 | INFO | train_inner | epoch 002: 1217 / 3002 loss=2.788, ppl=6.91, wps=5810.4, ups=0.09, wpb=64876, bsz=128, num_updates=4190, lr=9.99745e-05, gnorm=2.296, loss_scale=4, train_wall=11, gb_free=2.8, wall=47784 2021-06-19 07:55:21 | INFO | train_inner | epoch 002: 1218 / 3002 loss=2.815, ppl=7.04, wps=5805.4, ups=0.09, wpb=64763, bsz=128, num_updates=4191, lr=9.99745e-05, gnorm=2.281, loss_scale=4, train_wall=11, gb_free=2.8, wall=47795 2021-06-19 07:55:32 | INFO | train_inner | epoch 002: 1219 / 3002 loss=2.833, ppl=7.12, wps=5787.9, ups=0.09, wpb=64858, bsz=128, num_updates=4192, lr=9.99745e-05, gnorm=2.293, loss_scale=4, train_wall=11, gb_free=2.8, wall=47806 2021-06-19 07:55:43 | INFO | train_inner | epoch 002: 1220 / 3002 loss=2.785, ppl=6.89, wps=5877.8, ups=0.09, wpb=64828, bsz=128, num_updates=4193, lr=9.99745e-05, gnorm=2.262, loss_scale=4, train_wall=11, gb_free=2.8, wall=47817 2021-06-19 07:55:54 | INFO | train_inner | epoch 002: 1221 / 3002 loss=2.825, ppl=7.09, wps=5812.2, ups=0.09, wpb=64871, bsz=128, num_updates=4194, lr=9.99744e-05, gnorm=2.243, loss_scale=4, train_wall=11, gb_free=2.8, wall=47828 2021-06-19 07:56:05 | INFO | train_inner | epoch 002: 1222 / 3002 loss=2.734, ppl=6.65, wps=5828.1, ups=0.09, wpb=64799, bsz=128, num_updates=4195, lr=9.99744e-05, gnorm=2.332, loss_scale=4, train_wall=11, gb_free=2.8, wall=47840 2021-06-19 07:56:16 | INFO | train_inner | epoch 002: 1223 / 3002 loss=2.826, ppl=7.09, wps=5850.2, ups=0.09, wpb=64837, bsz=128, num_updates=4196, lr=9.99744e-05, gnorm=2.447, loss_scale=4, train_wall=11, gb_free=2.8, wall=47851 2021-06-19 07:56:27 | INFO | train_inner | epoch 002: 1224 / 3002 loss=2.785, ppl=6.89, wps=5931.3, ups=0.09, wpb=64835, bsz=128, num_updates=4197, lr=9.99744e-05, gnorm=2.268, loss_scale=4, train_wall=10, gb_free=2.8, wall=47862 2021-06-19 07:56:38 | INFO | train_inner | epoch 002: 1225 / 3002 loss=2.608, ppl=6.1, wps=5865.9, ups=0.09, wpb=64832, bsz=128, num_updates=4198, lr=9.99744e-05, gnorm=2.18, loss_scale=4, train_wall=11, gb_free=2.8, wall=47873 2021-06-19 07:56:49 | INFO | train_inner | epoch 002: 1226 / 3002 loss=2.709, ppl=6.54, wps=5957.1, ups=0.09, wpb=64846, bsz=128, num_updates=4199, lr=9.99744e-05, gnorm=2.346, loss_scale=4, train_wall=10, gb_free=2.8, wall=47883 2021-06-19 07:57:00 | INFO | train_inner | epoch 002: 1227 / 3002 loss=2.859, ppl=7.26, wps=5786.1, ups=0.09, wpb=64800, bsz=128, num_updates=4200, lr=9.99744e-05, gnorm=2.287, loss_scale=4, train_wall=11, gb_free=2.8, wall=47895 2021-06-19 07:57:12 | INFO | train_inner | epoch 002: 1228 / 3002 loss=2.79, ppl=6.92, wps=5787.2, ups=0.09, wpb=64807, bsz=128, num_updates=4201, lr=9.99744e-05, gnorm=2.376, loss_scale=4, train_wall=11, gb_free=2.8, wall=47906 2021-06-19 07:57:22 | INFO | train_inner | epoch 002: 1229 / 3002 loss=2.968, ppl=7.82, wps=5922.5, ups=0.09, wpb=64777, bsz=128, num_updates=4202, lr=9.99744e-05, gnorm=2.256, loss_scale=4, train_wall=11, gb_free=2.8, wall=47917 2021-06-19 07:57:33 | INFO | train_inner | epoch 002: 1230 / 3002 loss=2.641, ppl=6.24, wps=5897.6, ups=0.09, wpb=64894, bsz=128, num_updates=4203, lr=9.99744e-05, gnorm=2.219, loss_scale=4, train_wall=11, gb_free=2.8, wall=47928 2021-06-19 07:57:45 | INFO | train_inner | epoch 002: 1231 / 3002 loss=2.785, ppl=6.89, wps=5865.2, ups=0.09, wpb=64796, bsz=128, num_updates=4204, lr=9.99744e-05, gnorm=2.315, loss_scale=4, train_wall=11, gb_free=2.8, wall=47939 2021-06-19 07:57:56 | INFO | train_inner | epoch 002: 1232 / 3002 loss=2.822, ppl=7.07, wps=5812.6, ups=0.09, wpb=64845, bsz=128, num_updates=4205, lr=9.99744e-05, gnorm=2.279, loss_scale=4, train_wall=11, gb_free=2.8, wall=47950 2021-06-19 07:58:07 | INFO | train_inner | epoch 002: 1233 / 3002 loss=2.795, ppl=6.94, wps=5828.2, ups=0.09, wpb=64853, bsz=128, num_updates=4206, lr=9.99743e-05, gnorm=2.157, loss_scale=4, train_wall=11, gb_free=2.8, wall=47961 2021-06-19 07:58:18 | INFO | train_inner | epoch 002: 1234 / 3002 loss=2.893, ppl=7.43, wps=5959, ups=0.09, wpb=64856, bsz=128, num_updates=4207, lr=9.99743e-05, gnorm=2.204, loss_scale=4, train_wall=10, gb_free=2.8, wall=47972 2021-06-19 07:58:29 | INFO | train_inner | epoch 002: 1235 / 3002 loss=2.621, ppl=6.15, wps=5822.5, ups=0.09, wpb=64819, bsz=128, num_updates=4208, lr=9.99743e-05, gnorm=2.33, loss_scale=4, train_wall=11, gb_free=2.8, wall=47983 2021-06-19 07:58:40 | INFO | train_inner | epoch 002: 1236 / 3002 loss=2.728, ppl=6.63, wps=5789.4, ups=0.09, wpb=64826, bsz=128, num_updates=4209, lr=9.99743e-05, gnorm=2.34, loss_scale=4, train_wall=11, gb_free=2.8, wall=47994 2021-06-19 07:58:51 | INFO | train_inner | epoch 002: 1237 / 3002 loss=2.817, ppl=7.05, wps=5806.4, ups=0.09, wpb=64858, bsz=128, num_updates=4210, lr=9.99743e-05, gnorm=2.325, loss_scale=4, train_wall=11, gb_free=2.8, wall=48006 2021-06-19 07:59:02 | INFO | train_inner | epoch 002: 1238 / 3002 loss=2.841, ppl=7.17, wps=5852.1, ups=0.09, wpb=64781, bsz=128, num_updates=4211, lr=9.99743e-05, gnorm=2.319, loss_scale=4, train_wall=11, gb_free=2.8, wall=48017 2021-06-19 07:59:13 | INFO | train_inner | epoch 002: 1239 / 3002 loss=2.64, ppl=6.24, wps=5932.7, ups=0.09, wpb=64898, bsz=128, num_updates=4212, lr=9.99743e-05, gnorm=2.396, loss_scale=4, train_wall=10, gb_free=2.8, wall=48028 2021-06-19 07:59:24 | INFO | train_inner | epoch 002: 1240 / 3002 loss=2.817, ppl=7.05, wps=5933.9, ups=0.09, wpb=64784, bsz=128, num_updates=4213, lr=9.99743e-05, gnorm=2.286, loss_scale=4, train_wall=10, gb_free=2.8, wall=48038 2021-06-19 07:59:35 | INFO | train_inner | epoch 002: 1241 / 3002 loss=2.745, ppl=6.7, wps=5760.2, ups=0.09, wpb=64856, bsz=128, num_updates=4214, lr=9.99743e-05, gnorm=2.369, loss_scale=4, train_wall=11, gb_free=2.8, wall=48050 2021-06-19 07:59:46 | INFO | train_inner | epoch 002: 1242 / 3002 loss=2.747, ppl=6.71, wps=5871.8, ups=0.09, wpb=64833, bsz=128, num_updates=4215, lr=9.99743e-05, gnorm=2.255, loss_scale=4, train_wall=11, gb_free=2.8, wall=48061 2021-06-19 07:59:57 | INFO | train_inner | epoch 002: 1243 / 3002 loss=2.898, ppl=7.46, wps=5845.1, ups=0.09, wpb=64772, bsz=128, num_updates=4216, lr=9.99743e-05, gnorm=2.437, loss_scale=4, train_wall=11, gb_free=2.8, wall=48072 2021-06-19 08:00:08 | INFO | train_inner | epoch 002: 1244 / 3002 loss=2.709, ppl=6.54, wps=5979.9, ups=0.09, wpb=64781, bsz=128, num_updates=4217, lr=9.99743e-05, gnorm=2.321, loss_scale=4, train_wall=10, gb_free=2.8, wall=48083 2021-06-19 08:00:19 | INFO | train_inner | epoch 002: 1245 / 3002 loss=2.9, ppl=7.46, wps=5806.5, ups=0.09, wpb=64773, bsz=128, num_updates=4218, lr=9.99743e-05, gnorm=2.286, loss_scale=4, train_wall=11, gb_free=2.8, wall=48094 2021-06-19 08:00:31 | INFO | train_inner | epoch 002: 1246 / 3002 loss=2.822, ppl=7.07, wps=5786.1, ups=0.09, wpb=64745, bsz=128, num_updates=4219, lr=9.99742e-05, gnorm=2.227, loss_scale=4, train_wall=11, gb_free=2.8, wall=48105 2021-06-19 08:00:42 | INFO | train_inner | epoch 002: 1247 / 3002 loss=2.731, ppl=6.64, wps=5761, ups=0.09, wpb=64847, bsz=128, num_updates=4220, lr=9.99742e-05, gnorm=2.339, loss_scale=4, train_wall=11, gb_free=2.8, wall=48116 2021-06-19 08:00:53 | INFO | train_inner | epoch 002: 1248 / 3002 loss=2.815, ppl=7.04, wps=5945.2, ups=0.09, wpb=64847, bsz=128, num_updates=4221, lr=9.99742e-05, gnorm=2.178, loss_scale=4, train_wall=10, gb_free=2.8, wall=48127 2021-06-19 08:01:04 | INFO | train_inner | epoch 002: 1249 / 3002 loss=2.892, ppl=7.43, wps=5890, ups=0.09, wpb=64874, bsz=128, num_updates=4222, lr=9.99742e-05, gnorm=2.318, loss_scale=4, train_wall=11, gb_free=2.8, wall=48138 2021-06-19 08:01:15 | INFO | train_inner | epoch 002: 1250 / 3002 loss=2.769, ppl=6.82, wps=5895.8, ups=0.09, wpb=64801, bsz=128, num_updates=4223, lr=9.99742e-05, gnorm=2.303, loss_scale=4, train_wall=11, gb_free=2.8, wall=48149 2021-06-19 08:01:26 | INFO | train_inner | epoch 002: 1251 / 3002 loss=2.755, ppl=6.75, wps=5763.6, ups=0.09, wpb=64836, bsz=128, num_updates=4224, lr=9.99742e-05, gnorm=2.355, loss_scale=4, train_wall=11, gb_free=2.8, wall=48160 2021-06-19 08:01:37 | INFO | train_inner | epoch 002: 1252 / 3002 loss=2.833, ppl=7.13, wps=5786.5, ups=0.09, wpb=64778, bsz=128, num_updates=4225, lr=9.99742e-05, gnorm=2.338, loss_scale=4, train_wall=11, gb_free=2.8, wall=48172 2021-06-19 08:01:48 | INFO | train_inner | epoch 002: 1253 / 3002 loss=2.796, ppl=6.95, wps=5936, ups=0.09, wpb=64846, bsz=128, num_updates=4226, lr=9.99742e-05, gnorm=2.19, loss_scale=4, train_wall=10, gb_free=2.8, wall=48183 2021-06-19 08:01:59 | INFO | train_inner | epoch 002: 1254 / 3002 loss=2.912, ppl=7.53, wps=5857.9, ups=0.09, wpb=64821, bsz=128, num_updates=4227, lr=9.99742e-05, gnorm=2.306, loss_scale=4, train_wall=11, gb_free=2.8, wall=48194 2021-06-19 08:02:10 | INFO | train_inner | epoch 002: 1255 / 3002 loss=2.664, ppl=6.34, wps=5828.4, ups=0.09, wpb=64791, bsz=128, num_updates=4228, lr=9.99742e-05, gnorm=2.189, loss_scale=4, train_wall=11, gb_free=2.8, wall=48205 2021-06-19 08:02:21 | INFO | train_inner | epoch 002: 1256 / 3002 loss=2.728, ppl=6.63, wps=5974.4, ups=0.09, wpb=64814, bsz=128, num_updates=4229, lr=9.99742e-05, gnorm=2.142, loss_scale=4, train_wall=10, gb_free=2.8, wall=48216 2021-06-19 08:02:32 | INFO | train_inner | epoch 002: 1257 / 3002 loss=2.675, ppl=6.39, wps=5939.7, ups=0.09, wpb=64867, bsz=128, num_updates=4230, lr=9.99742e-05, gnorm=2.183, loss_scale=4, train_wall=10, gb_free=2.8, wall=48227 2021-06-19 08:02:43 | INFO | train_inner | epoch 002: 1258 / 3002 loss=2.9, ppl=7.46, wps=5894.4, ups=0.09, wpb=64789, bsz=128, num_updates=4231, lr=9.99741e-05, gnorm=2.252, loss_scale=4, train_wall=11, gb_free=2.8, wall=48237 2021-06-19 08:02:54 | INFO | train_inner | epoch 002: 1259 / 3002 loss=2.667, ppl=6.35, wps=5745, ups=0.09, wpb=64844, bsz=128, num_updates=4232, lr=9.99741e-05, gnorm=2.217, loss_scale=4, train_wall=11, gb_free=2.8, wall=48249 2021-06-19 08:03:06 | INFO | train_inner | epoch 002: 1260 / 3002 loss=2.78, ppl=6.87, wps=5837.8, ups=0.09, wpb=64792, bsz=128, num_updates=4233, lr=9.99741e-05, gnorm=2.256, loss_scale=4, train_wall=11, gb_free=2.8, wall=48260 2021-06-19 08:03:17 | INFO | train_inner | epoch 002: 1261 / 3002 loss=2.799, ppl=6.96, wps=5813.5, ups=0.09, wpb=64852, bsz=128, num_updates=4234, lr=9.99741e-05, gnorm=2.263, loss_scale=4, train_wall=11, gb_free=2.8, wall=48271 2021-06-19 08:03:28 | INFO | train_inner | epoch 002: 1262 / 3002 loss=2.789, ppl=6.91, wps=5870.8, ups=0.09, wpb=64759, bsz=128, num_updates=4235, lr=9.99741e-05, gnorm=2.254, loss_scale=4, train_wall=11, gb_free=2.8, wall=48282 2021-06-19 08:03:39 | INFO | train_inner | epoch 002: 1263 / 3002 loss=2.566, ppl=5.92, wps=5909, ups=0.09, wpb=64926, bsz=128, num_updates=4236, lr=9.99741e-05, gnorm=2.162, loss_scale=4, train_wall=11, gb_free=2.8, wall=48293 2021-06-19 08:03:50 | INFO | train_inner | epoch 002: 1264 / 3002 loss=2.742, ppl=6.69, wps=5852.9, ups=0.09, wpb=64921, bsz=128, num_updates=4237, lr=9.99741e-05, gnorm=2.221, loss_scale=4, train_wall=11, gb_free=2.8, wall=48304 2021-06-19 08:04:01 | INFO | train_inner | epoch 002: 1265 / 3002 loss=2.679, ppl=6.41, wps=5860.5, ups=0.09, wpb=64790, bsz=128, num_updates=4238, lr=9.99741e-05, gnorm=2.236, loss_scale=4, train_wall=11, gb_free=2.8, wall=48315 2021-06-19 08:04:12 | INFO | train_inner | epoch 002: 1266 / 3002 loss=2.632, ppl=6.2, wps=5806.4, ups=0.09, wpb=64780, bsz=128, num_updates=4239, lr=9.99741e-05, gnorm=2.214, loss_scale=4, train_wall=11, gb_free=2.8, wall=48326 2021-06-19 08:04:23 | INFO | train_inner | epoch 002: 1267 / 3002 loss=2.607, ppl=6.09, wps=5794, ups=0.09, wpb=64834, bsz=128, num_updates=4240, lr=9.99741e-05, gnorm=2.243, loss_scale=4, train_wall=11, gb_free=2.8, wall=48338 2021-06-19 08:04:35 | INFO | train_inner | epoch 002: 1268 / 3002 loss=2.943, ppl=7.69, wps=5743.9, ups=0.09, wpb=64892, bsz=128, num_updates=4241, lr=9.99741e-05, gnorm=2.288, loss_scale=4, train_wall=11, gb_free=2.8, wall=48349 2021-06-19 08:04:45 | INFO | train_inner | epoch 002: 1269 / 3002 loss=2.799, ppl=6.96, wps=5935.2, ups=0.09, wpb=64762, bsz=128, num_updates=4242, lr=9.99741e-05, gnorm=2.301, loss_scale=4, train_wall=10, gb_free=2.8, wall=48360 2021-06-19 08:04:56 | INFO | train_inner | epoch 002: 1270 / 3002 loss=2.682, ppl=6.42, wps=5873.2, ups=0.09, wpb=64824, bsz=128, num_updates=4243, lr=9.99741e-05, gnorm=2.73, loss_scale=4, train_wall=11, gb_free=2.8, wall=48371 2021-06-19 08:05:08 | INFO | train_inner | epoch 002: 1271 / 3002 loss=2.641, ppl=6.24, wps=5847.8, ups=0.09, wpb=64822, bsz=128, num_updates=4244, lr=9.9974e-05, gnorm=2.336, loss_scale=4, train_wall=11, gb_free=2.8, wall=48382 2021-06-19 08:05:19 | INFO | train_inner | epoch 002: 1272 / 3002 loss=2.737, ppl=6.67, wps=5804.8, ups=0.09, wpb=64851, bsz=128, num_updates=4245, lr=9.9974e-05, gnorm=2.226, loss_scale=4, train_wall=11, gb_free=2.8, wall=48393 2021-06-19 08:05:30 | INFO | train_inner | epoch 002: 1273 / 3002 loss=2.782, ppl=6.88, wps=5898.5, ups=0.09, wpb=64861, bsz=128, num_updates=4246, lr=9.9974e-05, gnorm=2.22, loss_scale=4, train_wall=11, gb_free=2.8, wall=48404 2021-06-19 08:05:41 | INFO | train_inner | epoch 002: 1274 / 3002 loss=2.747, ppl=6.71, wps=5870.6, ups=0.09, wpb=64858, bsz=128, num_updates=4247, lr=9.9974e-05, gnorm=2.273, loss_scale=4, train_wall=11, gb_free=2.8, wall=48415 2021-06-19 08:05:52 | INFO | train_inner | epoch 002: 1275 / 3002 loss=2.857, ppl=7.25, wps=5942.7, ups=0.09, wpb=64878, bsz=128, num_updates=4248, lr=9.9974e-05, gnorm=2.234, loss_scale=4, train_wall=10, gb_free=2.8, wall=48426 2021-06-19 08:06:03 | INFO | train_inner | epoch 002: 1276 / 3002 loss=2.79, ppl=6.92, wps=5766.6, ups=0.09, wpb=64790, bsz=128, num_updates=4249, lr=9.9974e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=48437 2021-06-19 08:06:14 | INFO | train_inner | epoch 002: 1277 / 3002 loss=2.617, ppl=6.13, wps=5910.2, ups=0.09, wpb=64882, bsz=128, num_updates=4250, lr=9.9974e-05, gnorm=2.286, loss_scale=4, train_wall=11, gb_free=2.8, wall=48448 2021-06-19 08:06:25 | INFO | train_inner | epoch 002: 1278 / 3002 loss=2.646, ppl=6.26, wps=5847.5, ups=0.09, wpb=64878, bsz=128, num_updates=4251, lr=9.9974e-05, gnorm=2.285, loss_scale=4, train_wall=11, gb_free=2.8, wall=48459 2021-06-19 08:06:36 | INFO | train_inner | epoch 002: 1279 / 3002 loss=2.712, ppl=6.55, wps=5782, ups=0.09, wpb=64816, bsz=128, num_updates=4252, lr=9.9974e-05, gnorm=2.236, loss_scale=4, train_wall=11, gb_free=2.8, wall=48471 2021-06-19 08:06:47 | INFO | train_inner | epoch 002: 1280 / 3002 loss=2.751, ppl=6.73, wps=5885, ups=0.09, wpb=64833, bsz=128, num_updates=4253, lr=9.9974e-05, gnorm=2.31, loss_scale=4, train_wall=11, gb_free=2.8, wall=48482 2021-06-19 08:06:58 | INFO | train_inner | epoch 002: 1281 / 3002 loss=2.833, ppl=7.13, wps=5871.4, ups=0.09, wpb=64810, bsz=128, num_updates=4254, lr=9.9974e-05, gnorm=2.397, loss_scale=4, train_wall=11, gb_free=2.8, wall=48493 2021-06-19 08:07:09 | INFO | train_inner | epoch 002: 1282 / 3002 loss=2.678, ppl=6.4, wps=5829, ups=0.09, wpb=64932, bsz=128, num_updates=4255, lr=9.9974e-05, gnorm=2.271, loss_scale=4, train_wall=11, gb_free=2.8, wall=48504 2021-06-19 08:07:21 | INFO | train_inner | epoch 002: 1283 / 3002 loss=2.813, ppl=7.03, wps=5749.7, ups=0.09, wpb=64811, bsz=128, num_updates=4256, lr=9.99739e-05, gnorm=2.216, loss_scale=4, train_wall=11, gb_free=2.8, wall=48515 2021-06-19 08:07:32 | INFO | train_inner | epoch 002: 1284 / 3002 loss=2.678, ppl=6.4, wps=5787.2, ups=0.09, wpb=64783, bsz=128, num_updates=4257, lr=9.99739e-05, gnorm=2.151, loss_scale=4, train_wall=11, gb_free=2.8, wall=48526 2021-06-19 08:07:43 | INFO | train_inner | epoch 002: 1285 / 3002 loss=2.712, ppl=6.55, wps=5874.2, ups=0.09, wpb=64804, bsz=128, num_updates=4258, lr=9.99739e-05, gnorm=2.24, loss_scale=4, train_wall=11, gb_free=2.8, wall=48537 2021-06-19 08:07:54 | INFO | train_inner | epoch 002: 1286 / 3002 loss=2.746, ppl=6.71, wps=5757.4, ups=0.09, wpb=64883, bsz=128, num_updates=4259, lr=9.99739e-05, gnorm=2.263, loss_scale=4, train_wall=11, gb_free=2.8, wall=48548 2021-06-19 08:08:05 | INFO | train_inner | epoch 002: 1287 / 3002 loss=2.914, ppl=7.54, wps=5865.3, ups=0.09, wpb=64831, bsz=128, num_updates=4260, lr=9.99739e-05, gnorm=2.277, loss_scale=4, train_wall=11, gb_free=2.8, wall=48560 2021-06-19 08:08:16 | INFO | train_inner | epoch 002: 1288 / 3002 loss=2.684, ppl=6.43, wps=5816.7, ups=0.09, wpb=64835, bsz=128, num_updates=4261, lr=9.99739e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=48571 2021-06-19 08:08:27 | INFO | train_inner | epoch 002: 1289 / 3002 loss=2.733, ppl=6.65, wps=5829.6, ups=0.09, wpb=64872, bsz=128, num_updates=4262, lr=9.99739e-05, gnorm=2.248, loss_scale=4, train_wall=11, gb_free=2.8, wall=48582 2021-06-19 08:08:38 | INFO | train_inner | epoch 002: 1290 / 3002 loss=2.612, ppl=6.11, wps=5896.8, ups=0.09, wpb=64804, bsz=128, num_updates=4263, lr=9.99739e-05, gnorm=2.192, loss_scale=8, train_wall=11, gb_free=2.8, wall=48593 2021-06-19 08:08:49 | INFO | train_inner | epoch 002: 1291 / 3002 loss=2.903, ppl=7.48, wps=6005.2, ups=0.09, wpb=64809, bsz=128, num_updates=4264, lr=9.99739e-05, gnorm=2.499, loss_scale=8, train_wall=10, gb_free=2.8, wall=48604 2021-06-19 08:09:00 | INFO | train_inner | epoch 002: 1292 / 3002 loss=2.7, ppl=6.5, wps=5845.5, ups=0.09, wpb=64801, bsz=128, num_updates=4265, lr=9.99739e-05, gnorm=2.519, loss_scale=8, train_wall=11, gb_free=2.8, wall=48615 2021-06-19 08:09:12 | INFO | train_inner | epoch 002: 1293 / 3002 loss=2.715, ppl=6.57, wps=5807.8, ups=0.09, wpb=64826, bsz=128, num_updates=4266, lr=9.99739e-05, gnorm=2.225, loss_scale=8, train_wall=11, gb_free=2.8, wall=48626 2021-06-19 08:09:23 | INFO | train_inner | epoch 002: 1294 / 3002 loss=2.843, ppl=7.17, wps=5782.2, ups=0.09, wpb=64853, bsz=128, num_updates=4267, lr=9.99739e-05, gnorm=2.172, loss_scale=8, train_wall=11, gb_free=2.8, wall=48637 2021-06-19 08:09:34 | INFO | train_inner | epoch 002: 1295 / 3002 loss=2.772, ppl=6.83, wps=5841.7, ups=0.09, wpb=64898, bsz=128, num_updates=4268, lr=9.99739e-05, gnorm=2.242, loss_scale=8, train_wall=11, gb_free=2.8, wall=48648 2021-06-19 08:09:45 | INFO | train_inner | epoch 002: 1296 / 3002 loss=2.732, ppl=6.64, wps=5884.3, ups=0.09, wpb=64813, bsz=128, num_updates=4269, lr=9.99738e-05, gnorm=2.236, loss_scale=8, train_wall=11, gb_free=2.8, wall=48659 2021-06-19 08:09:56 | INFO | train_inner | epoch 002: 1297 / 3002 loss=2.752, ppl=6.74, wps=5827.1, ups=0.09, wpb=64785, bsz=128, num_updates=4270, lr=9.99738e-05, gnorm=2.286, loss_scale=8, train_wall=11, gb_free=2.8, wall=48670 2021-06-19 08:10:07 | INFO | train_inner | epoch 002: 1298 / 3002 loss=2.728, ppl=6.62, wps=5891.6, ups=0.09, wpb=64801, bsz=128, num_updates=4271, lr=9.99738e-05, gnorm=2.232, loss_scale=8, train_wall=11, gb_free=2.8, wall=48681 2021-06-19 08:10:18 | INFO | train_inner | epoch 002: 1299 / 3002 loss=2.81, ppl=7.01, wps=5863.5, ups=0.09, wpb=64792, bsz=128, num_updates=4272, lr=9.99738e-05, gnorm=2.159, loss_scale=8, train_wall=11, gb_free=2.8, wall=48692 2021-06-19 08:10:29 | INFO | train_inner | epoch 002: 1300 / 3002 loss=2.898, ppl=7.45, wps=5891.1, ups=0.09, wpb=64775, bsz=128, num_updates=4273, lr=9.99738e-05, gnorm=2.941, loss_scale=8, train_wall=11, gb_free=2.8, wall=48703 2021-06-19 08:10:40 | INFO | train_inner | epoch 002: 1301 / 3002 loss=2.947, ppl=7.71, wps=5879.4, ups=0.09, wpb=64734, bsz=128, num_updates=4274, lr=9.99738e-05, gnorm=2.237, loss_scale=8, train_wall=11, gb_free=2.8, wall=48714 2021-06-19 08:10:51 | INFO | train_inner | epoch 002: 1302 / 3002 loss=2.827, ppl=7.1, wps=5815.1, ups=0.09, wpb=64795, bsz=128, num_updates=4275, lr=9.99738e-05, gnorm=2.283, loss_scale=8, train_wall=11, gb_free=2.8, wall=48726 2021-06-19 08:11:02 | INFO | train_inner | epoch 002: 1303 / 3002 loss=3.009, ppl=8.05, wps=5836.7, ups=0.09, wpb=64870, bsz=128, num_updates=4276, lr=9.99738e-05, gnorm=2.431, loss_scale=8, train_wall=11, gb_free=2.8, wall=48737 2021-06-19 08:11:13 | INFO | train_inner | epoch 002: 1304 / 3002 loss=2.666, ppl=6.35, wps=5915.5, ups=0.09, wpb=64906, bsz=128, num_updates=4277, lr=9.99738e-05, gnorm=2.155, loss_scale=8, train_wall=11, gb_free=2.8, wall=48748 2021-06-19 08:11:25 | INFO | train_inner | epoch 002: 1305 / 3002 loss=2.568, ppl=5.93, wps=5758, ups=0.09, wpb=64803, bsz=128, num_updates=4278, lr=9.99738e-05, gnorm=2.13, loss_scale=8, train_wall=11, gb_free=2.8, wall=48759 2021-06-19 08:11:36 | INFO | train_inner | epoch 002: 1306 / 3002 loss=2.893, ppl=7.43, wps=5891.2, ups=0.09, wpb=64840, bsz=128, num_updates=4279, lr=9.99738e-05, gnorm=2.189, loss_scale=8, train_wall=11, gb_free=2.8, wall=48770 2021-06-19 08:11:47 | INFO | train_inner | epoch 002: 1307 / 3002 loss=2.667, ppl=6.35, wps=5819.2, ups=0.09, wpb=64902, bsz=128, num_updates=4280, lr=9.99738e-05, gnorm=2.243, loss_scale=8, train_wall=11, gb_free=2.8, wall=48781 2021-06-19 08:11:58 | INFO | train_inner | epoch 002: 1308 / 3002 loss=2.771, ppl=6.83, wps=5900.3, ups=0.09, wpb=64871, bsz=128, num_updates=4281, lr=9.99737e-05, gnorm=2.344, loss_scale=8, train_wall=11, gb_free=2.8, wall=48792 2021-06-19 08:12:09 | INFO | train_inner | epoch 002: 1309 / 3002 loss=2.95, ppl=7.73, wps=5805.7, ups=0.09, wpb=64830, bsz=128, num_updates=4282, lr=9.99737e-05, gnorm=2.179, loss_scale=8, train_wall=11, gb_free=2.8, wall=48803 2021-06-19 08:12:20 | INFO | train_inner | epoch 002: 1310 / 3002 loss=2.613, ppl=6.12, wps=5964.7, ups=0.09, wpb=64863, bsz=128, num_updates=4283, lr=9.99737e-05, gnorm=2.157, loss_scale=8, train_wall=10, gb_free=2.8, wall=48814 2021-06-19 08:12:31 | INFO | train_inner | epoch 002: 1311 / 3002 loss=2.725, ppl=6.61, wps=5835.7, ups=0.09, wpb=64790, bsz=128, num_updates=4284, lr=9.99737e-05, gnorm=2.241, loss_scale=8, train_wall=11, gb_free=2.8, wall=48825 2021-06-19 08:12:42 | INFO | train_inner | epoch 002: 1312 / 3002 loss=2.757, ppl=6.76, wps=5831.4, ups=0.09, wpb=64837, bsz=128, num_updates=4285, lr=9.99737e-05, gnorm=2.208, loss_scale=8, train_wall=11, gb_free=2.8, wall=48836 2021-06-19 08:12:53 | INFO | train_inner | epoch 002: 1313 / 3002 loss=2.73, ppl=6.63, wps=5766.6, ups=0.09, wpb=64766, bsz=128, num_updates=4286, lr=9.99737e-05, gnorm=2.263, loss_scale=8, train_wall=11, gb_free=2.8, wall=48848 2021-06-19 08:13:04 | INFO | train_inner | epoch 002: 1314 / 3002 loss=2.681, ppl=6.41, wps=5827.4, ups=0.09, wpb=64887, bsz=128, num_updates=4287, lr=9.99737e-05, gnorm=2.208, loss_scale=8, train_wall=11, gb_free=2.8, wall=48859 2021-06-19 08:13:16 | INFO | train_inner | epoch 002: 1315 / 3002 loss=2.675, ppl=6.39, wps=5717.4, ups=0.09, wpb=64819, bsz=128, num_updates=4288, lr=9.99737e-05, gnorm=2.252, loss_scale=8, train_wall=11, gb_free=2.8, wall=48870 2021-06-19 08:13:27 | INFO | train_inner | epoch 002: 1316 / 3002 loss=2.707, ppl=6.53, wps=5817.7, ups=0.09, wpb=64588, bsz=128, num_updates=4289, lr=9.99737e-05, gnorm=2.324, loss_scale=8, train_wall=11, gb_free=2.8, wall=48881 2021-06-19 08:13:38 | INFO | train_inner | epoch 002: 1317 / 3002 loss=2.866, ppl=7.29, wps=5906.3, ups=0.09, wpb=64888, bsz=128, num_updates=4290, lr=9.99737e-05, gnorm=2.355, loss_scale=8, train_wall=11, gb_free=2.8, wall=48892 2021-06-19 08:13:49 | INFO | train_inner | epoch 002: 1318 / 3002 loss=2.574, ppl=5.96, wps=5870.1, ups=0.09, wpb=64855, bsz=128, num_updates=4291, lr=9.99737e-05, gnorm=2.193, loss_scale=8, train_wall=11, gb_free=2.8, wall=48903 2021-06-19 08:14:00 | INFO | train_inner | epoch 002: 1319 / 3002 loss=2.808, ppl=7, wps=5909.2, ups=0.09, wpb=64789, bsz=128, num_updates=4292, lr=9.99737e-05, gnorm=2.211, loss_scale=8, train_wall=11, gb_free=2.8, wall=48914 2021-06-19 08:14:11 | INFO | train_inner | epoch 002: 1320 / 3002 loss=2.713, ppl=6.56, wps=5788.8, ups=0.09, wpb=64759, bsz=128, num_updates=4293, lr=9.99737e-05, gnorm=2.261, loss_scale=8, train_wall=11, gb_free=2.8, wall=48925 2021-06-19 08:14:22 | INFO | train_inner | epoch 002: 1321 / 3002 loss=3.01, ppl=8.05, wps=5868.1, ups=0.09, wpb=64803, bsz=128, num_updates=4294, lr=9.99736e-05, gnorm=2.244, loss_scale=8, train_wall=11, gb_free=2.8, wall=48936 2021-06-19 08:14:33 | INFO | train_inner | epoch 002: 1322 / 3002 loss=2.778, ppl=6.86, wps=5888.8, ups=0.09, wpb=64878, bsz=128, num_updates=4295, lr=9.99736e-05, gnorm=2.24, loss_scale=8, train_wall=11, gb_free=2.8, wall=48947 2021-06-19 08:14:44 | INFO | train_inner | epoch 002: 1323 / 3002 loss=2.861, ppl=7.26, wps=5906.3, ups=0.09, wpb=64883, bsz=128, num_updates=4296, lr=9.99736e-05, gnorm=2.355, loss_scale=8, train_wall=11, gb_free=2.8, wall=48958 2021-06-19 08:14:55 | INFO | train_inner | epoch 002: 1324 / 3002 loss=2.745, ppl=6.71, wps=5846.9, ups=0.09, wpb=64908, bsz=128, num_updates=4297, lr=9.99736e-05, gnorm=2.226, loss_scale=8, train_wall=11, gb_free=2.8, wall=48969 2021-06-19 08:15:06 | INFO | train_inner | epoch 002: 1325 / 3002 loss=2.614, ppl=6.12, wps=5810.9, ups=0.09, wpb=64870, bsz=128, num_updates=4298, lr=9.99736e-05, gnorm=2.112, loss_scale=8, train_wall=11, gb_free=2.8, wall=48981 2021-06-19 08:15:17 | INFO | train_inner | epoch 002: 1326 / 3002 loss=2.826, ppl=7.09, wps=5980, ups=0.09, wpb=64903, bsz=128, num_updates=4299, lr=9.99736e-05, gnorm=2.641, loss_scale=8, train_wall=10, gb_free=2.8, wall=48991 2021-06-19 08:15:28 | INFO | train_inner | epoch 002: 1327 / 3002 loss=2.692, ppl=6.46, wps=6159.7, ups=0.1, wpb=64741, bsz=128, num_updates=4300, lr=9.99736e-05, gnorm=2.237, loss_scale=8, train_wall=10, gb_free=2.8, wall=49002 2021-06-19 08:15:39 | INFO | train_inner | epoch 002: 1328 / 3002 loss=2.67, ppl=6.37, wps=5934.3, ups=0.09, wpb=64861, bsz=128, num_updates=4301, lr=9.99736e-05, gnorm=2.208, loss_scale=8, train_wall=10, gb_free=2.8, wall=49013 2021-06-19 08:15:50 | INFO | train_inner | epoch 002: 1329 / 3002 loss=2.756, ppl=6.75, wps=5764.9, ups=0.09, wpb=64768, bsz=128, num_updates=4302, lr=9.99736e-05, gnorm=2.254, loss_scale=8, train_wall=11, gb_free=2.8, wall=49024 2021-06-19 08:16:01 | INFO | train_inner | epoch 002: 1330 / 3002 loss=2.658, ppl=6.31, wps=5860.6, ups=0.09, wpb=64830, bsz=128, num_updates=4303, lr=9.99736e-05, gnorm=2.312, loss_scale=8, train_wall=11, gb_free=2.8, wall=49035 2021-06-19 08:16:12 | INFO | train_inner | epoch 002: 1331 / 3002 loss=2.759, ppl=6.77, wps=5783, ups=0.09, wpb=64883, bsz=128, num_updates=4304, lr=9.99736e-05, gnorm=2.394, loss_scale=8, train_wall=11, gb_free=2.8, wall=49046 2021-06-19 08:16:23 | INFO | train_inner | epoch 002: 1332 / 3002 loss=2.689, ppl=6.45, wps=5873.7, ups=0.09, wpb=64800, bsz=128, num_updates=4305, lr=9.99736e-05, gnorm=2.303, loss_scale=8, train_wall=11, gb_free=2.8, wall=49057 2021-06-19 08:16:34 | INFO | train_inner | epoch 002: 1333 / 3002 loss=2.7, ppl=6.5, wps=5855.7, ups=0.09, wpb=64869, bsz=128, num_updates=4306, lr=9.99735e-05, gnorm=2.218, loss_scale=8, train_wall=11, gb_free=2.8, wall=49069 2021-06-19 08:16:45 | INFO | train_inner | epoch 002: 1334 / 3002 loss=2.695, ppl=6.48, wps=5813.4, ups=0.09, wpb=64852, bsz=128, num_updates=4307, lr=9.99735e-05, gnorm=2.23, loss_scale=8, train_wall=11, gb_free=2.8, wall=49080 2021-06-19 08:16:56 | INFO | train_inner | epoch 002: 1335 / 3002 loss=2.908, ppl=7.5, wps=5805.2, ups=0.09, wpb=64773, bsz=128, num_updates=4308, lr=9.99735e-05, gnorm=2.333, loss_scale=8, train_wall=11, gb_free=2.8, wall=49091 2021-06-19 08:17:08 | INFO | train_inner | epoch 002: 1336 / 3002 loss=2.633, ppl=6.2, wps=5802.3, ups=0.09, wpb=64826, bsz=128, num_updates=4309, lr=9.99735e-05, gnorm=2.244, loss_scale=8, train_wall=11, gb_free=2.8, wall=49102 2021-06-19 08:17:19 | INFO | train_inner | epoch 002: 1337 / 3002 loss=2.685, ppl=6.43, wps=5807.7, ups=0.09, wpb=64778, bsz=128, num_updates=4310, lr=9.99735e-05, gnorm=2.168, loss_scale=8, train_wall=11, gb_free=2.8, wall=49113 2021-06-19 08:17:30 | INFO | train_inner | epoch 002: 1338 / 3002 loss=2.695, ppl=6.48, wps=5775.8, ups=0.09, wpb=64873, bsz=128, num_updates=4311, lr=9.99735e-05, gnorm=2.271, loss_scale=8, train_wall=11, gb_free=2.8, wall=49124 2021-06-19 08:17:41 | INFO | train_inner | epoch 002: 1339 / 3002 loss=2.711, ppl=6.55, wps=5892.6, ups=0.09, wpb=64864, bsz=128, num_updates=4312, lr=9.99735e-05, gnorm=2.301, loss_scale=8, train_wall=11, gb_free=2.8, wall=49135 2021-06-19 08:17:52 | INFO | train_inner | epoch 002: 1340 / 3002 loss=2.697, ppl=6.49, wps=5792.6, ups=0.09, wpb=64845, bsz=128, num_updates=4313, lr=9.99735e-05, gnorm=2.237, loss_scale=8, train_wall=11, gb_free=2.8, wall=49147 2021-06-19 08:18:04 | INFO | train_inner | epoch 002: 1341 / 3002 loss=2.797, ppl=6.95, wps=5744.2, ups=0.09, wpb=64899, bsz=128, num_updates=4314, lr=9.99735e-05, gnorm=2.309, loss_scale=8, train_wall=11, gb_free=2.8, wall=49158 2021-06-19 08:18:15 | INFO | train_inner | epoch 002: 1342 / 3002 loss=2.714, ppl=6.56, wps=5842, ups=0.09, wpb=64931, bsz=128, num_updates=4315, lr=9.99735e-05, gnorm=2.376, loss_scale=8, train_wall=11, gb_free=2.8, wall=49169 2021-06-19 08:18:26 | INFO | train_inner | epoch 002: 1343 / 3002 loss=2.64, ppl=6.23, wps=5892.7, ups=0.09, wpb=64846, bsz=128, num_updates=4316, lr=9.99735e-05, gnorm=2.26, loss_scale=8, train_wall=11, gb_free=2.8, wall=49180 2021-06-19 08:18:37 | INFO | train_inner | epoch 002: 1344 / 3002 loss=2.611, ppl=6.11, wps=5772.5, ups=0.09, wpb=64839, bsz=128, num_updates=4317, lr=9.99735e-05, gnorm=2.435, loss_scale=8, train_wall=11, gb_free=2.8, wall=49191 2021-06-19 08:18:48 | INFO | train_inner | epoch 002: 1345 / 3002 loss=2.68, ppl=6.41, wps=5844.7, ups=0.09, wpb=64848, bsz=128, num_updates=4318, lr=9.99735e-05, gnorm=2.203, loss_scale=8, train_wall=11, gb_free=2.8, wall=49202 2021-06-19 08:18:59 | INFO | train_inner | epoch 002: 1346 / 3002 loss=2.676, ppl=6.39, wps=5881.7, ups=0.09, wpb=64888, bsz=128, num_updates=4319, lr=9.99734e-05, gnorm=2.196, loss_scale=8, train_wall=11, gb_free=2.8, wall=49213 2021-06-19 08:19:10 | INFO | train_inner | epoch 002: 1347 / 3002 loss=2.776, ppl=6.85, wps=5848.8, ups=0.09, wpb=64779, bsz=128, num_updates=4320, lr=9.99734e-05, gnorm=2.263, loss_scale=8, train_wall=11, gb_free=2.8, wall=49224 2021-06-19 08:19:21 | INFO | train_inner | epoch 002: 1348 / 3002 loss=2.793, ppl=6.93, wps=5802.8, ups=0.09, wpb=64826, bsz=128, num_updates=4321, lr=9.99734e-05, gnorm=2.289, loss_scale=8, train_wall=11, gb_free=2.8, wall=49236 2021-06-19 08:19:33 | INFO | train_inner | epoch 002: 1349 / 3002 loss=2.756, ppl=6.76, wps=5754.5, ups=0.09, wpb=64802, bsz=128, num_updates=4322, lr=9.99734e-05, gnorm=2.26, loss_scale=8, train_wall=11, gb_free=2.8, wall=49247 2021-06-19 08:19:44 | INFO | train_inner | epoch 002: 1350 / 3002 loss=2.863, ppl=7.27, wps=5829.5, ups=0.09, wpb=64770, bsz=128, num_updates=4323, lr=9.99734e-05, gnorm=2.318, loss_scale=8, train_wall=11, gb_free=2.8, wall=49258 2021-06-19 08:19:55 | INFO | train_inner | epoch 002: 1351 / 3002 loss=2.804, ppl=6.98, wps=5826.8, ups=0.09, wpb=64769, bsz=128, num_updates=4324, lr=9.99734e-05, gnorm=2.163, loss_scale=8, train_wall=11, gb_free=2.8, wall=49269 2021-06-19 08:20:06 | INFO | train_inner | epoch 002: 1352 / 3002 loss=2.712, ppl=6.55, wps=5811.2, ups=0.09, wpb=64835, bsz=128, num_updates=4325, lr=9.99734e-05, gnorm=2.246, loss_scale=8, train_wall=11, gb_free=2.8, wall=49280 2021-06-19 08:20:17 | INFO | train_inner | epoch 002: 1353 / 3002 loss=2.803, ppl=6.98, wps=5779.8, ups=0.09, wpb=64885, bsz=128, num_updates=4326, lr=9.99734e-05, gnorm=2.171, loss_scale=8, train_wall=11, gb_free=2.8, wall=49291 2021-06-19 08:20:28 | INFO | train_inner | epoch 002: 1354 / 3002 loss=2.67, ppl=6.36, wps=5846.7, ups=0.09, wpb=64692, bsz=128, num_updates=4327, lr=9.99734e-05, gnorm=2.298, loss_scale=8, train_wall=11, gb_free=2.8, wall=49303 2021-06-19 08:20:39 | INFO | train_inner | epoch 002: 1355 / 3002 loss=2.859, ppl=7.25, wps=5822.8, ups=0.09, wpb=64879, bsz=128, num_updates=4328, lr=9.99734e-05, gnorm=2.39, loss_scale=8, train_wall=11, gb_free=2.8, wall=49314 2021-06-19 08:20:50 | INFO | train_inner | epoch 002: 1356 / 3002 loss=2.788, ppl=6.9, wps=5903, ups=0.09, wpb=64847, bsz=128, num_updates=4329, lr=9.99734e-05, gnorm=2.324, loss_scale=8, train_wall=11, gb_free=2.8, wall=49325 2021-06-19 08:21:01 | INFO | train_inner | epoch 002: 1357 / 3002 loss=2.838, ppl=7.15, wps=5825.3, ups=0.09, wpb=64789, bsz=128, num_updates=4330, lr=9.99734e-05, gnorm=2.329, loss_scale=8, train_wall=11, gb_free=2.8, wall=49336 2021-06-19 08:21:12 | INFO | train_inner | epoch 002: 1358 / 3002 loss=2.861, ppl=7.27, wps=5889.8, ups=0.09, wpb=64839, bsz=128, num_updates=4331, lr=9.99733e-05, gnorm=2.233, loss_scale=8, train_wall=11, gb_free=2.8, wall=49347 2021-06-19 08:21:24 | INFO | train_inner | epoch 002: 1359 / 3002 loss=2.766, ppl=6.8, wps=5774.1, ups=0.09, wpb=64695, bsz=128, num_updates=4332, lr=9.99733e-05, gnorm=2.257, loss_scale=8, train_wall=11, gb_free=2.8, wall=49358 2021-06-19 08:21:35 | INFO | train_inner | epoch 002: 1360 / 3002 loss=2.751, ppl=6.73, wps=5809.9, ups=0.09, wpb=64814, bsz=128, num_updates=4333, lr=9.99733e-05, gnorm=2.163, loss_scale=8, train_wall=11, gb_free=2.8, wall=49369 2021-06-19 08:21:46 | INFO | train_inner | epoch 002: 1361 / 3002 loss=2.694, ppl=6.47, wps=5741.1, ups=0.09, wpb=64808, bsz=128, num_updates=4334, lr=9.99733e-05, gnorm=2.17, loss_scale=8, train_wall=11, gb_free=2.8, wall=49380 2021-06-19 08:21:57 | INFO | train_inner | epoch 002: 1362 / 3002 loss=2.812, ppl=7.02, wps=5857.3, ups=0.09, wpb=64772, bsz=128, num_updates=4335, lr=9.99733e-05, gnorm=2.231, loss_scale=8, train_wall=11, gb_free=2.8, wall=49392 2021-06-19 08:22:08 | INFO | train_inner | epoch 002: 1363 / 3002 loss=2.791, ppl=6.92, wps=5849, ups=0.09, wpb=64770, bsz=128, num_updates=4336, lr=9.99733e-05, gnorm=2.226, loss_scale=8, train_wall=11, gb_free=2.8, wall=49403 2021-06-19 08:22:19 | INFO | train_inner | epoch 002: 1364 / 3002 loss=2.809, ppl=7.01, wps=5770.8, ups=0.09, wpb=64737, bsz=128, num_updates=4337, lr=9.99733e-05, gnorm=2.379, loss_scale=8, train_wall=11, gb_free=2.8, wall=49414 2021-06-19 08:22:31 | INFO | train_inner | epoch 002: 1365 / 3002 loss=2.698, ppl=6.49, wps=5843.3, ups=0.09, wpb=64847, bsz=128, num_updates=4338, lr=9.99733e-05, gnorm=2.184, loss_scale=8, train_wall=11, gb_free=2.8, wall=49425 2021-06-19 08:22:42 | INFO | train_inner | epoch 002: 1366 / 3002 loss=2.847, ppl=7.2, wps=5793.5, ups=0.09, wpb=64810, bsz=128, num_updates=4339, lr=9.99733e-05, gnorm=2.303, loss_scale=8, train_wall=11, gb_free=2.8, wall=49436 2021-06-19 08:22:53 | INFO | train_inner | epoch 002: 1367 / 3002 loss=2.717, ppl=6.58, wps=5790.1, ups=0.09, wpb=64717, bsz=128, num_updates=4340, lr=9.99733e-05, gnorm=2.29, loss_scale=8, train_wall=11, gb_free=2.8, wall=49447 2021-06-19 08:23:04 | INFO | train_inner | epoch 002: 1368 / 3002 loss=2.801, ppl=6.97, wps=5860.4, ups=0.09, wpb=64843, bsz=128, num_updates=4341, lr=9.99733e-05, gnorm=2.203, loss_scale=8, train_wall=11, gb_free=2.8, wall=49458 2021-06-19 08:23:15 | INFO | train_inner | epoch 002: 1369 / 3002 loss=2.747, ppl=6.71, wps=5987.5, ups=0.09, wpb=64829, bsz=128, num_updates=4342, lr=9.99733e-05, gnorm=2.175, loss_scale=8, train_wall=10, gb_free=2.8, wall=49469 2021-06-19 08:23:26 | INFO | train_inner | epoch 002: 1370 / 3002 loss=2.646, ppl=6.26, wps=5787.8, ups=0.09, wpb=64831, bsz=128, num_updates=4343, lr=9.99733e-05, gnorm=2.168, loss_scale=8, train_wall=11, gb_free=2.8, wall=49480 2021-06-19 08:23:37 | INFO | train_inner | epoch 002: 1371 / 3002 loss=2.772, ppl=6.83, wps=5864.9, ups=0.09, wpb=64834, bsz=128, num_updates=4344, lr=9.99732e-05, gnorm=2.174, loss_scale=8, train_wall=11, gb_free=2.8, wall=49491 2021-06-19 08:23:48 | INFO | train_inner | epoch 002: 1372 / 3002 loss=2.797, ppl=6.95, wps=5888.3, ups=0.09, wpb=64893, bsz=128, num_updates=4345, lr=9.99732e-05, gnorm=2.267, loss_scale=8, train_wall=11, gb_free=2.8, wall=49502 2021-06-19 08:23:59 | INFO | train_inner | epoch 002: 1373 / 3002 loss=2.754, ppl=6.75, wps=5855.3, ups=0.09, wpb=64869, bsz=128, num_updates=4346, lr=9.99732e-05, gnorm=2.196, loss_scale=8, train_wall=11, gb_free=2.8, wall=49514 2021-06-19 08:24:10 | INFO | train_inner | epoch 002: 1374 / 3002 loss=2.525, ppl=5.76, wps=5875.7, ups=0.09, wpb=64928, bsz=128, num_updates=4347, lr=9.99732e-05, gnorm=2.187, loss_scale=8, train_wall=11, gb_free=2.8, wall=49525 2021-06-19 08:24:21 | INFO | train_inner | epoch 002: 1375 / 3002 loss=2.785, ppl=6.89, wps=5885.6, ups=0.09, wpb=64822, bsz=128, num_updates=4348, lr=9.99732e-05, gnorm=2.404, loss_scale=8, train_wall=11, gb_free=2.8, wall=49536 2021-06-19 08:24:32 | INFO | train_inner | epoch 002: 1376 / 3002 loss=2.788, ppl=6.91, wps=5881.8, ups=0.09, wpb=64794, bsz=128, num_updates=4349, lr=9.99732e-05, gnorm=2.336, loss_scale=8, train_wall=11, gb_free=2.8, wall=49547 2021-06-19 08:24:43 | INFO | train_inner | epoch 002: 1377 / 3002 loss=2.6, ppl=6.06, wps=5837.9, ups=0.09, wpb=64863, bsz=128, num_updates=4350, lr=9.99732e-05, gnorm=2.422, loss_scale=8, train_wall=11, gb_free=2.8, wall=49558 2021-06-19 08:24:54 | INFO | train_inner | epoch 002: 1378 / 3002 loss=2.642, ppl=6.24, wps=5897.6, ups=0.09, wpb=64886, bsz=128, num_updates=4351, lr=9.99732e-05, gnorm=2.363, loss_scale=8, train_wall=11, gb_free=2.8, wall=49569 2021-06-19 08:25:06 | INFO | train_inner | epoch 002: 1379 / 3002 loss=2.566, ppl=5.92, wps=5760.1, ups=0.09, wpb=64886, bsz=128, num_updates=4352, lr=9.99732e-05, gnorm=2.33, loss_scale=8, train_wall=11, gb_free=2.8, wall=49580 2021-06-19 08:25:17 | INFO | train_inner | epoch 002: 1380 / 3002 loss=2.648, ppl=6.27, wps=5839.1, ups=0.09, wpb=64863, bsz=128, num_updates=4353, lr=9.99732e-05, gnorm=2.202, loss_scale=8, train_wall=11, gb_free=2.8, wall=49591 2021-06-19 08:25:28 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 2021-06-19 08:25:39 | INFO | train_inner | epoch 002: 1382 / 3002 loss=2.75, ppl=6.73, wps=2898.4, ups=0.04, wpb=64879, bsz=128, num_updates=4354, lr=9.99732e-05, gnorm=2.28, loss_scale=4, train_wall=21, gb_free=2.8, wall=49613 2021-06-19 08:25:50 | INFO | train_inner | epoch 002: 1383 / 3002 loss=2.854, ppl=7.23, wps=5817.6, ups=0.09, wpb=64770, bsz=128, num_updates=4355, lr=9.99732e-05, gnorm=2.322, loss_scale=4, train_wall=11, gb_free=2.8, wall=49625 2021-06-19 08:26:01 | INFO | train_inner | epoch 002: 1384 / 3002 loss=2.728, ppl=6.62, wps=5805.8, ups=0.09, wpb=64798, bsz=128, num_updates=4356, lr=9.99731e-05, gnorm=2.484, loss_scale=4, train_wall=11, gb_free=2.8, wall=49636 2021-06-19 08:26:13 | INFO | train_inner | epoch 002: 1385 / 3002 loss=2.811, ppl=7.02, wps=5836.9, ups=0.09, wpb=64802, bsz=128, num_updates=4357, lr=9.99731e-05, gnorm=2.299, loss_scale=4, train_wall=11, gb_free=2.8, wall=49647 2021-06-19 08:26:24 | INFO | train_inner | epoch 002: 1386 / 3002 loss=2.706, ppl=6.53, wps=5893, ups=0.09, wpb=64841, bsz=128, num_updates=4358, lr=9.99731e-05, gnorm=2.241, loss_scale=4, train_wall=11, gb_free=2.8, wall=49658 2021-06-19 08:26:35 | INFO | train_inner | epoch 002: 1387 / 3002 loss=2.771, ppl=6.83, wps=5772.7, ups=0.09, wpb=64835, bsz=128, num_updates=4359, lr=9.99731e-05, gnorm=2.308, loss_scale=4, train_wall=11, gb_free=2.8, wall=49669 2021-06-19 08:26:46 | INFO | train_inner | epoch 002: 1388 / 3002 loss=2.702, ppl=6.51, wps=5889.4, ups=0.09, wpb=64861, bsz=128, num_updates=4360, lr=9.99731e-05, gnorm=2.251, loss_scale=4, train_wall=11, gb_free=2.8, wall=49680 2021-06-19 08:26:57 | INFO | train_inner | epoch 002: 1389 / 3002 loss=2.696, ppl=6.48, wps=5914.8, ups=0.09, wpb=64853, bsz=128, num_updates=4361, lr=9.99731e-05, gnorm=2.281, loss_scale=4, train_wall=11, gb_free=2.8, wall=49691 2021-06-19 08:27:08 | INFO | train_inner | epoch 002: 1390 / 3002 loss=2.625, ppl=6.17, wps=5766.9, ups=0.09, wpb=64840, bsz=128, num_updates=4362, lr=9.99731e-05, gnorm=2.344, loss_scale=4, train_wall=11, gb_free=2.8, wall=49702 2021-06-19 08:27:19 | INFO | train_inner | epoch 002: 1391 / 3002 loss=2.782, ppl=6.88, wps=5779.3, ups=0.09, wpb=64868, bsz=128, num_updates=4363, lr=9.99731e-05, gnorm=2.253, loss_scale=4, train_wall=11, gb_free=2.8, wall=49714 2021-06-19 08:27:30 | INFO | train_inner | epoch 002: 1392 / 3002 loss=2.9, ppl=7.46, wps=5792.8, ups=0.09, wpb=64772, bsz=128, num_updates=4364, lr=9.99731e-05, gnorm=2.222, loss_scale=4, train_wall=11, gb_free=2.8, wall=49725 2021-06-19 08:27:41 | INFO | train_inner | epoch 002: 1393 / 3002 loss=2.789, ppl=6.91, wps=5924.6, ups=0.09, wpb=64885, bsz=128, num_updates=4365, lr=9.99731e-05, gnorm=2.305, loss_scale=4, train_wall=10, gb_free=2.8, wall=49736 2021-06-19 08:27:53 | INFO | train_inner | epoch 002: 1394 / 3002 loss=2.629, ppl=6.19, wps=5725.3, ups=0.09, wpb=64804, bsz=128, num_updates=4366, lr=9.99731e-05, gnorm=2.225, loss_scale=4, train_wall=11, gb_free=2.8, wall=49747 2021-06-19 08:28:04 | INFO | train_inner | epoch 002: 1395 / 3002 loss=2.763, ppl=6.79, wps=5759.2, ups=0.09, wpb=64811, bsz=128, num_updates=4367, lr=9.99731e-05, gnorm=2.193, loss_scale=4, train_wall=11, gb_free=2.8, wall=49758 2021-06-19 08:28:15 | INFO | train_inner | epoch 002: 1396 / 3002 loss=3.079, ppl=8.45, wps=5879.3, ups=0.09, wpb=64805, bsz=128, num_updates=4368, lr=9.99731e-05, gnorm=2.312, loss_scale=4, train_wall=11, gb_free=2.8, wall=49769 2021-06-19 08:28:26 | INFO | train_inner | epoch 002: 1397 / 3002 loss=2.769, ppl=6.82, wps=5832.5, ups=0.09, wpb=64856, bsz=128, num_updates=4369, lr=9.9973e-05, gnorm=2.262, loss_scale=4, train_wall=11, gb_free=2.8, wall=49780 2021-06-19 08:28:37 | INFO | train_inner | epoch 002: 1398 / 3002 loss=2.677, ppl=6.4, wps=5797, ups=0.09, wpb=64832, bsz=128, num_updates=4370, lr=9.9973e-05, gnorm=2.119, loss_scale=4, train_wall=11, gb_free=2.8, wall=49792 2021-06-19 08:28:48 | INFO | train_inner | epoch 002: 1399 / 3002 loss=2.623, ppl=6.16, wps=5837.2, ups=0.09, wpb=64858, bsz=128, num_updates=4371, lr=9.9973e-05, gnorm=2.154, loss_scale=4, train_wall=11, gb_free=2.8, wall=49803 2021-06-19 08:29:00 | INFO | train_inner | epoch 002: 1400 / 3002 loss=2.861, ppl=7.27, wps=5771.9, ups=0.09, wpb=64798, bsz=128, num_updates=4372, lr=9.9973e-05, gnorm=2.352, loss_scale=4, train_wall=11, gb_free=2.8, wall=49814 2021-06-19 08:29:11 | INFO | train_inner | epoch 002: 1401 / 3002 loss=2.688, ppl=6.44, wps=5881.9, ups=0.09, wpb=64811, bsz=128, num_updates=4373, lr=9.9973e-05, gnorm=2.359, loss_scale=4, train_wall=11, gb_free=2.8, wall=49825 2021-06-19 08:29:22 | INFO | train_inner | epoch 002: 1402 / 3002 loss=2.677, ppl=6.4, wps=5840.5, ups=0.09, wpb=64862, bsz=128, num_updates=4374, lr=9.9973e-05, gnorm=2.253, loss_scale=4, train_wall=11, gb_free=2.8, wall=49836 2021-06-19 08:29:33 | INFO | train_inner | epoch 002: 1403 / 3002 loss=2.742, ppl=6.69, wps=5899.8, ups=0.09, wpb=64869, bsz=128, num_updates=4375, lr=9.9973e-05, gnorm=2.216, loss_scale=4, train_wall=11, gb_free=2.8, wall=49847 2021-06-19 08:29:44 | INFO | train_inner | epoch 002: 1404 / 3002 loss=2.681, ppl=6.41, wps=5886.6, ups=0.09, wpb=64886, bsz=128, num_updates=4376, lr=9.9973e-05, gnorm=2.223, loss_scale=4, train_wall=11, gb_free=2.8, wall=49858 2021-06-19 08:29:55 | INFO | train_inner | epoch 002: 1405 / 3002 loss=2.76, ppl=6.77, wps=5843, ups=0.09, wpb=64838, bsz=128, num_updates=4377, lr=9.9973e-05, gnorm=2.169, loss_scale=4, train_wall=11, gb_free=2.8, wall=49869 2021-06-19 08:30:06 | INFO | train_inner | epoch 002: 1406 / 3002 loss=2.695, ppl=6.47, wps=5884, ups=0.09, wpb=64850, bsz=128, num_updates=4378, lr=9.9973e-05, gnorm=2.169, loss_scale=4, train_wall=11, gb_free=2.8, wall=49880 2021-06-19 08:30:17 | INFO | train_inner | epoch 002: 1407 / 3002 loss=2.807, ppl=7, wps=5810.4, ups=0.09, wpb=64859, bsz=128, num_updates=4379, lr=9.9973e-05, gnorm=2.232, loss_scale=4, train_wall=11, gb_free=2.8, wall=49891 2021-06-19 08:30:28 | INFO | train_inner | epoch 002: 1408 / 3002 loss=2.712, ppl=6.55, wps=5880.8, ups=0.09, wpb=64796, bsz=128, num_updates=4380, lr=9.9973e-05, gnorm=2.479, loss_scale=4, train_wall=11, gb_free=2.8, wall=49902 2021-06-19 08:30:39 | INFO | train_inner | epoch 002: 1409 / 3002 loss=2.755, ppl=6.75, wps=5883.4, ups=0.09, wpb=64798, bsz=128, num_updates=4381, lr=9.99729e-05, gnorm=2.311, loss_scale=4, train_wall=11, gb_free=2.8, wall=49913 2021-06-19 08:30:50 | INFO | train_inner | epoch 002: 1410 / 3002 loss=2.525, ppl=5.75, wps=5972.5, ups=0.09, wpb=64890, bsz=128, num_updates=4382, lr=9.99729e-05, gnorm=2.243, loss_scale=4, train_wall=10, gb_free=2.8, wall=49924 2021-06-19 08:31:01 | INFO | train_inner | epoch 002: 1411 / 3002 loss=2.81, ppl=7.01, wps=5991.3, ups=0.09, wpb=64905, bsz=128, num_updates=4383, lr=9.99729e-05, gnorm=2.244, loss_scale=4, train_wall=10, gb_free=2.8, wall=49935 2021-06-19 08:31:12 | INFO | train_inner | epoch 002: 1412 / 3002 loss=2.716, ppl=6.57, wps=5781.3, ups=0.09, wpb=64800, bsz=128, num_updates=4384, lr=9.99729e-05, gnorm=2.343, loss_scale=4, train_wall=11, gb_free=2.8, wall=49946 2021-06-19 08:31:23 | INFO | train_inner | epoch 002: 1413 / 3002 loss=2.702, ppl=6.51, wps=5849, ups=0.09, wpb=64914, bsz=128, num_updates=4385, lr=9.99729e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=49957 2021-06-19 08:31:34 | INFO | train_inner | epoch 002: 1414 / 3002 loss=2.822, ppl=7.07, wps=5814.6, ups=0.09, wpb=64842, bsz=128, num_updates=4386, lr=9.99729e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=49969 2021-06-19 08:31:45 | INFO | train_inner | epoch 002: 1415 / 3002 loss=2.652, ppl=6.28, wps=5838.5, ups=0.09, wpb=64745, bsz=128, num_updates=4387, lr=9.99729e-05, gnorm=2.137, loss_scale=4, train_wall=11, gb_free=2.8, wall=49980 2021-06-19 08:31:56 | INFO | train_inner | epoch 002: 1416 / 3002 loss=2.739, ppl=6.68, wps=5890.5, ups=0.09, wpb=64826, bsz=128, num_updates=4388, lr=9.99729e-05, gnorm=2.176, loss_scale=4, train_wall=11, gb_free=2.8, wall=49991 2021-06-19 08:32:08 | INFO | train_inner | epoch 002: 1417 / 3002 loss=2.767, ppl=6.81, wps=5775.4, ups=0.09, wpb=64836, bsz=128, num_updates=4389, lr=9.99729e-05, gnorm=2.219, loss_scale=4, train_wall=11, gb_free=2.8, wall=50002 2021-06-19 08:32:18 | INFO | train_inner | epoch 002: 1418 / 3002 loss=2.73, ppl=6.64, wps=6041.3, ups=0.09, wpb=64787, bsz=128, num_updates=4390, lr=9.99729e-05, gnorm=2.202, loss_scale=4, train_wall=10, gb_free=2.8, wall=50013 2021-06-19 08:32:29 | INFO | train_inner | epoch 002: 1419 / 3002 loss=2.818, ppl=7.05, wps=5843.3, ups=0.09, wpb=64747, bsz=128, num_updates=4391, lr=9.99729e-05, gnorm=2.233, loss_scale=4, train_wall=11, gb_free=2.8, wall=50024 2021-06-19 08:32:41 | INFO | train_inner | epoch 002: 1420 / 3002 loss=2.73, ppl=6.64, wps=5719.8, ups=0.09, wpb=64754, bsz=128, num_updates=4392, lr=9.99729e-05, gnorm=2.293, loss_scale=4, train_wall=11, gb_free=2.8, wall=50035 2021-06-19 08:32:52 | INFO | train_inner | epoch 002: 1421 / 3002 loss=2.795, ppl=6.94, wps=5897.2, ups=0.09, wpb=64866, bsz=128, num_updates=4393, lr=9.99729e-05, gnorm=2.34, loss_scale=4, train_wall=11, gb_free=2.8, wall=50046 2021-06-19 08:33:03 | INFO | train_inner | epoch 002: 1422 / 3002 loss=2.572, ppl=5.94, wps=5945.5, ups=0.09, wpb=64851, bsz=128, num_updates=4394, lr=9.99728e-05, gnorm=2.188, loss_scale=4, train_wall=10, gb_free=2.8, wall=50057 2021-06-19 08:33:14 | INFO | train_inner | epoch 002: 1423 / 3002 loss=2.77, ppl=6.82, wps=5862.9, ups=0.09, wpb=64874, bsz=128, num_updates=4395, lr=9.99728e-05, gnorm=2.259, loss_scale=4, train_wall=11, gb_free=2.8, wall=50068 2021-06-19 08:33:25 | INFO | train_inner | epoch 002: 1424 / 3002 loss=2.729, ppl=6.63, wps=5793.3, ups=0.09, wpb=64726, bsz=128, num_updates=4396, lr=9.99728e-05, gnorm=2.361, loss_scale=4, train_wall=11, gb_free=2.8, wall=50079 2021-06-19 08:33:36 | INFO | train_inner | epoch 002: 1425 / 3002 loss=2.883, ppl=7.38, wps=5897.9, ups=0.09, wpb=64891, bsz=128, num_updates=4397, lr=9.99728e-05, gnorm=2.341, loss_scale=4, train_wall=11, gb_free=2.8, wall=50090 2021-06-19 08:33:47 | INFO | train_inner | epoch 002: 1426 / 3002 loss=2.681, ppl=6.41, wps=5838, ups=0.09, wpb=64861, bsz=128, num_updates=4398, lr=9.99728e-05, gnorm=2.189, loss_scale=4, train_wall=11, gb_free=2.8, wall=50101 2021-06-19 08:33:58 | INFO | train_inner | epoch 002: 1427 / 3002 loss=2.601, ppl=6.07, wps=5823.9, ups=0.09, wpb=64799, bsz=128, num_updates=4399, lr=9.99728e-05, gnorm=2.161, loss_scale=4, train_wall=11, gb_free=2.8, wall=50112 2021-06-19 08:34:09 | INFO | train_inner | epoch 002: 1428 / 3002 loss=2.685, ppl=6.43, wps=5758.5, ups=0.09, wpb=64761, bsz=128, num_updates=4400, lr=9.99728e-05, gnorm=2.177, loss_scale=4, train_wall=11, gb_free=2.8, wall=50124 2021-06-19 08:34:20 | INFO | train_inner | epoch 002: 1429 / 3002 loss=2.783, ppl=6.88, wps=5885.7, ups=0.09, wpb=64861, bsz=128, num_updates=4401, lr=9.99728e-05, gnorm=2.221, loss_scale=4, train_wall=11, gb_free=2.8, wall=50135 2021-06-19 08:34:32 | INFO | train_inner | epoch 002: 1430 / 3002 loss=2.654, ppl=6.3, wps=5698.8, ups=0.09, wpb=64817, bsz=128, num_updates=4402, lr=9.99728e-05, gnorm=2.283, loss_scale=4, train_wall=11, gb_free=2.8, wall=50146 2021-06-19 08:34:43 | INFO | train_inner | epoch 002: 1431 / 3002 loss=2.764, ppl=6.79, wps=5860.7, ups=0.09, wpb=64778, bsz=128, num_updates=4403, lr=9.99728e-05, gnorm=2.123, loss_scale=4, train_wall=11, gb_free=2.8, wall=50157 2021-06-19 08:34:54 | INFO | train_inner | epoch 002: 1432 / 3002 loss=2.64, ppl=6.23, wps=5851.1, ups=0.09, wpb=64842, bsz=128, num_updates=4404, lr=9.99728e-05, gnorm=2.278, loss_scale=4, train_wall=11, gb_free=2.8, wall=50168 2021-06-19 08:35:05 | INFO | train_inner | epoch 002: 1433 / 3002 loss=2.593, ppl=6.03, wps=5957.6, ups=0.09, wpb=64763, bsz=128, num_updates=4405, lr=9.99728e-05, gnorm=2.156, loss_scale=4, train_wall=10, gb_free=2.8, wall=50179 2021-06-19 08:35:16 | INFO | train_inner | epoch 002: 1434 / 3002 loss=2.574, ppl=5.96, wps=5911.4, ups=0.09, wpb=64865, bsz=128, num_updates=4406, lr=9.99727e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=50190 2021-06-19 08:35:27 | INFO | train_inner | epoch 002: 1435 / 3002 loss=3.003, ppl=8.02, wps=5884.6, ups=0.09, wpb=64845, bsz=128, num_updates=4407, lr=9.99727e-05, gnorm=2.238, loss_scale=4, train_wall=11, gb_free=2.8, wall=50201 2021-06-19 08:35:38 | INFO | train_inner | epoch 002: 1436 / 3002 loss=2.858, ppl=7.25, wps=5786.1, ups=0.09, wpb=64832, bsz=128, num_updates=4408, lr=9.99727e-05, gnorm=2.376, loss_scale=4, train_wall=11, gb_free=2.8, wall=50212 2021-06-19 08:35:49 | INFO | train_inner | epoch 002: 1437 / 3002 loss=2.679, ppl=6.4, wps=5898.2, ups=0.09, wpb=64905, bsz=128, num_updates=4409, lr=9.99727e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=50223 2021-06-19 08:36:00 | INFO | train_inner | epoch 002: 1438 / 3002 loss=2.632, ppl=6.2, wps=5863.7, ups=0.09, wpb=64805, bsz=128, num_updates=4410, lr=9.99727e-05, gnorm=2.241, loss_scale=4, train_wall=11, gb_free=2.8, wall=50234 2021-06-19 08:36:11 | INFO | train_inner | epoch 002: 1439 / 3002 loss=2.749, ppl=6.72, wps=5897.5, ups=0.09, wpb=64810, bsz=128, num_updates=4411, lr=9.99727e-05, gnorm=2.41, loss_scale=4, train_wall=11, gb_free=2.8, wall=50245 2021-06-19 08:36:22 | INFO | train_inner | epoch 002: 1440 / 3002 loss=2.793, ppl=6.93, wps=5724.6, ups=0.09, wpb=64788, bsz=128, num_updates=4412, lr=9.99727e-05, gnorm=2.198, loss_scale=4, train_wall=11, gb_free=2.8, wall=50257 2021-06-19 08:36:33 | INFO | train_inner | epoch 002: 1441 / 3002 loss=2.754, ppl=6.75, wps=5851.5, ups=0.09, wpb=64812, bsz=128, num_updates=4413, lr=9.99727e-05, gnorm=2.238, loss_scale=4, train_wall=11, gb_free=2.8, wall=50268 2021-06-19 08:36:44 | INFO | train_inner | epoch 002: 1442 / 3002 loss=2.714, ppl=6.56, wps=5879.7, ups=0.09, wpb=64834, bsz=128, num_updates=4414, lr=9.99727e-05, gnorm=2.235, loss_scale=4, train_wall=11, gb_free=2.8, wall=50279 2021-06-19 08:36:56 | INFO | train_inner | epoch 002: 1443 / 3002 loss=2.769, ppl=6.82, wps=5799.1, ups=0.09, wpb=64819, bsz=128, num_updates=4415, lr=9.99727e-05, gnorm=2.382, loss_scale=4, train_wall=11, gb_free=2.8, wall=50290 2021-06-19 08:37:07 | INFO | train_inner | epoch 002: 1444 / 3002 loss=2.695, ppl=6.47, wps=5760, ups=0.09, wpb=64826, bsz=128, num_updates=4416, lr=9.99727e-05, gnorm=2.242, loss_scale=4, train_wall=11, gb_free=2.8, wall=50301 2021-06-19 08:37:18 | INFO | train_inner | epoch 002: 1445 / 3002 loss=2.825, ppl=7.08, wps=5913.1, ups=0.09, wpb=64817, bsz=128, num_updates=4417, lr=9.99727e-05, gnorm=2.392, loss_scale=4, train_wall=11, gb_free=2.8, wall=50312 2021-06-19 08:37:29 | INFO | train_inner | epoch 002: 1446 / 3002 loss=2.788, ppl=6.91, wps=5917.5, ups=0.09, wpb=64808, bsz=128, num_updates=4418, lr=9.99727e-05, gnorm=2.371, loss_scale=4, train_wall=11, gb_free=2.8, wall=50323 2021-06-19 08:37:40 | INFO | train_inner | epoch 002: 1447 / 3002 loss=2.522, ppl=5.74, wps=5903.1, ups=0.09, wpb=64901, bsz=128, num_updates=4419, lr=9.99726e-05, gnorm=2.234, loss_scale=4, train_wall=11, gb_free=2.8, wall=50334 2021-06-19 08:37:51 | INFO | train_inner | epoch 002: 1448 / 3002 loss=2.848, ppl=7.2, wps=5913.3, ups=0.09, wpb=64852, bsz=128, num_updates=4420, lr=9.99726e-05, gnorm=2.169, loss_scale=4, train_wall=10, gb_free=2.8, wall=50345 2021-06-19 08:38:02 | INFO | train_inner | epoch 002: 1449 / 3002 loss=2.727, ppl=6.62, wps=5803.3, ups=0.09, wpb=64788, bsz=128, num_updates=4421, lr=9.99726e-05, gnorm=2.261, loss_scale=4, train_wall=11, gb_free=2.8, wall=50356 2021-06-19 08:38:13 | INFO | train_inner | epoch 002: 1450 / 3002 loss=2.718, ppl=6.58, wps=5859.1, ups=0.09, wpb=64862, bsz=128, num_updates=4422, lr=9.99726e-05, gnorm=2.196, loss_scale=4, train_wall=11, gb_free=2.8, wall=50367 2021-06-19 08:38:24 | INFO | train_inner | epoch 002: 1451 / 3002 loss=2.599, ppl=6.06, wps=6001.8, ups=0.09, wpb=64808, bsz=128, num_updates=4423, lr=9.99726e-05, gnorm=2.291, loss_scale=4, train_wall=10, gb_free=2.8, wall=50378 2021-06-19 08:38:35 | INFO | train_inner | epoch 002: 1452 / 3002 loss=2.777, ppl=6.86, wps=5771.8, ups=0.09, wpb=64893, bsz=128, num_updates=4424, lr=9.99726e-05, gnorm=2.294, loss_scale=4, train_wall=11, gb_free=2.8, wall=50389 2021-06-19 08:38:46 | INFO | train_inner | epoch 002: 1453 / 3002 loss=2.968, ppl=7.82, wps=5753.2, ups=0.09, wpb=64823, bsz=128, num_updates=4425, lr=9.99726e-05, gnorm=2.261, loss_scale=4, train_wall=11, gb_free=2.8, wall=50401 2021-06-19 08:38:57 | INFO | train_inner | epoch 002: 1454 / 3002 loss=2.739, ppl=6.68, wps=5882, ups=0.09, wpb=64829, bsz=128, num_updates=4426, lr=9.99726e-05, gnorm=2.292, loss_scale=4, train_wall=11, gb_free=2.8, wall=50412 2021-06-19 08:39:08 | INFO | train_inner | epoch 002: 1455 / 3002 loss=2.914, ppl=7.54, wps=5862.9, ups=0.09, wpb=64842, bsz=128, num_updates=4427, lr=9.99726e-05, gnorm=2.209, loss_scale=4, train_wall=11, gb_free=2.8, wall=50423 2021-06-19 08:39:19 | INFO | train_inner | epoch 002: 1456 / 3002 loss=2.777, ppl=6.86, wps=5785.8, ups=0.09, wpb=64722, bsz=128, num_updates=4428, lr=9.99726e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=50434 2021-06-19 08:39:31 | INFO | train_inner | epoch 002: 1457 / 3002 loss=2.796, ppl=6.95, wps=5820, ups=0.09, wpb=64882, bsz=128, num_updates=4429, lr=9.99726e-05, gnorm=2.274, loss_scale=4, train_wall=11, gb_free=2.8, wall=50445 2021-06-19 08:39:42 | INFO | train_inner | epoch 002: 1458 / 3002 loss=2.588, ppl=6.01, wps=5788.5, ups=0.09, wpb=64827, bsz=128, num_updates=4430, lr=9.99726e-05, gnorm=2.216, loss_scale=4, train_wall=11, gb_free=2.8, wall=50456 2021-06-19 08:39:53 | INFO | train_inner | epoch 002: 1459 / 3002 loss=2.519, ppl=5.73, wps=5912.4, ups=0.09, wpb=64873, bsz=128, num_updates=4431, lr=9.99725e-05, gnorm=2.133, loss_scale=4, train_wall=11, gb_free=2.8, wall=50467 2021-06-19 08:40:04 | INFO | train_inner | epoch 002: 1460 / 3002 loss=2.746, ppl=6.71, wps=5833.4, ups=0.09, wpb=64825, bsz=128, num_updates=4432, lr=9.99725e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=50478 2021-06-19 08:40:15 | INFO | train_inner | epoch 002: 1461 / 3002 loss=2.797, ppl=6.95, wps=5862.3, ups=0.09, wpb=64796, bsz=128, num_updates=4433, lr=9.99725e-05, gnorm=2.255, loss_scale=4, train_wall=11, gb_free=2.8, wall=50489 2021-06-19 08:40:26 | INFO | train_inner | epoch 002: 1462 / 3002 loss=2.794, ppl=6.94, wps=5882.3, ups=0.09, wpb=64859, bsz=128, num_updates=4434, lr=9.99725e-05, gnorm=2.191, loss_scale=4, train_wall=11, gb_free=2.8, wall=50500 2021-06-19 08:40:37 | INFO | train_inner | epoch 002: 1463 / 3002 loss=2.874, ppl=7.33, wps=5863.3, ups=0.09, wpb=64826, bsz=128, num_updates=4435, lr=9.99725e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=50511 2021-06-19 08:40:48 | INFO | train_inner | epoch 002: 1464 / 3002 loss=2.724, ppl=6.61, wps=5825.3, ups=0.09, wpb=64873, bsz=128, num_updates=4436, lr=9.99725e-05, gnorm=2.156, loss_scale=4, train_wall=11, gb_free=2.8, wall=50523 2021-06-19 08:40:59 | INFO | train_inner | epoch 002: 1465 / 3002 loss=2.681, ppl=6.41, wps=5858.5, ups=0.09, wpb=64784, bsz=128, num_updates=4437, lr=9.99725e-05, gnorm=2.224, loss_scale=4, train_wall=11, gb_free=2.8, wall=50534 2021-06-19 08:41:11 | INFO | train_inner | epoch 002: 1466 / 3002 loss=3.061, ppl=8.35, wps=5696.9, ups=0.09, wpb=64713, bsz=128, num_updates=4438, lr=9.99725e-05, gnorm=2.32, loss_scale=4, train_wall=11, gb_free=2.8, wall=50545 2021-06-19 08:41:21 | INFO | train_inner | epoch 002: 1467 / 3002 loss=2.803, ppl=6.98, wps=6073.7, ups=0.09, wpb=64944, bsz=128, num_updates=4439, lr=9.99725e-05, gnorm=2.258, loss_scale=4, train_wall=10, gb_free=2.8, wall=50556 2021-06-19 08:41:32 | INFO | train_inner | epoch 002: 1468 / 3002 loss=2.621, ppl=6.15, wps=5987.2, ups=0.09, wpb=64873, bsz=128, num_updates=4440, lr=9.99725e-05, gnorm=2.242, loss_scale=4, train_wall=10, gb_free=2.8, wall=50566 2021-06-19 08:41:43 | INFO | train_inner | epoch 002: 1469 / 3002 loss=2.607, ppl=6.09, wps=5882.6, ups=0.09, wpb=64827, bsz=128, num_updates=4441, lr=9.99725e-05, gnorm=2.132, loss_scale=4, train_wall=11, gb_free=2.8, wall=50577 2021-06-19 08:41:54 | INFO | train_inner | epoch 002: 1470 / 3002 loss=2.648, ppl=6.27, wps=5885.7, ups=0.09, wpb=64868, bsz=128, num_updates=4442, lr=9.99725e-05, gnorm=2.213, loss_scale=4, train_wall=11, gb_free=2.8, wall=50588 2021-06-19 08:42:05 | INFO | train_inner | epoch 002: 1471 / 3002 loss=2.879, ppl=7.35, wps=5770.1, ups=0.09, wpb=64809, bsz=128, num_updates=4443, lr=9.99725e-05, gnorm=2.352, loss_scale=4, train_wall=11, gb_free=2.8, wall=50600 2021-06-19 08:42:17 | INFO | train_inner | epoch 002: 1472 / 3002 loss=2.665, ppl=6.34, wps=5739.6, ups=0.09, wpb=64790, bsz=128, num_updates=4444, lr=9.99724e-05, gnorm=2.174, loss_scale=4, train_wall=11, gb_free=2.8, wall=50611 2021-06-19 08:42:28 | INFO | train_inner | epoch 002: 1473 / 3002 loss=2.83, ppl=7.11, wps=5877.4, ups=0.09, wpb=64732, bsz=128, num_updates=4445, lr=9.99724e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=50622 2021-06-19 08:42:39 | INFO | train_inner | epoch 002: 1474 / 3002 loss=2.563, ppl=5.91, wps=5910.4, ups=0.09, wpb=64893, bsz=128, num_updates=4446, lr=9.99724e-05, gnorm=2.149, loss_scale=4, train_wall=11, gb_free=2.8, wall=50633 2021-06-19 08:42:50 | INFO | train_inner | epoch 002: 1475 / 3002 loss=2.72, ppl=6.59, wps=5889.8, ups=0.09, wpb=64853, bsz=128, num_updates=4447, lr=9.99724e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=50644 2021-06-19 08:43:01 | INFO | train_inner | epoch 002: 1476 / 3002 loss=2.955, ppl=7.75, wps=5950.7, ups=0.09, wpb=64830, bsz=128, num_updates=4448, lr=9.99724e-05, gnorm=2.097, loss_scale=4, train_wall=10, gb_free=2.8, wall=50655 2021-06-19 08:43:12 | INFO | train_inner | epoch 002: 1477 / 3002 loss=2.8, ppl=6.96, wps=5820.7, ups=0.09, wpb=64784, bsz=128, num_updates=4449, lr=9.99724e-05, gnorm=2.197, loss_scale=4, train_wall=11, gb_free=2.8, wall=50666 2021-06-19 08:43:23 | INFO | train_inner | epoch 002: 1478 / 3002 loss=2.672, ppl=6.37, wps=5964, ups=0.09, wpb=64760, bsz=128, num_updates=4450, lr=9.99724e-05, gnorm=2.121, loss_scale=4, train_wall=10, gb_free=2.8, wall=50677 2021-06-19 08:43:34 | INFO | train_inner | epoch 002: 1479 / 3002 loss=2.794, ppl=6.94, wps=5890.6, ups=0.09, wpb=64846, bsz=128, num_updates=4451, lr=9.99724e-05, gnorm=2.241, loss_scale=4, train_wall=11, gb_free=2.8, wall=50688 2021-06-19 08:43:45 | INFO | train_inner | epoch 002: 1480 / 3002 loss=2.77, ppl=6.82, wps=5870.5, ups=0.09, wpb=64835, bsz=128, num_updates=4452, lr=9.99724e-05, gnorm=2.234, loss_scale=4, train_wall=11, gb_free=2.8, wall=50699 2021-06-19 08:43:56 | INFO | train_inner | epoch 002: 1481 / 3002 loss=2.722, ppl=6.6, wps=5802, ups=0.09, wpb=64854, bsz=128, num_updates=4453, lr=9.99724e-05, gnorm=2.203, loss_scale=4, train_wall=11, gb_free=2.8, wall=50710 2021-06-19 08:44:07 | INFO | train_inner | epoch 002: 1482 / 3002 loss=2.626, ppl=6.17, wps=5836.8, ups=0.09, wpb=64801, bsz=128, num_updates=4454, lr=9.99724e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=50721 2021-06-19 08:44:18 | INFO | train_inner | epoch 002: 1483 / 3002 loss=2.679, ppl=6.4, wps=5910.2, ups=0.09, wpb=64895, bsz=128, num_updates=4455, lr=9.99724e-05, gnorm=2.221, loss_scale=4, train_wall=11, gb_free=2.8, wall=50732 2021-06-19 08:44:29 | INFO | train_inner | epoch 002: 1484 / 3002 loss=2.759, ppl=6.77, wps=5859.5, ups=0.09, wpb=64837, bsz=128, num_updates=4456, lr=9.99723e-05, gnorm=2.303, loss_scale=4, train_wall=11, gb_free=2.8, wall=50743 2021-06-19 08:44:40 | INFO | train_inner | epoch 002: 1485 / 3002 loss=2.74, ppl=6.68, wps=5806.2, ups=0.09, wpb=64779, bsz=128, num_updates=4457, lr=9.99723e-05, gnorm=2.178, loss_scale=4, train_wall=11, gb_free=2.8, wall=50754 2021-06-19 08:44:51 | INFO | train_inner | epoch 002: 1486 / 3002 loss=2.815, ppl=7.04, wps=5803.6, ups=0.09, wpb=64824, bsz=128, num_updates=4458, lr=9.99723e-05, gnorm=2.196, loss_scale=4, train_wall=11, gb_free=2.8, wall=50766 2021-06-19 08:45:02 | INFO | train_inner | epoch 002: 1487 / 3002 loss=2.713, ppl=6.56, wps=5796.8, ups=0.09, wpb=64739, bsz=128, num_updates=4459, lr=9.99723e-05, gnorm=2.265, loss_scale=4, train_wall=11, gb_free=2.8, wall=50777 2021-06-19 08:45:13 | INFO | train_inner | epoch 002: 1488 / 3002 loss=2.67, ppl=6.36, wps=5910.4, ups=0.09, wpb=64877, bsz=128, num_updates=4460, lr=9.99723e-05, gnorm=2.29, loss_scale=4, train_wall=11, gb_free=2.8, wall=50788 2021-06-19 08:45:24 | INFO | train_inner | epoch 002: 1489 / 3002 loss=2.865, ppl=7.29, wps=5858.4, ups=0.09, wpb=64858, bsz=128, num_updates=4461, lr=9.99723e-05, gnorm=2.223, loss_scale=4, train_wall=11, gb_free=2.8, wall=50799 2021-06-19 08:45:35 | INFO | train_inner | epoch 002: 1490 / 3002 loss=2.766, ppl=6.8, wps=5898.2, ups=0.09, wpb=64850, bsz=128, num_updates=4462, lr=9.99723e-05, gnorm=2.207, loss_scale=4, train_wall=11, gb_free=2.8, wall=50810 2021-06-19 08:45:46 | INFO | train_inner | epoch 002: 1491 / 3002 loss=2.795, ppl=6.94, wps=5903.2, ups=0.09, wpb=64838, bsz=128, num_updates=4463, lr=9.99723e-05, gnorm=2.197, loss_scale=4, train_wall=11, gb_free=2.8, wall=50821 2021-06-19 08:45:57 | INFO | train_inner | epoch 002: 1492 / 3002 loss=2.546, ppl=5.84, wps=5904.2, ups=0.09, wpb=64862, bsz=128, num_updates=4464, lr=9.99723e-05, gnorm=2.088, loss_scale=4, train_wall=11, gb_free=2.8, wall=50832 2021-06-19 08:46:09 | INFO | train_inner | epoch 002: 1493 / 3002 loss=2.718, ppl=6.58, wps=5809.4, ups=0.09, wpb=64842, bsz=128, num_updates=4465, lr=9.99723e-05, gnorm=2.157, loss_scale=4, train_wall=11, gb_free=2.8, wall=50843 2021-06-19 08:46:20 | INFO | train_inner | epoch 002: 1494 / 3002 loss=2.69, ppl=6.45, wps=5960.8, ups=0.09, wpb=64841, bsz=128, num_updates=4466, lr=9.99723e-05, gnorm=2.263, loss_scale=4, train_wall=10, gb_free=2.8, wall=50854 2021-06-19 08:46:30 | INFO | train_inner | epoch 002: 1495 / 3002 loss=2.933, ppl=7.64, wps=5889.9, ups=0.09, wpb=64765, bsz=128, num_updates=4467, lr=9.99723e-05, gnorm=2.2, loss_scale=4, train_wall=11, gb_free=2.8, wall=50865 2021-06-19 08:46:42 | INFO | train_inner | epoch 002: 1496 / 3002 loss=2.879, ppl=7.36, wps=5889.6, ups=0.09, wpb=64953, bsz=128, num_updates=4468, lr=9.99723e-05, gnorm=2.248, loss_scale=4, train_wall=11, gb_free=2.8, wall=50876 2021-06-19 08:46:52 | INFO | train_inner | epoch 002: 1497 / 3002 loss=2.837, ppl=7.14, wps=5945.1, ups=0.09, wpb=64836, bsz=128, num_updates=4469, lr=9.99722e-05, gnorm=2.152, loss_scale=4, train_wall=10, gb_free=2.8, wall=50887 2021-06-19 08:47:04 | INFO | train_inner | epoch 002: 1498 / 3002 loss=2.651, ppl=6.28, wps=5860.9, ups=0.09, wpb=64855, bsz=128, num_updates=4470, lr=9.99722e-05, gnorm=2.216, loss_scale=4, train_wall=11, gb_free=2.8, wall=50898 2021-06-19 08:47:15 | INFO | train_inner | epoch 002: 1499 / 3002 loss=2.629, ppl=6.19, wps=5851.8, ups=0.09, wpb=64806, bsz=128, num_updates=4471, lr=9.99722e-05, gnorm=2.237, loss_scale=4, train_wall=11, gb_free=2.8, wall=50909 2021-06-19 08:47:26 | INFO | train_inner | epoch 002: 1500 / 3002 loss=2.6, ppl=6.06, wps=5933.4, ups=0.09, wpb=64908, bsz=128, num_updates=4472, lr=9.99722e-05, gnorm=2.209, loss_scale=4, train_wall=10, gb_free=2.8, wall=50920 2021-06-19 08:47:37 | INFO | train_inner | epoch 002: 1501 / 3002 loss=2.884, ppl=7.38, wps=5888, ups=0.09, wpb=64845, bsz=128, num_updates=4473, lr=9.99722e-05, gnorm=2.287, loss_scale=4, train_wall=11, gb_free=2.8, wall=50931 2021-06-19 08:47:48 | INFO | train_inner | epoch 002: 1502 / 3002 loss=2.821, ppl=7.07, wps=5701, ups=0.09, wpb=64802, bsz=128, num_updates=4474, lr=9.99722e-05, gnorm=2.333, loss_scale=4, train_wall=11, gb_free=2.8, wall=50942 2021-06-19 08:47:59 | INFO | train_inner | epoch 002: 1503 / 3002 loss=2.718, ppl=6.58, wps=5747.5, ups=0.09, wpb=64854, bsz=128, num_updates=4475, lr=9.99722e-05, gnorm=2.149, loss_scale=4, train_wall=11, gb_free=2.8, wall=50954 2021-06-19 08:48:10 | INFO | train_inner | epoch 002: 1504 / 3002 loss=2.68, ppl=6.41, wps=5819.7, ups=0.09, wpb=64864, bsz=128, num_updates=4476, lr=9.99722e-05, gnorm=2.215, loss_scale=4, train_wall=11, gb_free=2.8, wall=50965 2021-06-19 08:48:21 | INFO | train_inner | epoch 002: 1505 / 3002 loss=2.784, ppl=6.89, wps=5827.7, ups=0.09, wpb=64851, bsz=128, num_updates=4477, lr=9.99722e-05, gnorm=2.328, loss_scale=4, train_wall=11, gb_free=2.8, wall=50976 2021-06-19 08:48:32 | INFO | train_inner | epoch 002: 1506 / 3002 loss=2.705, ppl=6.52, wps=5882.6, ups=0.09, wpb=64806, bsz=128, num_updates=4478, lr=9.99722e-05, gnorm=2.248, loss_scale=4, train_wall=11, gb_free=2.8, wall=50987 2021-06-19 08:48:43 | INFO | train_inner | epoch 002: 1507 / 3002 loss=2.801, ppl=6.97, wps=5899.9, ups=0.09, wpb=64863, bsz=128, num_updates=4479, lr=9.99722e-05, gnorm=2.317, loss_scale=4, train_wall=11, gb_free=2.8, wall=50998 2021-06-19 08:48:55 | INFO | train_inner | epoch 002: 1508 / 3002 loss=2.623, ppl=6.16, wps=5828.7, ups=0.09, wpb=64834, bsz=128, num_updates=4480, lr=9.99722e-05, gnorm=2.19, loss_scale=4, train_wall=11, gb_free=2.8, wall=51009 2021-06-19 08:49:05 | INFO | train_inner | epoch 002: 1509 / 3002 loss=2.808, ppl=7, wps=6008.9, ups=0.09, wpb=64785, bsz=128, num_updates=4481, lr=9.99721e-05, gnorm=2.139, loss_scale=8, train_wall=10, gb_free=2.8, wall=51020 2021-06-19 08:49:16 | INFO | train_inner | epoch 002: 1510 / 3002 loss=2.562, ppl=5.9, wps=5855.3, ups=0.09, wpb=64824, bsz=128, num_updates=4482, lr=9.99721e-05, gnorm=2.188, loss_scale=8, train_wall=11, gb_free=2.8, wall=51031 2021-06-19 08:49:28 | INFO | train_inner | epoch 002: 1511 / 3002 loss=2.833, ppl=7.13, wps=5852.8, ups=0.09, wpb=64837, bsz=128, num_updates=4483, lr=9.99721e-05, gnorm=2.251, loss_scale=8, train_wall=11, gb_free=2.8, wall=51042 2021-06-19 08:49:39 | INFO | train_inner | epoch 002: 1512 / 3002 loss=2.819, ppl=7.06, wps=5876, ups=0.09, wpb=64781, bsz=128, num_updates=4484, lr=9.99721e-05, gnorm=2.264, loss_scale=8, train_wall=11, gb_free=2.8, wall=51053 2021-06-19 08:49:50 | INFO | train_inner | epoch 002: 1513 / 3002 loss=2.656, ppl=6.3, wps=5812.6, ups=0.09, wpb=64863, bsz=128, num_updates=4485, lr=9.99721e-05, gnorm=2.194, loss_scale=8, train_wall=11, gb_free=2.8, wall=51064 2021-06-19 08:50:01 | INFO | train_inner | epoch 002: 1514 / 3002 loss=2.853, ppl=7.22, wps=5852.4, ups=0.09, wpb=64781, bsz=128, num_updates=4486, lr=9.99721e-05, gnorm=2.221, loss_scale=8, train_wall=11, gb_free=2.8, wall=51075 2021-06-19 08:50:12 | INFO | train_inner | epoch 002: 1515 / 3002 loss=2.702, ppl=6.51, wps=5802, ups=0.09, wpb=64819, bsz=128, num_updates=4487, lr=9.99721e-05, gnorm=2.196, loss_scale=8, train_wall=11, gb_free=2.8, wall=51086 2021-06-19 08:50:23 | INFO | train_inner | epoch 002: 1516 / 3002 loss=2.69, ppl=6.45, wps=5897.4, ups=0.09, wpb=64776, bsz=128, num_updates=4488, lr=9.99721e-05, gnorm=2.249, loss_scale=8, train_wall=11, gb_free=2.8, wall=51097 2021-06-19 08:50:34 | INFO | train_inner | epoch 002: 1517 / 3002 loss=2.659, ppl=6.32, wps=5724.5, ups=0.09, wpb=64877, bsz=128, num_updates=4489, lr=9.99721e-05, gnorm=2.199, loss_scale=8, train_wall=11, gb_free=2.8, wall=51109 2021-06-19 08:50:45 | INFO | train_inner | epoch 002: 1518 / 3002 loss=2.636, ppl=6.22, wps=5891.9, ups=0.09, wpb=64900, bsz=128, num_updates=4490, lr=9.99721e-05, gnorm=2.197, loss_scale=8, train_wall=11, gb_free=2.8, wall=51120 2021-06-19 08:50:56 | INFO | train_inner | epoch 002: 1519 / 3002 loss=2.688, ppl=6.44, wps=5876.4, ups=0.09, wpb=64883, bsz=128, num_updates=4491, lr=9.99721e-05, gnorm=2.293, loss_scale=8, train_wall=11, gb_free=2.8, wall=51131 2021-06-19 08:51:07 | INFO | train_inner | epoch 002: 1520 / 3002 loss=2.675, ppl=6.38, wps=5948.8, ups=0.09, wpb=64805, bsz=128, num_updates=4492, lr=9.99721e-05, gnorm=2.243, loss_scale=8, train_wall=10, gb_free=2.8, wall=51142 2021-06-19 08:51:18 | INFO | train_inner | epoch 002: 1521 / 3002 loss=2.764, ppl=6.79, wps=5911.6, ups=0.09, wpb=64778, bsz=128, num_updates=4493, lr=9.99721e-05, gnorm=2.221, loss_scale=8, train_wall=10, gb_free=2.8, wall=51153 2021-06-19 08:51:29 | INFO | train_inner | epoch 002: 1522 / 3002 loss=2.541, ppl=5.82, wps=5781.5, ups=0.09, wpb=64851, bsz=128, num_updates=4494, lr=9.9972e-05, gnorm=13.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=51164 2021-06-19 08:51:41 | INFO | train_inner | epoch 002: 1523 / 3002 loss=2.61, ppl=6.11, wps=5780.1, ups=0.09, wpb=64845, bsz=128, num_updates=4495, lr=9.9972e-05, gnorm=2.341, loss_scale=8, train_wall=11, gb_free=2.8, wall=51175 2021-06-19 08:51:52 | INFO | train_inner | epoch 002: 1524 / 3002 loss=2.654, ppl=6.29, wps=5880.7, ups=0.09, wpb=64821, bsz=128, num_updates=4496, lr=9.9972e-05, gnorm=2.315, loss_scale=8, train_wall=11, gb_free=2.8, wall=51186 2021-06-19 08:52:03 | INFO | train_inner | epoch 002: 1525 / 3002 loss=2.785, ppl=6.89, wps=5851.9, ups=0.09, wpb=64876, bsz=128, num_updates=4497, lr=9.9972e-05, gnorm=2.306, loss_scale=8, train_wall=11, gb_free=2.8, wall=51197 2021-06-19 08:52:14 | INFO | train_inner | epoch 002: 1526 / 3002 loss=2.712, ppl=6.55, wps=5847.7, ups=0.09, wpb=64816, bsz=128, num_updates=4498, lr=9.9972e-05, gnorm=2.264, loss_scale=8, train_wall=11, gb_free=2.8, wall=51208 2021-06-19 08:52:25 | INFO | train_inner | epoch 002: 1527 / 3002 loss=2.691, ppl=6.46, wps=5947.2, ups=0.09, wpb=64885, bsz=128, num_updates=4499, lr=9.9972e-05, gnorm=2.26, loss_scale=8, train_wall=10, gb_free=2.8, wall=51219 2021-06-19 08:52:36 | INFO | train_inner | epoch 002: 1528 / 3002 loss=2.864, ppl=7.28, wps=5878.6, ups=0.09, wpb=64889, bsz=128, num_updates=4500, lr=9.9972e-05, gnorm=2.509, loss_scale=8, train_wall=11, gb_free=2.8, wall=51230 2021-06-19 08:52:47 | INFO | train_inner | epoch 002: 1529 / 3002 loss=2.66, ppl=6.32, wps=5920.3, ups=0.09, wpb=64860, bsz=128, num_updates=4501, lr=9.9972e-05, gnorm=2.268, loss_scale=8, train_wall=11, gb_free=2.8, wall=51241 2021-06-19 08:52:58 | INFO | train_inner | epoch 002: 1530 / 3002 loss=2.726, ppl=6.62, wps=5829.5, ups=0.09, wpb=64834, bsz=128, num_updates=4502, lr=9.9972e-05, gnorm=2.293, loss_scale=8, train_wall=11, gb_free=2.8, wall=51252 2021-06-19 08:53:09 | INFO | train_inner | epoch 002: 1531 / 3002 loss=2.861, ppl=7.27, wps=5844.8, ups=0.09, wpb=64830, bsz=128, num_updates=4503, lr=9.9972e-05, gnorm=2.32, loss_scale=8, train_wall=11, gb_free=2.8, wall=51263 2021-06-19 08:53:20 | INFO | train_inner | epoch 002: 1532 / 3002 loss=2.587, ppl=6.01, wps=5794, ups=0.09, wpb=64822, bsz=128, num_updates=4504, lr=9.9972e-05, gnorm=2.201, loss_scale=8, train_wall=11, gb_free=2.8, wall=51274 2021-06-19 08:53:31 | INFO | train_inner | epoch 002: 1533 / 3002 loss=2.879, ppl=7.36, wps=5819.8, ups=0.09, wpb=64827, bsz=128, num_updates=4505, lr=9.9972e-05, gnorm=2.315, loss_scale=8, train_wall=11, gb_free=2.8, wall=51286 2021-06-19 08:53:42 | INFO | train_inner | epoch 002: 1534 / 3002 loss=2.555, ppl=5.88, wps=5933.6, ups=0.09, wpb=64857, bsz=128, num_updates=4506, lr=9.99719e-05, gnorm=2.349, loss_scale=8, train_wall=10, gb_free=2.8, wall=51297 2021-06-19 08:53:53 | INFO | train_inner | epoch 002: 1535 / 3002 loss=2.927, ppl=7.6, wps=5862.8, ups=0.09, wpb=64858, bsz=128, num_updates=4507, lr=9.99719e-05, gnorm=2.281, loss_scale=8, train_wall=11, gb_free=2.8, wall=51308 2021-06-19 08:54:04 | INFO | train_inner | epoch 002: 1536 / 3002 loss=2.711, ppl=6.55, wps=5882.6, ups=0.09, wpb=64793, bsz=128, num_updates=4508, lr=9.99719e-05, gnorm=2.243, loss_scale=8, train_wall=11, gb_free=2.8, wall=51319 2021-06-19 08:54:15 | INFO | train_inner | epoch 002: 1537 / 3002 loss=2.761, ppl=6.78, wps=5852.1, ups=0.09, wpb=64841, bsz=128, num_updates=4509, lr=9.99719e-05, gnorm=2.346, loss_scale=8, train_wall=11, gb_free=2.8, wall=51330 2021-06-19 08:54:26 | INFO | train_inner | epoch 002: 1538 / 3002 loss=2.683, ppl=6.42, wps=5903.4, ups=0.09, wpb=64858, bsz=128, num_updates=4510, lr=9.99719e-05, gnorm=2.286, loss_scale=8, train_wall=11, gb_free=2.8, wall=51341 2021-06-19 08:54:37 | INFO | train_inner | epoch 002: 1539 / 3002 loss=2.73, ppl=6.63, wps=5916.9, ups=0.09, wpb=64837, bsz=128, num_updates=4511, lr=9.99719e-05, gnorm=2.271, loss_scale=8, train_wall=11, gb_free=2.8, wall=51352 2021-06-19 08:54:49 | INFO | train_inner | epoch 002: 1540 / 3002 loss=2.767, ppl=6.81, wps=5760.9, ups=0.09, wpb=64882, bsz=128, num_updates=4512, lr=9.99719e-05, gnorm=2.292, loss_scale=8, train_wall=11, gb_free=2.8, wall=51363 2021-06-19 08:55:00 | INFO | train_inner | epoch 002: 1541 / 3002 loss=2.981, ppl=7.89, wps=5808.4, ups=0.09, wpb=64746, bsz=128, num_updates=4513, lr=9.99719e-05, gnorm=2.325, loss_scale=8, train_wall=11, gb_free=2.8, wall=51374 2021-06-19 08:55:11 | INFO | train_inner | epoch 002: 1542 / 3002 loss=2.776, ppl=6.85, wps=5898.4, ups=0.09, wpb=64875, bsz=128, num_updates=4514, lr=9.99719e-05, gnorm=2.294, loss_scale=8, train_wall=11, gb_free=2.8, wall=51385 2021-06-19 08:55:22 | INFO | train_inner | epoch 002: 1543 / 3002 loss=2.771, ppl=6.82, wps=5762.1, ups=0.09, wpb=64845, bsz=128, num_updates=4515, lr=9.99719e-05, gnorm=2.231, loss_scale=8, train_wall=11, gb_free=2.8, wall=51396 2021-06-19 08:55:33 | INFO | train_inner | epoch 002: 1544 / 3002 loss=2.705, ppl=6.52, wps=5859, ups=0.09, wpb=64799, bsz=128, num_updates=4516, lr=9.99719e-05, gnorm=2.258, loss_scale=8, train_wall=11, gb_free=2.8, wall=51407 2021-06-19 08:55:44 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 2021-06-19 08:55:55 | INFO | train_inner | epoch 002: 1546 / 3002 loss=2.865, ppl=7.29, wps=2939.5, ups=0.05, wpb=64826, bsz=128, num_updates=4517, lr=9.99719e-05, gnorm=2.25, loss_scale=4, train_wall=21, gb_free=2.8, wall=51429 2021-06-19 08:56:06 | INFO | train_inner | epoch 002: 1547 / 3002 loss=2.814, ppl=7.03, wps=5809.5, ups=0.09, wpb=64863, bsz=128, num_updates=4518, lr=9.99719e-05, gnorm=2.23, loss_scale=4, train_wall=11, gb_free=2.8, wall=51441 2021-06-19 08:56:17 | INFO | train_inner | epoch 002: 1548 / 3002 loss=2.689, ppl=6.45, wps=5886, ups=0.09, wpb=64888, bsz=128, num_updates=4519, lr=9.99718e-05, gnorm=2.265, loss_scale=4, train_wall=11, gb_free=2.8, wall=51452 2021-06-19 08:56:28 | INFO | train_inner | epoch 002: 1549 / 3002 loss=2.681, ppl=6.41, wps=5796, ups=0.09, wpb=64895, bsz=128, num_updates=4520, lr=9.99718e-05, gnorm=2.254, loss_scale=4, train_wall=11, gb_free=2.8, wall=51463 2021-06-19 08:56:40 | INFO | train_inner | epoch 002: 1550 / 3002 loss=2.735, ppl=6.66, wps=5787, ups=0.09, wpb=64840, bsz=128, num_updates=4521, lr=9.99718e-05, gnorm=2.207, loss_scale=4, train_wall=11, gb_free=2.8, wall=51474 2021-06-19 08:56:51 | INFO | train_inner | epoch 002: 1551 / 3002 loss=2.754, ppl=6.74, wps=5880, ups=0.09, wpb=64845, bsz=128, num_updates=4522, lr=9.99718e-05, gnorm=2.288, loss_scale=4, train_wall=11, gb_free=2.8, wall=51485 2021-06-19 08:57:02 | INFO | train_inner | epoch 002: 1552 / 3002 loss=2.716, ppl=6.57, wps=5942.9, ups=0.09, wpb=64846, bsz=128, num_updates=4523, lr=9.99718e-05, gnorm=2.159, loss_scale=4, train_wall=10, gb_free=2.8, wall=51496 2021-06-19 08:57:13 | INFO | train_inner | epoch 002: 1553 / 3002 loss=2.74, ppl=6.68, wps=5897.3, ups=0.09, wpb=64840, bsz=128, num_updates=4524, lr=9.99718e-05, gnorm=2.241, loss_scale=4, train_wall=11, gb_free=2.8, wall=51507 2021-06-19 08:57:24 | INFO | train_inner | epoch 002: 1554 / 3002 loss=2.645, ppl=6.25, wps=5900.8, ups=0.09, wpb=64796, bsz=128, num_updates=4525, lr=9.99718e-05, gnorm=2.222, loss_scale=4, train_wall=11, gb_free=2.8, wall=51518 2021-06-19 08:57:34 | INFO | train_inner | epoch 002: 1555 / 3002 loss=2.8, ppl=6.96, wps=5939.7, ups=0.09, wpb=64885, bsz=128, num_updates=4526, lr=9.99718e-05, gnorm=2.233, loss_scale=4, train_wall=10, gb_free=2.8, wall=51529 2021-06-19 08:57:46 | INFO | train_inner | epoch 002: 1556 / 3002 loss=2.669, ppl=6.36, wps=5806.5, ups=0.09, wpb=64812, bsz=128, num_updates=4527, lr=9.99718e-05, gnorm=2.286, loss_scale=4, train_wall=11, gb_free=2.8, wall=51540 2021-06-19 08:57:57 | INFO | train_inner | epoch 002: 1557 / 3002 loss=2.847, ppl=7.19, wps=5806.5, ups=0.09, wpb=64826, bsz=128, num_updates=4528, lr=9.99718e-05, gnorm=2.313, loss_scale=4, train_wall=11, gb_free=2.8, wall=51551 2021-06-19 08:58:08 | INFO | train_inner | epoch 002: 1558 / 3002 loss=2.818, ppl=7.05, wps=5764.9, ups=0.09, wpb=64820, bsz=128, num_updates=4529, lr=9.99718e-05, gnorm=2.271, loss_scale=4, train_wall=11, gb_free=2.8, wall=51562 2021-06-19 08:58:19 | INFO | train_inner | epoch 002: 1559 / 3002 loss=2.648, ppl=6.27, wps=5968.9, ups=0.09, wpb=64817, bsz=128, num_updates=4530, lr=9.99718e-05, gnorm=2.157, loss_scale=4, train_wall=10, gb_free=2.8, wall=51573 2021-06-19 08:58:30 | INFO | train_inner | epoch 002: 1560 / 3002 loss=2.814, ppl=7.03, wps=5760.7, ups=0.09, wpb=64818, bsz=128, num_updates=4531, lr=9.99717e-05, gnorm=2.326, loss_scale=4, train_wall=11, gb_free=2.8, wall=51585 2021-06-19 08:58:41 | INFO | train_inner | epoch 002: 1561 / 3002 loss=2.831, ppl=7.11, wps=5852.3, ups=0.09, wpb=64857, bsz=128, num_updates=4532, lr=9.99717e-05, gnorm=2.243, loss_scale=4, train_wall=11, gb_free=2.8, wall=51596 2021-06-19 08:58:52 | INFO | train_inner | epoch 002: 1562 / 3002 loss=2.668, ppl=6.35, wps=5892, ups=0.09, wpb=64815, bsz=128, num_updates=4533, lr=9.99717e-05, gnorm=2.246, loss_scale=4, train_wall=11, gb_free=2.8, wall=51607 2021-06-19 08:59:03 | INFO | train_inner | epoch 002: 1563 / 3002 loss=2.668, ppl=6.36, wps=5958.9, ups=0.09, wpb=64853, bsz=128, num_updates=4534, lr=9.99717e-05, gnorm=2.213, loss_scale=4, train_wall=10, gb_free=2.8, wall=51617 2021-06-19 08:59:14 | INFO | train_inner | epoch 002: 1564 / 3002 loss=2.906, ppl=7.5, wps=5794.5, ups=0.09, wpb=64771, bsz=128, num_updates=4535, lr=9.99717e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=51629 2021-06-19 08:59:26 | INFO | train_inner | epoch 002: 1565 / 3002 loss=2.878, ppl=7.35, wps=5747, ups=0.09, wpb=64857, bsz=128, num_updates=4536, lr=9.99717e-05, gnorm=2.365, loss_scale=4, train_wall=11, gb_free=2.8, wall=51640 2021-06-19 08:59:37 | INFO | train_inner | epoch 002: 1566 / 3002 loss=2.82, ppl=7.06, wps=5844, ups=0.09, wpb=64786, bsz=128, num_updates=4537, lr=9.99717e-05, gnorm=2.249, loss_scale=4, train_wall=11, gb_free=2.8, wall=51651 2021-06-19 08:59:48 | INFO | train_inner | epoch 002: 1567 / 3002 loss=2.742, ppl=6.69, wps=5798.7, ups=0.09, wpb=64827, bsz=128, num_updates=4538, lr=9.99717e-05, gnorm=2.422, loss_scale=4, train_wall=11, gb_free=2.8, wall=51662 2021-06-19 08:59:59 | INFO | train_inner | epoch 002: 1568 / 3002 loss=2.619, ppl=6.14, wps=5757.5, ups=0.09, wpb=64825, bsz=128, num_updates=4539, lr=9.99717e-05, gnorm=2.313, loss_scale=4, train_wall=11, gb_free=2.8, wall=51673 2021-06-19 09:00:10 | INFO | train_inner | epoch 002: 1569 / 3002 loss=2.776, ppl=6.85, wps=5716, ups=0.09, wpb=64803, bsz=128, num_updates=4540, lr=9.99717e-05, gnorm=2.191, loss_scale=4, train_wall=11, gb_free=2.8, wall=51685 2021-06-19 09:00:22 | INFO | train_inner | epoch 002: 1570 / 3002 loss=2.797, ppl=6.95, wps=5875.3, ups=0.09, wpb=64848, bsz=128, num_updates=4541, lr=9.99717e-05, gnorm=2.235, loss_scale=4, train_wall=11, gb_free=2.8, wall=51696 2021-06-19 09:00:32 | INFO | train_inner | epoch 002: 1571 / 3002 loss=2.704, ppl=6.52, wps=5939.4, ups=0.09, wpb=64818, bsz=128, num_updates=4542, lr=9.99717e-05, gnorm=2.188, loss_scale=4, train_wall=10, gb_free=2.8, wall=51707 2021-06-19 09:00:44 | INFO | train_inner | epoch 002: 1572 / 3002 loss=2.734, ppl=6.65, wps=5733.2, ups=0.09, wpb=64832, bsz=128, num_updates=4543, lr=9.99717e-05, gnorm=2.125, loss_scale=4, train_wall=11, gb_free=2.8, wall=51718 2021-06-19 09:00:55 | INFO | train_inner | epoch 002: 1573 / 3002 loss=2.786, ppl=6.9, wps=5874.4, ups=0.09, wpb=64883, bsz=128, num_updates=4544, lr=9.99716e-05, gnorm=2.204, loss_scale=4, train_wall=11, gb_free=2.8, wall=51729 2021-06-19 09:01:06 | INFO | train_inner | epoch 002: 1574 / 3002 loss=2.64, ppl=6.24, wps=5931.2, ups=0.09, wpb=64837, bsz=128, num_updates=4545, lr=9.99716e-05, gnorm=2.449, loss_scale=4, train_wall=10, gb_free=2.8, wall=51740 2021-06-19 09:01:17 | INFO | train_inner | epoch 002: 1575 / 3002 loss=2.728, ppl=6.63, wps=5902.6, ups=0.09, wpb=64824, bsz=128, num_updates=4546, lr=9.99716e-05, gnorm=2.179, loss_scale=4, train_wall=11, gb_free=2.8, wall=51751 2021-06-19 09:01:28 | INFO | train_inner | epoch 002: 1576 / 3002 loss=2.951, ppl=7.73, wps=5892.4, ups=0.09, wpb=64839, bsz=128, num_updates=4547, lr=9.99716e-05, gnorm=2.223, loss_scale=4, train_wall=11, gb_free=2.8, wall=51762 2021-06-19 09:01:39 | INFO | train_inner | epoch 002: 1577 / 3002 loss=2.73, ppl=6.63, wps=5760.4, ups=0.09, wpb=64835, bsz=128, num_updates=4548, lr=9.99716e-05, gnorm=2.311, loss_scale=4, train_wall=11, gb_free=2.8, wall=51773 2021-06-19 09:01:50 | INFO | train_inner | epoch 002: 1578 / 3002 loss=2.639, ppl=6.23, wps=5867.2, ups=0.09, wpb=64812, bsz=128, num_updates=4549, lr=9.99716e-05, gnorm=2.355, loss_scale=4, train_wall=11, gb_free=2.8, wall=51784 2021-06-19 09:02:01 | INFO | train_inner | epoch 002: 1579 / 3002 loss=2.609, ppl=6.1, wps=5967, ups=0.09, wpb=64844, bsz=128, num_updates=4550, lr=9.99716e-05, gnorm=2.137, loss_scale=4, train_wall=10, gb_free=2.8, wall=51795 2021-06-19 09:02:12 | INFO | train_inner | epoch 002: 1580 / 3002 loss=2.704, ppl=6.51, wps=5775.3, ups=0.09, wpb=64770, bsz=128, num_updates=4551, lr=9.99716e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=51806 2021-06-19 09:02:23 | INFO | train_inner | epoch 002: 1581 / 3002 loss=2.786, ppl=6.9, wps=5824, ups=0.09, wpb=64905, bsz=128, num_updates=4552, lr=9.99716e-05, gnorm=2.309, loss_scale=4, train_wall=11, gb_free=2.8, wall=51818 2021-06-19 09:02:34 | INFO | train_inner | epoch 002: 1582 / 3002 loss=2.711, ppl=6.55, wps=5875.8, ups=0.09, wpb=64794, bsz=128, num_updates=4553, lr=9.99716e-05, gnorm=2.214, loss_scale=4, train_wall=11, gb_free=2.8, wall=51829 2021-06-19 09:02:45 | INFO | train_inner | epoch 002: 1583 / 3002 loss=2.751, ppl=6.73, wps=5935.3, ups=0.09, wpb=64867, bsz=128, num_updates=4554, lr=9.99716e-05, gnorm=2.106, loss_scale=4, train_wall=10, gb_free=2.8, wall=51840 2021-06-19 09:02:56 | INFO | train_inner | epoch 002: 1584 / 3002 loss=2.563, ppl=5.91, wps=5802.3, ups=0.09, wpb=64850, bsz=128, num_updates=4555, lr=9.99716e-05, gnorm=2.35, loss_scale=4, train_wall=11, gb_free=2.8, wall=51851 2021-06-19 09:03:07 | INFO | train_inner | epoch 002: 1585 / 3002 loss=2.602, ppl=6.07, wps=5835.5, ups=0.09, wpb=64846, bsz=128, num_updates=4556, lr=9.99715e-05, gnorm=2.282, loss_scale=4, train_wall=11, gb_free=2.8, wall=51862 2021-06-19 09:03:19 | INFO | train_inner | epoch 002: 1586 / 3002 loss=2.779, ppl=6.86, wps=5805.8, ups=0.09, wpb=64909, bsz=128, num_updates=4557, lr=9.99715e-05, gnorm=2.243, loss_scale=4, train_wall=11, gb_free=2.8, wall=51873 2021-06-19 09:03:30 | INFO | train_inner | epoch 002: 1587 / 3002 loss=2.901, ppl=7.47, wps=5799.9, ups=0.09, wpb=64808, bsz=128, num_updates=4558, lr=9.99715e-05, gnorm=2.92, loss_scale=4, train_wall=11, gb_free=2.8, wall=51884 2021-06-19 09:03:41 | INFO | train_inner | epoch 002: 1588 / 3002 loss=2.537, ppl=5.8, wps=5809.1, ups=0.09, wpb=64853, bsz=128, num_updates=4559, lr=9.99715e-05, gnorm=2.336, loss_scale=4, train_wall=11, gb_free=2.8, wall=51895 2021-06-19 09:03:52 | INFO | train_inner | epoch 002: 1589 / 3002 loss=2.699, ppl=6.49, wps=5836.7, ups=0.09, wpb=64805, bsz=128, num_updates=4560, lr=9.99715e-05, gnorm=2.223, loss_scale=4, train_wall=11, gb_free=2.8, wall=51906 2021-06-19 09:04:03 | INFO | train_inner | epoch 002: 1590 / 3002 loss=2.784, ppl=6.89, wps=5961.8, ups=0.09, wpb=64925, bsz=128, num_updates=4561, lr=9.99715e-05, gnorm=2.305, loss_scale=4, train_wall=10, gb_free=2.8, wall=51917 2021-06-19 09:04:14 | INFO | train_inner | epoch 002: 1591 / 3002 loss=2.66, ppl=6.32, wps=5886.9, ups=0.09, wpb=64899, bsz=128, num_updates=4562, lr=9.99715e-05, gnorm=2.333, loss_scale=4, train_wall=11, gb_free=2.8, wall=51928 2021-06-19 09:04:25 | INFO | train_inner | epoch 002: 1592 / 3002 loss=2.814, ppl=7.03, wps=5888.8, ups=0.09, wpb=64837, bsz=128, num_updates=4563, lr=9.99715e-05, gnorm=2.271, loss_scale=4, train_wall=11, gb_free=2.8, wall=51939 2021-06-19 09:04:36 | INFO | train_inner | epoch 002: 1593 / 3002 loss=2.873, ppl=7.33, wps=5832.9, ups=0.09, wpb=64846, bsz=128, num_updates=4564, lr=9.99715e-05, gnorm=2.237, loss_scale=4, train_wall=11, gb_free=2.8, wall=51950 2021-06-19 09:04:47 | INFO | train_inner | epoch 002: 1594 / 3002 loss=2.731, ppl=6.64, wps=5886.1, ups=0.09, wpb=64817, bsz=128, num_updates=4565, lr=9.99715e-05, gnorm=2.291, loss_scale=4, train_wall=11, gb_free=2.8, wall=51961 2021-06-19 09:04:58 | INFO | train_inner | epoch 002: 1595 / 3002 loss=2.663, ppl=6.33, wps=5828.7, ups=0.09, wpb=64804, bsz=128, num_updates=4566, lr=9.99715e-05, gnorm=2.193, loss_scale=4, train_wall=11, gb_free=2.8, wall=51973 2021-06-19 09:05:10 | INFO | train_inner | epoch 002: 1596 / 3002 loss=2.691, ppl=6.46, wps=5766.3, ups=0.09, wpb=64820, bsz=128, num_updates=4567, lr=9.99715e-05, gnorm=2.228, loss_scale=4, train_wall=11, gb_free=2.8, wall=51984 2021-06-19 09:05:21 | INFO | train_inner | epoch 002: 1597 / 3002 loss=2.765, ppl=6.8, wps=5708.3, ups=0.09, wpb=64922, bsz=128, num_updates=4568, lr=9.99715e-05, gnorm=2.328, loss_scale=4, train_wall=11, gb_free=2.8, wall=51995 2021-06-19 09:05:32 | INFO | train_inner | epoch 002: 1598 / 3002 loss=2.78, ppl=6.87, wps=5691, ups=0.09, wpb=64772, bsz=128, num_updates=4569, lr=9.99714e-05, gnorm=2.179, loss_scale=4, train_wall=11, gb_free=2.8, wall=52007 2021-06-19 09:05:43 | INFO | train_inner | epoch 002: 1599 / 3002 loss=2.639, ppl=6.23, wps=5779.7, ups=0.09, wpb=64868, bsz=128, num_updates=4570, lr=9.99714e-05, gnorm=2.211, loss_scale=4, train_wall=11, gb_free=2.8, wall=52018 2021-06-19 09:05:55 | INFO | train_inner | epoch 002: 1600 / 3002 loss=2.695, ppl=6.48, wps=5881.2, ups=0.09, wpb=64913, bsz=128, num_updates=4571, lr=9.99714e-05, gnorm=2.21, loss_scale=4, train_wall=11, gb_free=2.8, wall=52029 2021-06-19 09:06:06 | INFO | train_inner | epoch 002: 1601 / 3002 loss=2.798, ppl=6.95, wps=5806.7, ups=0.09, wpb=64743, bsz=128, num_updates=4572, lr=9.99714e-05, gnorm=2.741, loss_scale=4, train_wall=11, gb_free=2.8, wall=52040 2021-06-19 09:06:17 | INFO | train_inner | epoch 002: 1602 / 3002 loss=2.796, ppl=6.95, wps=5840.3, ups=0.09, wpb=64823, bsz=128, num_updates=4573, lr=9.99714e-05, gnorm=2.305, loss_scale=4, train_wall=11, gb_free=2.8, wall=52051 2021-06-19 09:06:28 | INFO | train_inner | epoch 002: 1603 / 3002 loss=2.752, ppl=6.73, wps=5880.8, ups=0.09, wpb=64889, bsz=128, num_updates=4574, lr=9.99714e-05, gnorm=2.312, loss_scale=4, train_wall=11, gb_free=2.8, wall=52062 2021-06-19 09:06:39 | INFO | train_inner | epoch 002: 1604 / 3002 loss=2.586, ppl=6, wps=5831.9, ups=0.09, wpb=64764, bsz=128, num_updates=4575, lr=9.99714e-05, gnorm=3.282, loss_scale=4, train_wall=11, gb_free=2.8, wall=52073 2021-06-19 09:06:50 | INFO | train_inner | epoch 002: 1605 / 3002 loss=2.723, ppl=6.6, wps=5761.8, ups=0.09, wpb=64830, bsz=128, num_updates=4576, lr=9.99714e-05, gnorm=2.54, loss_scale=4, train_wall=11, gb_free=2.8, wall=52084 2021-06-19 09:07:01 | INFO | train_inner | epoch 002: 1606 / 3002 loss=2.691, ppl=6.46, wps=5835.9, ups=0.09, wpb=64826, bsz=128, num_updates=4577, lr=9.99714e-05, gnorm=2.193, loss_scale=4, train_wall=11, gb_free=2.8, wall=52096 2021-06-19 09:07:12 | INFO | train_inner | epoch 002: 1607 / 3002 loss=2.588, ppl=6.01, wps=5881.3, ups=0.09, wpb=64849, bsz=128, num_updates=4578, lr=9.99714e-05, gnorm=2.302, loss_scale=4, train_wall=11, gb_free=2.8, wall=52107 2021-06-19 09:07:23 | INFO | train_inner | epoch 002: 1608 / 3002 loss=2.932, ppl=7.63, wps=5900.8, ups=0.09, wpb=64885, bsz=128, num_updates=4579, lr=9.99714e-05, gnorm=2.164, loss_scale=4, train_wall=11, gb_free=2.8, wall=52118 2021-06-19 09:07:34 | INFO | train_inner | epoch 002: 1609 / 3002 loss=2.913, ppl=7.53, wps=5787.3, ups=0.09, wpb=64785, bsz=128, num_updates=4580, lr=9.99714e-05, gnorm=3.027, loss_scale=4, train_wall=11, gb_free=2.8, wall=52129 2021-06-19 09:07:45 | INFO | train_inner | epoch 002: 1610 / 3002 loss=2.709, ppl=6.54, wps=5953.9, ups=0.09, wpb=64843, bsz=128, num_updates=4581, lr=9.99713e-05, gnorm=2.196, loss_scale=4, train_wall=10, gb_free=2.8, wall=52140 2021-06-19 09:07:57 | INFO | train_inner | epoch 002: 1611 / 3002 loss=2.8, ppl=6.96, wps=5808.6, ups=0.09, wpb=64755, bsz=128, num_updates=4582, lr=9.99713e-05, gnorm=7.908, loss_scale=4, train_wall=11, gb_free=2.8, wall=52151 2021-06-19 09:08:08 | INFO | train_inner | epoch 002: 1612 / 3002 loss=3.001, ppl=8, wps=5862.6, ups=0.09, wpb=64740, bsz=128, num_updates=4583, lr=9.99713e-05, gnorm=3.893, loss_scale=4, train_wall=11, gb_free=2.8, wall=52162 2021-06-19 09:08:19 | INFO | train_inner | epoch 002: 1613 / 3002 loss=2.799, ppl=6.96, wps=5922.7, ups=0.09, wpb=64846, bsz=128, num_updates=4584, lr=9.99713e-05, gnorm=2.221, loss_scale=4, train_wall=10, gb_free=2.8, wall=52173 2021-06-19 09:08:29 | INFO | train_inner | epoch 002: 1614 / 3002 loss=2.645, ppl=6.26, wps=5989.5, ups=0.09, wpb=64934, bsz=128, num_updates=4585, lr=9.99713e-05, gnorm=2.248, loss_scale=4, train_wall=10, gb_free=2.8, wall=52184 2021-06-19 09:08:41 | INFO | train_inner | epoch 002: 1615 / 3002 loss=2.823, ppl=7.08, wps=5785.9, ups=0.09, wpb=64834, bsz=128, num_updates=4586, lr=9.99713e-05, gnorm=3.786, loss_scale=4, train_wall=11, gb_free=2.8, wall=52195 2021-06-19 09:08:52 | INFO | train_inner | epoch 002: 1616 / 3002 loss=2.76, ppl=6.78, wps=5906.9, ups=0.09, wpb=64771, bsz=128, num_updates=4587, lr=9.99713e-05, gnorm=2.232, loss_scale=4, train_wall=10, gb_free=2.8, wall=52206 2021-06-19 09:09:02 | INFO | train_inner | epoch 002: 1617 / 3002 loss=2.756, ppl=6.75, wps=5943.4, ups=0.09, wpb=64851, bsz=128, num_updates=4588, lr=9.99713e-05, gnorm=2.276, loss_scale=4, train_wall=10, gb_free=2.8, wall=52217 2021-06-19 09:09:13 | INFO | train_inner | epoch 002: 1618 / 3002 loss=2.782, ppl=6.88, wps=5898.4, ups=0.09, wpb=64804, bsz=128, num_updates=4589, lr=9.99713e-05, gnorm=2.26, loss_scale=4, train_wall=11, gb_free=2.8, wall=52228 2021-06-19 09:09:25 | INFO | train_inner | epoch 002: 1619 / 3002 loss=2.593, ppl=6.03, wps=5810.5, ups=0.09, wpb=64755, bsz=128, num_updates=4590, lr=9.99713e-05, gnorm=2.126, loss_scale=4, train_wall=11, gb_free=2.8, wall=52239 2021-06-19 09:09:36 | INFO | train_inner | epoch 002: 1620 / 3002 loss=2.659, ppl=6.32, wps=5841.4, ups=0.09, wpb=64850, bsz=128, num_updates=4591, lr=9.99713e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=52250 2021-06-19 09:09:47 | INFO | train_inner | epoch 002: 1621 / 3002 loss=2.75, ppl=6.73, wps=5921.2, ups=0.09, wpb=64890, bsz=128, num_updates=4592, lr=9.99713e-05, gnorm=2.343, loss_scale=4, train_wall=11, gb_free=2.8, wall=52261 2021-06-19 09:09:58 | INFO | train_inner | epoch 002: 1622 / 3002 loss=2.76, ppl=6.77, wps=5780.5, ups=0.09, wpb=64785, bsz=128, num_updates=4593, lr=9.99713e-05, gnorm=2.301, loss_scale=4, train_wall=11, gb_free=2.8, wall=52272 2021-06-19 09:10:09 | INFO | train_inner | epoch 002: 1623 / 3002 loss=2.822, ppl=7.07, wps=5872.3, ups=0.09, wpb=64831, bsz=128, num_updates=4594, lr=9.99712e-05, gnorm=2.452, loss_scale=4, train_wall=11, gb_free=2.8, wall=52283 2021-06-19 09:10:20 | INFO | train_inner | epoch 002: 1624 / 3002 loss=2.744, ppl=6.7, wps=5860.3, ups=0.09, wpb=64757, bsz=128, num_updates=4595, lr=9.99712e-05, gnorm=2.297, loss_scale=4, train_wall=11, gb_free=2.8, wall=52294 2021-06-19 09:10:31 | INFO | train_inner | epoch 002: 1625 / 3002 loss=2.618, ppl=6.14, wps=5873.8, ups=0.09, wpb=64892, bsz=128, num_updates=4596, lr=9.99712e-05, gnorm=2.234, loss_scale=4, train_wall=11, gb_free=2.8, wall=52305 2021-06-19 09:10:42 | INFO | train_inner | epoch 002: 1626 / 3002 loss=2.692, ppl=6.46, wps=5898.7, ups=0.09, wpb=64794, bsz=128, num_updates=4597, lr=9.99712e-05, gnorm=2.143, loss_scale=4, train_wall=11, gb_free=2.8, wall=52316 2021-06-19 09:10:53 | INFO | train_inner | epoch 002: 1627 / 3002 loss=2.785, ppl=6.89, wps=5847.1, ups=0.09, wpb=64806, bsz=128, num_updates=4598, lr=9.99712e-05, gnorm=2.197, loss_scale=4, train_wall=11, gb_free=2.8, wall=52327 2021-06-19 09:11:04 | INFO | train_inner | epoch 002: 1628 / 3002 loss=2.875, ppl=7.34, wps=5867.3, ups=0.09, wpb=64787, bsz=128, num_updates=4599, lr=9.99712e-05, gnorm=2.297, loss_scale=4, train_wall=11, gb_free=2.8, wall=52338 2021-06-19 09:11:15 | INFO | train_inner | epoch 002: 1629 / 3002 loss=2.709, ppl=6.54, wps=5798.3, ups=0.09, wpb=64859, bsz=128, num_updates=4600, lr=9.99712e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=52350 2021-06-19 09:11:26 | INFO | train_inner | epoch 002: 1630 / 3002 loss=2.857, ppl=7.24, wps=5787.7, ups=0.09, wpb=64776, bsz=128, num_updates=4601, lr=9.99712e-05, gnorm=2.764, loss_scale=4, train_wall=11, gb_free=2.8, wall=52361 2021-06-19 09:11:37 | INFO | train_inner | epoch 002: 1631 / 3002 loss=2.655, ppl=6.3, wps=5903.9, ups=0.09, wpb=64791, bsz=128, num_updates=4602, lr=9.99712e-05, gnorm=2.271, loss_scale=4, train_wall=11, gb_free=2.8, wall=52372 2021-06-19 09:11:49 | INFO | train_inner | epoch 002: 1632 / 3002 loss=2.694, ppl=6.47, wps=5804.6, ups=0.09, wpb=64914, bsz=128, num_updates=4603, lr=9.99712e-05, gnorm=2.731, loss_scale=4, train_wall=11, gb_free=2.8, wall=52383 2021-06-19 09:12:00 | INFO | train_inner | epoch 002: 1633 / 3002 loss=2.677, ppl=6.4, wps=5838.7, ups=0.09, wpb=64932, bsz=128, num_updates=4604, lr=9.99712e-05, gnorm=2.234, loss_scale=4, train_wall=11, gb_free=2.8, wall=52394 2021-06-19 09:12:11 | INFO | train_inner | epoch 002: 1634 / 3002 loss=2.758, ppl=6.77, wps=5846.4, ups=0.09, wpb=64810, bsz=128, num_updates=4605, lr=9.99712e-05, gnorm=3.028, loss_scale=4, train_wall=11, gb_free=2.8, wall=52405 2021-06-19 09:12:22 | INFO | train_inner | epoch 002: 1635 / 3002 loss=2.717, ppl=6.57, wps=5890.7, ups=0.09, wpb=64793, bsz=128, num_updates=4606, lr=9.99711e-05, gnorm=2.222, loss_scale=4, train_wall=11, gb_free=2.8, wall=52416 2021-06-19 09:12:33 | INFO | train_inner | epoch 002: 1636 / 3002 loss=2.655, ppl=6.3, wps=5764.4, ups=0.09, wpb=64895, bsz=128, num_updates=4607, lr=9.99711e-05, gnorm=2.327, loss_scale=4, train_wall=11, gb_free=2.8, wall=52427 2021-06-19 09:12:44 | INFO | train_inner | epoch 002: 1637 / 3002 loss=2.792, ppl=6.92, wps=5822.7, ups=0.09, wpb=64703, bsz=128, num_updates=4608, lr=9.99711e-05, gnorm=2.33, loss_scale=4, train_wall=11, gb_free=2.8, wall=52439 2021-06-19 09:12:55 | INFO | train_inner | epoch 002: 1638 / 3002 loss=2.872, ppl=7.32, wps=5873.3, ups=0.09, wpb=64812, bsz=128, num_updates=4609, lr=9.99711e-05, gnorm=2.539, loss_scale=4, train_wall=11, gb_free=2.8, wall=52450 2021-06-19 09:13:06 | INFO | train_inner | epoch 002: 1639 / 3002 loss=2.761, ppl=6.78, wps=5946, ups=0.09, wpb=64791, bsz=128, num_updates=4610, lr=9.99711e-05, gnorm=2.754, loss_scale=4, train_wall=10, gb_free=2.8, wall=52460 2021-06-19 09:13:17 | INFO | train_inner | epoch 002: 1640 / 3002 loss=2.716, ppl=6.57, wps=5807.7, ups=0.09, wpb=64906, bsz=128, num_updates=4611, lr=9.99711e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=52472 2021-06-19 09:13:28 | INFO | train_inner | epoch 002: 1641 / 3002 loss=2.995, ppl=7.97, wps=5854.7, ups=0.09, wpb=64744, bsz=128, num_updates=4612, lr=9.99711e-05, gnorm=2.383, loss_scale=4, train_wall=11, gb_free=2.8, wall=52483 2021-06-19 09:13:40 | INFO | train_inner | epoch 002: 1642 / 3002 loss=2.645, ppl=6.26, wps=5786.7, ups=0.09, wpb=64837, bsz=128, num_updates=4613, lr=9.99711e-05, gnorm=2.303, loss_scale=4, train_wall=11, gb_free=2.8, wall=52494 2021-06-19 09:13:51 | INFO | train_inner | epoch 002: 1643 / 3002 loss=2.873, ppl=7.32, wps=5741.8, ups=0.09, wpb=64831, bsz=128, num_updates=4614, lr=9.99711e-05, gnorm=2.348, loss_scale=4, train_wall=11, gb_free=2.8, wall=52505 2021-06-19 09:14:02 | INFO | train_inner | epoch 002: 1644 / 3002 loss=2.706, ppl=6.52, wps=5903.4, ups=0.09, wpb=64816, bsz=128, num_updates=4615, lr=9.99711e-05, gnorm=2.293, loss_scale=4, train_wall=11, gb_free=2.8, wall=52516 2021-06-19 09:14:13 | INFO | train_inner | epoch 002: 1645 / 3002 loss=2.781, ppl=6.88, wps=5951, ups=0.09, wpb=64896, bsz=128, num_updates=4616, lr=9.99711e-05, gnorm=2.445, loss_scale=4, train_wall=10, gb_free=2.8, wall=52527 2021-06-19 09:14:24 | INFO | train_inner | epoch 002: 1646 / 3002 loss=2.883, ppl=7.38, wps=5783.7, ups=0.09, wpb=64803, bsz=128, num_updates=4617, lr=9.99711e-05, gnorm=2.325, loss_scale=4, train_wall=11, gb_free=2.8, wall=52538 2021-06-19 09:14:35 | INFO | train_inner | epoch 002: 1647 / 3002 loss=2.819, ppl=7.06, wps=5799.9, ups=0.09, wpb=64788, bsz=128, num_updates=4618, lr=9.99711e-05, gnorm=12.636, loss_scale=4, train_wall=11, gb_free=2.8, wall=52549 2021-06-19 09:14:46 | INFO | train_inner | epoch 002: 1648 / 3002 loss=2.819, ppl=7.06, wps=5913.2, ups=0.09, wpb=64777, bsz=128, num_updates=4619, lr=9.9971e-05, gnorm=2.205, loss_scale=4, train_wall=10, gb_free=2.8, wall=52560 2021-06-19 09:14:57 | INFO | train_inner | epoch 002: 1649 / 3002 loss=2.816, ppl=7.04, wps=5875.3, ups=0.09, wpb=64840, bsz=128, num_updates=4620, lr=9.9971e-05, gnorm=2.26, loss_scale=4, train_wall=11, gb_free=2.8, wall=52571 2021-06-19 09:15:08 | INFO | train_inner | epoch 002: 1650 / 3002 loss=2.627, ppl=6.18, wps=5869.7, ups=0.09, wpb=64865, bsz=128, num_updates=4621, lr=9.9971e-05, gnorm=2.297, loss_scale=4, train_wall=11, gb_free=2.8, wall=52583 2021-06-19 09:15:19 | INFO | train_inner | epoch 002: 1651 / 3002 loss=2.636, ppl=6.22, wps=5853.6, ups=0.09, wpb=64797, bsz=128, num_updates=4622, lr=9.9971e-05, gnorm=2.235, loss_scale=4, train_wall=11, gb_free=2.8, wall=52594 2021-06-19 09:15:30 | INFO | train_inner | epoch 002: 1652 / 3002 loss=2.792, ppl=6.92, wps=5807.2, ups=0.09, wpb=64872, bsz=128, num_updates=4623, lr=9.9971e-05, gnorm=2.325, loss_scale=4, train_wall=11, gb_free=2.8, wall=52605 2021-06-19 09:15:41 | INFO | train_inner | epoch 002: 1653 / 3002 loss=2.772, ppl=6.83, wps=5939.9, ups=0.09, wpb=64803, bsz=128, num_updates=4624, lr=9.9971e-05, gnorm=2.641, loss_scale=4, train_wall=10, gb_free=2.8, wall=52616 2021-06-19 09:15:52 | INFO | train_inner | epoch 002: 1654 / 3002 loss=2.823, ppl=7.07, wps=5810.6, ups=0.09, wpb=64765, bsz=128, num_updates=4625, lr=9.9971e-05, gnorm=2.25, loss_scale=4, train_wall=11, gb_free=2.8, wall=52627 2021-06-19 09:16:04 | INFO | train_inner | epoch 002: 1655 / 3002 loss=2.789, ppl=6.91, wps=5682.1, ups=0.09, wpb=64860, bsz=128, num_updates=4626, lr=9.9971e-05, gnorm=2.176, loss_scale=4, train_wall=11, gb_free=2.8, wall=52638 2021-06-19 09:16:15 | INFO | train_inner | epoch 002: 1656 / 3002 loss=2.85, ppl=7.21, wps=5850.7, ups=0.09, wpb=64844, bsz=128, num_updates=4627, lr=9.9971e-05, gnorm=2.311, loss_scale=4, train_wall=11, gb_free=2.8, wall=52649 2021-06-19 09:16:26 | INFO | train_inner | epoch 002: 1657 / 3002 loss=2.858, ppl=7.25, wps=5911.5, ups=0.09, wpb=64745, bsz=128, num_updates=4628, lr=9.9971e-05, gnorm=2.423, loss_scale=4, train_wall=11, gb_free=2.8, wall=52660 2021-06-19 09:16:37 | INFO | train_inner | epoch 002: 1658 / 3002 loss=2.703, ppl=6.51, wps=5807.3, ups=0.09, wpb=64761, bsz=128, num_updates=4629, lr=9.9971e-05, gnorm=2.294, loss_scale=4, train_wall=11, gb_free=2.8, wall=52671 2021-06-19 09:16:48 | INFO | train_inner | epoch 002: 1659 / 3002 loss=2.77, ppl=6.82, wps=5729.6, ups=0.09, wpb=64736, bsz=128, num_updates=4630, lr=9.9971e-05, gnorm=2.355, loss_scale=4, train_wall=11, gb_free=2.8, wall=52683 2021-06-19 09:17:00 | INFO | train_inner | epoch 002: 1660 / 3002 loss=2.653, ppl=6.29, wps=5782.9, ups=0.09, wpb=64830, bsz=128, num_updates=4631, lr=9.99709e-05, gnorm=2.397, loss_scale=4, train_wall=11, gb_free=2.8, wall=52694 2021-06-19 09:17:11 | INFO | train_inner | epoch 002: 1661 / 3002 loss=2.697, ppl=6.48, wps=5875.2, ups=0.09, wpb=64761, bsz=128, num_updates=4632, lr=9.99709e-05, gnorm=2.359, loss_scale=4, train_wall=11, gb_free=2.8, wall=52705 2021-06-19 09:17:22 | INFO | train_inner | epoch 002: 1662 / 3002 loss=2.837, ppl=7.15, wps=5815.6, ups=0.09, wpb=64839, bsz=128, num_updates=4633, lr=9.99709e-05, gnorm=2.324, loss_scale=4, train_wall=11, gb_free=2.8, wall=52716 2021-06-19 09:17:33 | INFO | train_inner | epoch 002: 1663 / 3002 loss=2.749, ppl=6.72, wps=5869.6, ups=0.09, wpb=64920, bsz=128, num_updates=4634, lr=9.99709e-05, gnorm=3.591, loss_scale=4, train_wall=11, gb_free=2.8, wall=52727 2021-06-19 09:17:44 | INFO | train_inner | epoch 002: 1664 / 3002 loss=2.786, ppl=6.9, wps=5919.7, ups=0.09, wpb=64855, bsz=128, num_updates=4635, lr=9.99709e-05, gnorm=2.214, loss_scale=4, train_wall=10, gb_free=2.8, wall=52738 2021-06-19 09:17:55 | INFO | train_inner | epoch 002: 1665 / 3002 loss=2.787, ppl=6.9, wps=6001.4, ups=0.09, wpb=64908, bsz=128, num_updates=4636, lr=9.99709e-05, gnorm=2.169, loss_scale=4, train_wall=10, gb_free=2.8, wall=52749 2021-06-19 09:18:06 | INFO | train_inner | epoch 002: 1666 / 3002 loss=2.73, ppl=6.63, wps=5841.2, ups=0.09, wpb=64935, bsz=128, num_updates=4637, lr=9.99709e-05, gnorm=2.13, loss_scale=4, train_wall=11, gb_free=2.8, wall=52760 2021-06-19 09:18:17 | INFO | train_inner | epoch 002: 1667 / 3002 loss=2.62, ppl=6.15, wps=5813.6, ups=0.09, wpb=64869, bsz=128, num_updates=4638, lr=9.99709e-05, gnorm=2.627, loss_scale=4, train_wall=11, gb_free=2.8, wall=52771 2021-06-19 09:18:28 | INFO | train_inner | epoch 002: 1668 / 3002 loss=2.665, ppl=6.34, wps=5940.6, ups=0.09, wpb=64802, bsz=128, num_updates=4639, lr=9.99709e-05, gnorm=2.271, loss_scale=4, train_wall=10, gb_free=2.8, wall=52782 2021-06-19 09:18:39 | INFO | train_inner | epoch 002: 1669 / 3002 loss=2.733, ppl=6.65, wps=5842.4, ups=0.09, wpb=64804, bsz=128, num_updates=4640, lr=9.99709e-05, gnorm=2.222, loss_scale=4, train_wall=11, gb_free=2.8, wall=52793 2021-06-19 09:18:50 | INFO | train_inner | epoch 002: 1670 / 3002 loss=2.836, ppl=7.14, wps=5847.7, ups=0.09, wpb=64761, bsz=128, num_updates=4641, lr=9.99709e-05, gnorm=2.402, loss_scale=4, train_wall=11, gb_free=2.8, wall=52804 2021-06-19 09:19:01 | INFO | train_inner | epoch 002: 1671 / 3002 loss=2.768, ppl=6.81, wps=5893.7, ups=0.09, wpb=64764, bsz=128, num_updates=4642, lr=9.99709e-05, gnorm=2.181, loss_scale=4, train_wall=11, gb_free=2.8, wall=52815 2021-06-19 09:19:12 | INFO | train_inner | epoch 002: 1672 / 3002 loss=2.781, ppl=6.87, wps=5743.5, ups=0.09, wpb=64908, bsz=128, num_updates=4643, lr=9.99709e-05, gnorm=2.466, loss_scale=4, train_wall=11, gb_free=2.8, wall=52827 2021-06-19 09:19:23 | INFO | train_inner | epoch 002: 1673 / 3002 loss=2.692, ppl=6.46, wps=5862.9, ups=0.09, wpb=64801, bsz=128, num_updates=4644, lr=9.99708e-05, gnorm=2.209, loss_scale=8, train_wall=11, gb_free=2.8, wall=52838 2021-06-19 09:19:34 | INFO | train_inner | epoch 002: 1674 / 3002 loss=2.819, ppl=7.05, wps=5822.2, ups=0.09, wpb=64780, bsz=128, num_updates=4645, lr=9.99708e-05, gnorm=2.189, loss_scale=8, train_wall=11, gb_free=2.8, wall=52849 2021-06-19 09:19:46 | INFO | train_inner | epoch 002: 1675 / 3002 loss=2.716, ppl=6.57, wps=5845.1, ups=0.09, wpb=64836, bsz=128, num_updates=4646, lr=9.99708e-05, gnorm=2.257, loss_scale=8, train_wall=11, gb_free=2.8, wall=52860 2021-06-19 09:19:57 | INFO | train_inner | epoch 002: 1676 / 3002 loss=2.651, ppl=6.28, wps=5768.5, ups=0.09, wpb=64842, bsz=128, num_updates=4647, lr=9.99708e-05, gnorm=2.277, loss_scale=8, train_wall=11, gb_free=2.8, wall=52871 2021-06-19 09:20:08 | INFO | train_inner | epoch 002: 1677 / 3002 loss=2.779, ppl=6.86, wps=5848.6, ups=0.09, wpb=64822, bsz=128, num_updates=4648, lr=9.99708e-05, gnorm=2.259, loss_scale=8, train_wall=11, gb_free=2.8, wall=52882 2021-06-19 09:20:19 | INFO | train_inner | epoch 002: 1678 / 3002 loss=2.684, ppl=6.43, wps=5820.6, ups=0.09, wpb=64846, bsz=128, num_updates=4649, lr=9.99708e-05, gnorm=2.229, loss_scale=8, train_wall=11, gb_free=2.8, wall=52893 2021-06-19 09:20:30 | INFO | train_inner | epoch 002: 1679 / 3002 loss=2.847, ppl=7.19, wps=5730, ups=0.09, wpb=64791, bsz=128, num_updates=4650, lr=9.99708e-05, gnorm=2.466, loss_scale=8, train_wall=11, gb_free=2.8, wall=52905 2021-06-19 09:20:41 | INFO | train_inner | epoch 002: 1680 / 3002 loss=2.786, ppl=6.9, wps=5831.5, ups=0.09, wpb=64866, bsz=128, num_updates=4651, lr=9.99708e-05, gnorm=2.207, loss_scale=8, train_wall=11, gb_free=2.8, wall=52916 2021-06-19 09:20:52 | INFO | train_inner | epoch 002: 1681 / 3002 loss=2.676, ppl=6.39, wps=5865.2, ups=0.09, wpb=64810, bsz=128, num_updates=4652, lr=9.99708e-05, gnorm=2.337, loss_scale=8, train_wall=11, gb_free=2.8, wall=52927 2021-06-19 09:21:04 | INFO | train_inner | epoch 002: 1682 / 3002 loss=2.757, ppl=6.76, wps=5747.7, ups=0.09, wpb=64846, bsz=128, num_updates=4653, lr=9.99708e-05, gnorm=2.15, loss_scale=8, train_wall=11, gb_free=2.8, wall=52938 2021-06-19 09:21:15 | INFO | train_inner | epoch 002: 1683 / 3002 loss=2.856, ppl=7.24, wps=5906, ups=0.09, wpb=64840, bsz=128, num_updates=4654, lr=9.99708e-05, gnorm=2.297, loss_scale=8, train_wall=11, gb_free=2.8, wall=52949 2021-06-19 09:21:26 | INFO | train_inner | epoch 002: 1684 / 3002 loss=2.773, ppl=6.84, wps=5783.6, ups=0.09, wpb=64846, bsz=128, num_updates=4655, lr=9.99708e-05, gnorm=2.255, loss_scale=8, train_wall=11, gb_free=2.8, wall=52960 2021-06-19 09:21:37 | INFO | train_inner | epoch 002: 1685 / 3002 loss=2.782, ppl=6.88, wps=5891.3, ups=0.09, wpb=64748, bsz=128, num_updates=4656, lr=9.99707e-05, gnorm=2.257, loss_scale=8, train_wall=11, gb_free=2.8, wall=52971 2021-06-19 09:21:48 | INFO | train_inner | epoch 002: 1686 / 3002 loss=2.749, ppl=6.72, wps=5813.5, ups=0.09, wpb=64849, bsz=128, num_updates=4657, lr=9.99707e-05, gnorm=2.212, loss_scale=8, train_wall=11, gb_free=2.8, wall=52982 2021-06-19 09:21:59 | INFO | train_inner | epoch 002: 1687 / 3002 loss=2.862, ppl=7.27, wps=5854.2, ups=0.09, wpb=64829, bsz=128, num_updates=4658, lr=9.99707e-05, gnorm=2.319, loss_scale=8, train_wall=11, gb_free=2.8, wall=52993 2021-06-19 09:22:10 | INFO | train_inner | epoch 002: 1688 / 3002 loss=2.761, ppl=6.78, wps=5796, ups=0.09, wpb=64748, bsz=128, num_updates=4659, lr=9.99707e-05, gnorm=2.316, loss_scale=8, train_wall=11, gb_free=2.8, wall=53005 2021-06-19 09:22:21 | INFO | train_inner | epoch 002: 1689 / 3002 loss=2.551, ppl=5.86, wps=5843.9, ups=0.09, wpb=64875, bsz=128, num_updates=4660, lr=9.99707e-05, gnorm=2.161, loss_scale=8, train_wall=11, gb_free=2.8, wall=53016 2021-06-19 09:22:32 | INFO | train_inner | epoch 002: 1690 / 3002 loss=2.809, ppl=7.01, wps=5875.3, ups=0.09, wpb=64796, bsz=128, num_updates=4661, lr=9.99707e-05, gnorm=2.332, loss_scale=8, train_wall=11, gb_free=2.8, wall=53027 2021-06-19 09:22:44 | INFO | train_inner | epoch 002: 1691 / 3002 loss=2.725, ppl=6.61, wps=5820.5, ups=0.09, wpb=64812, bsz=128, num_updates=4662, lr=9.99707e-05, gnorm=2.211, loss_scale=8, train_wall=11, gb_free=2.8, wall=53038 2021-06-19 09:22:55 | INFO | train_inner | epoch 002: 1692 / 3002 loss=2.662, ppl=6.33, wps=5798.4, ups=0.09, wpb=64793, bsz=128, num_updates=4663, lr=9.99707e-05, gnorm=2.276, loss_scale=8, train_wall=11, gb_free=2.8, wall=53049 2021-06-19 09:23:06 | INFO | train_inner | epoch 002: 1693 / 3002 loss=2.62, ppl=6.15, wps=5911, ups=0.09, wpb=64825, bsz=128, num_updates=4664, lr=9.99707e-05, gnorm=2.304, loss_scale=8, train_wall=10, gb_free=2.8, wall=53060 2021-06-19 09:23:17 | INFO | train_inner | epoch 002: 1694 / 3002 loss=2.734, ppl=6.65, wps=5822.5, ups=0.09, wpb=64849, bsz=128, num_updates=4665, lr=9.99707e-05, gnorm=2.127, loss_scale=8, train_wall=11, gb_free=2.8, wall=53071 2021-06-19 09:23:28 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 2021-06-19 09:23:39 | INFO | train_inner | epoch 002: 1696 / 3002 loss=2.824, ppl=7.08, wps=2891.5, ups=0.04, wpb=64803, bsz=128, num_updates=4666, lr=9.99707e-05, gnorm=2.259, loss_scale=4, train_wall=21, gb_free=2.8, wall=53094 2021-06-19 09:23:50 | INFO | train_inner | epoch 002: 1697 / 3002 loss=2.614, ppl=6.12, wps=5846.6, ups=0.09, wpb=64873, bsz=128, num_updates=4667, lr=9.99707e-05, gnorm=2.149, loss_scale=4, train_wall=11, gb_free=2.8, wall=53105 2021-06-19 09:24:02 | INFO | train_inner | epoch 002: 1698 / 3002 loss=2.76, ppl=6.77, wps=5697.1, ups=0.09, wpb=64754, bsz=128, num_updates=4668, lr=9.99707e-05, gnorm=2.191, loss_scale=4, train_wall=11, gb_free=2.8, wall=53116 2021-06-19 09:24:13 | INFO | train_inner | epoch 002: 1699 / 3002 loss=2.751, ppl=6.73, wps=5879.8, ups=0.09, wpb=64903, bsz=128, num_updates=4669, lr=9.99706e-05, gnorm=2.266, loss_scale=4, train_wall=11, gb_free=2.8, wall=53127 2021-06-19 09:24:24 | INFO | train_inner | epoch 002: 1700 / 3002 loss=2.746, ppl=6.71, wps=5861.3, ups=0.09, wpb=64770, bsz=128, num_updates=4670, lr=9.99706e-05, gnorm=2.199, loss_scale=4, train_wall=11, gb_free=2.8, wall=53138 2021-06-19 09:24:35 | INFO | train_inner | epoch 002: 1701 / 3002 loss=2.812, ppl=7.02, wps=5785.1, ups=0.09, wpb=64769, bsz=128, num_updates=4671, lr=9.99706e-05, gnorm=2.389, loss_scale=4, train_wall=11, gb_free=2.8, wall=53149 2021-06-19 09:24:46 | INFO | train_inner | epoch 002: 1702 / 3002 loss=2.619, ppl=6.14, wps=5971.4, ups=0.09, wpb=64895, bsz=128, num_updates=4672, lr=9.99706e-05, gnorm=2.226, loss_scale=4, train_wall=10, gb_free=2.8, wall=53160 2021-06-19 09:24:57 | INFO | train_inner | epoch 002: 1703 / 3002 loss=2.752, ppl=6.73, wps=5966, ups=0.09, wpb=64877, bsz=128, num_updates=4673, lr=9.99706e-05, gnorm=2.337, loss_scale=4, train_wall=10, gb_free=2.8, wall=53171 2021-06-19 09:25:08 | INFO | train_inner | epoch 002: 1704 / 3002 loss=2.607, ppl=6.09, wps=5853.4, ups=0.09, wpb=64843, bsz=128, num_updates=4674, lr=9.99706e-05, gnorm=2.154, loss_scale=4, train_wall=11, gb_free=2.8, wall=53182 2021-06-19 09:25:19 | INFO | train_inner | epoch 002: 1705 / 3002 loss=2.695, ppl=6.48, wps=5853.5, ups=0.09, wpb=64857, bsz=128, num_updates=4675, lr=9.99706e-05, gnorm=2.178, loss_scale=4, train_wall=11, gb_free=2.8, wall=53193 2021-06-19 09:25:30 | INFO | train_inner | epoch 002: 1706 / 3002 loss=2.685, ppl=6.43, wps=5771.5, ups=0.09, wpb=64840, bsz=128, num_updates=4676, lr=9.99706e-05, gnorm=2.21, loss_scale=4, train_wall=11, gb_free=2.8, wall=53204 2021-06-19 09:25:41 | INFO | train_inner | epoch 002: 1707 / 3002 loss=2.781, ppl=6.87, wps=5832, ups=0.09, wpb=64707, bsz=128, num_updates=4677, lr=9.99706e-05, gnorm=2.226, loss_scale=4, train_wall=11, gb_free=2.8, wall=53216 2021-06-19 09:25:53 | INFO | train_inner | epoch 002: 1708 / 3002 loss=2.792, ppl=6.93, wps=5760.1, ups=0.09, wpb=64885, bsz=128, num_updates=4678, lr=9.99706e-05, gnorm=2.51, loss_scale=4, train_wall=11, gb_free=2.8, wall=53227 2021-06-19 09:26:03 | INFO | train_inner | epoch 002: 1709 / 3002 loss=2.785, ppl=6.89, wps=5959, ups=0.09, wpb=64905, bsz=128, num_updates=4679, lr=9.99706e-05, gnorm=2.129, loss_scale=4, train_wall=10, gb_free=2.8, wall=53238 2021-06-19 09:26:15 | INFO | train_inner | epoch 002: 1710 / 3002 loss=2.719, ppl=6.58, wps=5735.6, ups=0.09, wpb=64796, bsz=128, num_updates=4680, lr=9.99706e-05, gnorm=2.324, loss_scale=4, train_wall=11, gb_free=2.8, wall=53249 2021-06-19 09:26:26 | INFO | train_inner | epoch 002: 1711 / 3002 loss=2.716, ppl=6.57, wps=5752.3, ups=0.09, wpb=64786, bsz=128, num_updates=4681, lr=9.99705e-05, gnorm=2.199, loss_scale=4, train_wall=11, gb_free=2.8, wall=53260 2021-06-19 09:26:37 | INFO | train_inner | epoch 002: 1712 / 3002 loss=2.708, ppl=6.53, wps=5887.6, ups=0.09, wpb=64827, bsz=128, num_updates=4682, lr=9.99705e-05, gnorm=3.823, loss_scale=4, train_wall=11, gb_free=2.8, wall=53271 2021-06-19 09:26:48 | INFO | train_inner | epoch 002: 1713 / 3002 loss=2.667, ppl=6.35, wps=5798.7, ups=0.09, wpb=64812, bsz=128, num_updates=4683, lr=9.99705e-05, gnorm=2.586, loss_scale=4, train_wall=11, gb_free=2.8, wall=53282 2021-06-19 09:26:59 | INFO | train_inner | epoch 002: 1714 / 3002 loss=2.762, ppl=6.78, wps=5946.1, ups=0.09, wpb=64846, bsz=128, num_updates=4684, lr=9.99705e-05, gnorm=2.259, loss_scale=4, train_wall=10, gb_free=2.8, wall=53293 2021-06-19 09:27:10 | INFO | train_inner | epoch 002: 1715 / 3002 loss=2.861, ppl=7.26, wps=5930.8, ups=0.09, wpb=64901, bsz=128, num_updates=4685, lr=9.99705e-05, gnorm=2.57, loss_scale=4, train_wall=10, gb_free=2.8, wall=53304 2021-06-19 09:27:21 | INFO | train_inner | epoch 002: 1716 / 3002 loss=2.742, ppl=6.69, wps=5820, ups=0.09, wpb=64801, bsz=128, num_updates=4686, lr=9.99705e-05, gnorm=2.304, loss_scale=4, train_wall=11, gb_free=2.8, wall=53315 2021-06-19 09:27:32 | INFO | train_inner | epoch 002: 1717 / 3002 loss=2.871, ppl=7.31, wps=5865.6, ups=0.09, wpb=64767, bsz=128, num_updates=4687, lr=9.99705e-05, gnorm=2.206, loss_scale=4, train_wall=11, gb_free=2.8, wall=53327 2021-06-19 09:27:43 | INFO | train_inner | epoch 002: 1718 / 3002 loss=2.826, ppl=7.09, wps=5963.7, ups=0.09, wpb=64935, bsz=128, num_updates=4688, lr=9.99705e-05, gnorm=2.265, loss_scale=4, train_wall=10, gb_free=2.8, wall=53337 2021-06-19 09:27:54 | INFO | train_inner | epoch 002: 1719 / 3002 loss=2.955, ppl=7.76, wps=5849.3, ups=0.09, wpb=64804, bsz=128, num_updates=4689, lr=9.99705e-05, gnorm=2.242, loss_scale=4, train_wall=11, gb_free=2.8, wall=53348 2021-06-19 09:28:05 | INFO | train_inner | epoch 002: 1720 / 3002 loss=2.75, ppl=6.73, wps=5743.1, ups=0.09, wpb=64901, bsz=128, num_updates=4690, lr=9.99705e-05, gnorm=2.251, loss_scale=4, train_wall=11, gb_free=2.8, wall=53360 2021-06-19 09:28:17 | INFO | train_inner | epoch 002: 1721 / 3002 loss=2.634, ppl=6.21, wps=5810.9, ups=0.09, wpb=64852, bsz=128, num_updates=4691, lr=9.99705e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=53371 2021-06-19 09:28:28 | INFO | train_inner | epoch 002: 1722 / 3002 loss=2.773, ppl=6.84, wps=5955.9, ups=0.09, wpb=64881, bsz=128, num_updates=4692, lr=9.99705e-05, gnorm=2.251, loss_scale=4, train_wall=10, gb_free=2.8, wall=53382 2021-06-19 09:28:39 | INFO | train_inner | epoch 002: 1723 / 3002 loss=2.862, ppl=7.27, wps=5900.7, ups=0.09, wpb=64864, bsz=128, num_updates=4693, lr=9.99705e-05, gnorm=2.236, loss_scale=4, train_wall=11, gb_free=2.8, wall=53393 2021-06-19 09:28:50 | INFO | train_inner | epoch 002: 1724 / 3002 loss=2.709, ppl=6.54, wps=5814.5, ups=0.09, wpb=64892, bsz=128, num_updates=4694, lr=9.99704e-05, gnorm=2.196, loss_scale=4, train_wall=11, gb_free=2.8, wall=53404 2021-06-19 09:29:01 | INFO | train_inner | epoch 002: 1725 / 3002 loss=2.568, ppl=5.93, wps=5971.9, ups=0.09, wpb=64832, bsz=128, num_updates=4695, lr=9.99704e-05, gnorm=2.199, loss_scale=4, train_wall=10, gb_free=2.8, wall=53415 2021-06-19 09:29:12 | INFO | train_inner | epoch 002: 1726 / 3002 loss=2.871, ppl=7.32, wps=5786.9, ups=0.09, wpb=64792, bsz=128, num_updates=4696, lr=9.99704e-05, gnorm=2.223, loss_scale=4, train_wall=11, gb_free=2.8, wall=53426 2021-06-19 09:29:23 | INFO | train_inner | epoch 002: 1727 / 3002 loss=2.716, ppl=6.57, wps=5850.4, ups=0.09, wpb=64803, bsz=128, num_updates=4697, lr=9.99704e-05, gnorm=2.166, loss_scale=4, train_wall=11, gb_free=2.8, wall=53437 2021-06-19 09:29:34 | INFO | train_inner | epoch 002: 1728 / 3002 loss=2.833, ppl=7.13, wps=5914.8, ups=0.09, wpb=64813, bsz=128, num_updates=4698, lr=9.99704e-05, gnorm=2.233, loss_scale=4, train_wall=10, gb_free=2.8, wall=53448 2021-06-19 09:29:45 | INFO | train_inner | epoch 002: 1729 / 3002 loss=2.861, ppl=7.27, wps=5868.3, ups=0.09, wpb=64789, bsz=128, num_updates=4699, lr=9.99704e-05, gnorm=2.263, loss_scale=4, train_wall=11, gb_free=2.8, wall=53459 2021-06-19 09:29:56 | INFO | train_inner | epoch 002: 1730 / 3002 loss=2.72, ppl=6.59, wps=5960.5, ups=0.09, wpb=64807, bsz=128, num_updates=4700, lr=9.99704e-05, gnorm=2.464, loss_scale=4, train_wall=10, gb_free=2.8, wall=53470 2021-06-19 09:30:07 | INFO | train_inner | epoch 002: 1731 / 3002 loss=2.698, ppl=6.49, wps=5854.1, ups=0.09, wpb=64877, bsz=128, num_updates=4701, lr=9.99704e-05, gnorm=2.137, loss_scale=4, train_wall=11, gb_free=2.8, wall=53481 2021-06-19 09:30:18 | INFO | train_inner | epoch 002: 1732 / 3002 loss=2.737, ppl=6.67, wps=5901.7, ups=0.09, wpb=64923, bsz=128, num_updates=4702, lr=9.99704e-05, gnorm=2.183, loss_scale=4, train_wall=11, gb_free=2.8, wall=53492 2021-06-19 09:30:29 | INFO | train_inner | epoch 002: 1733 / 3002 loss=2.671, ppl=6.37, wps=5920.7, ups=0.09, wpb=64763, bsz=128, num_updates=4703, lr=9.99704e-05, gnorm=2.193, loss_scale=4, train_wall=10, gb_free=2.8, wall=53503 2021-06-19 09:30:40 | INFO | train_inner | epoch 002: 1734 / 3002 loss=2.754, ppl=6.74, wps=5904.2, ups=0.09, wpb=64793, bsz=128, num_updates=4704, lr=9.99704e-05, gnorm=2.205, loss_scale=4, train_wall=11, gb_free=2.8, wall=53514 2021-06-19 09:30:51 | INFO | train_inner | epoch 002: 1735 / 3002 loss=2.696, ppl=6.48, wps=5773.4, ups=0.09, wpb=64909, bsz=128, num_updates=4705, lr=9.99704e-05, gnorm=2.281, loss_scale=4, train_wall=11, gb_free=2.8, wall=53525 2021-06-19 09:31:02 | INFO | train_inner | epoch 002: 1736 / 3002 loss=2.667, ppl=6.35, wps=5822.4, ups=0.09, wpb=64822, bsz=128, num_updates=4706, lr=9.99703e-05, gnorm=2.242, loss_scale=4, train_wall=11, gb_free=2.8, wall=53536 2021-06-19 09:31:13 | INFO | train_inner | epoch 002: 1737 / 3002 loss=2.534, ppl=5.79, wps=5768, ups=0.09, wpb=64799, bsz=128, num_updates=4707, lr=9.99703e-05, gnorm=2.148, loss_scale=4, train_wall=11, gb_free=2.8, wall=53548 2021-06-19 09:31:24 | INFO | train_inner | epoch 002: 1738 / 3002 loss=2.71, ppl=6.54, wps=5813.4, ups=0.09, wpb=64820, bsz=128, num_updates=4708, lr=9.99703e-05, gnorm=2.105, loss_scale=4, train_wall=11, gb_free=2.8, wall=53559 2021-06-19 09:31:36 | INFO | train_inner | epoch 002: 1739 / 3002 loss=2.565, ppl=5.92, wps=5768.5, ups=0.09, wpb=64875, bsz=128, num_updates=4709, lr=9.99703e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=53570 2021-06-19 09:31:47 | INFO | train_inner | epoch 002: 1740 / 3002 loss=2.66, ppl=6.32, wps=5915.7, ups=0.09, wpb=64902, bsz=128, num_updates=4710, lr=9.99703e-05, gnorm=2.242, loss_scale=4, train_wall=11, gb_free=2.8, wall=53581 2021-06-19 09:31:58 | INFO | train_inner | epoch 002: 1741 / 3002 loss=2.604, ppl=6.08, wps=5792.2, ups=0.09, wpb=64914, bsz=128, num_updates=4711, lr=9.99703e-05, gnorm=2.378, loss_scale=4, train_wall=11, gb_free=2.8, wall=53592 2021-06-19 09:32:09 | INFO | train_inner | epoch 002: 1742 / 3002 loss=2.759, ppl=6.77, wps=5936.1, ups=0.09, wpb=64901, bsz=128, num_updates=4712, lr=9.99703e-05, gnorm=2.254, loss_scale=4, train_wall=10, gb_free=2.8, wall=53603 2021-06-19 09:32:20 | INFO | train_inner | epoch 002: 1743 / 3002 loss=2.817, ppl=7.05, wps=5923.1, ups=0.09, wpb=64785, bsz=128, num_updates=4713, lr=9.99703e-05, gnorm=2.235, loss_scale=4, train_wall=10, gb_free=2.8, wall=53614 2021-06-19 09:32:31 | INFO | train_inner | epoch 002: 1744 / 3002 loss=2.707, ppl=6.53, wps=5815.1, ups=0.09, wpb=64879, bsz=128, num_updates=4714, lr=9.99703e-05, gnorm=2.245, loss_scale=4, train_wall=11, gb_free=2.8, wall=53625 2021-06-19 09:32:42 | INFO | train_inner | epoch 002: 1745 / 3002 loss=2.819, ppl=7.06, wps=5833.1, ups=0.09, wpb=64856, bsz=128, num_updates=4715, lr=9.99703e-05, gnorm=2.328, loss_scale=4, train_wall=11, gb_free=2.8, wall=53636 2021-06-19 09:32:53 | INFO | train_inner | epoch 002: 1746 / 3002 loss=2.812, ppl=7.02, wps=5965.6, ups=0.09, wpb=64855, bsz=128, num_updates=4716, lr=9.99703e-05, gnorm=2.337, loss_scale=4, train_wall=10, gb_free=2.8, wall=53647 2021-06-19 09:33:04 | INFO | train_inner | epoch 002: 1747 / 3002 loss=2.772, ppl=6.83, wps=5827.5, ups=0.09, wpb=64863, bsz=128, num_updates=4717, lr=9.99703e-05, gnorm=2.335, loss_scale=4, train_wall=11, gb_free=2.8, wall=53658 2021-06-19 09:33:15 | INFO | train_inner | epoch 002: 1748 / 3002 loss=2.71, ppl=6.54, wps=5892.3, ups=0.09, wpb=64814, bsz=128, num_updates=4718, lr=9.99703e-05, gnorm=2.266, loss_scale=4, train_wall=11, gb_free=2.8, wall=53669 2021-06-19 09:33:26 | INFO | train_inner | epoch 002: 1749 / 3002 loss=2.63, ppl=6.19, wps=5908.3, ups=0.09, wpb=64751, bsz=128, num_updates=4719, lr=9.99702e-05, gnorm=2.407, loss_scale=4, train_wall=11, gb_free=2.8, wall=53680 2021-06-19 09:33:37 | INFO | train_inner | epoch 002: 1750 / 3002 loss=2.854, ppl=7.23, wps=5791.1, ups=0.09, wpb=64807, bsz=128, num_updates=4720, lr=9.99702e-05, gnorm=2.279, loss_scale=4, train_wall=11, gb_free=2.8, wall=53691 2021-06-19 09:33:48 | INFO | train_inner | epoch 002: 1751 / 3002 loss=2.892, ppl=7.43, wps=5903.9, ups=0.09, wpb=64835, bsz=128, num_updates=4721, lr=9.99702e-05, gnorm=2.316, loss_scale=4, train_wall=11, gb_free=2.8, wall=53702 2021-06-19 09:33:59 | INFO | train_inner | epoch 002: 1752 / 3002 loss=2.763, ppl=6.79, wps=6049.4, ups=0.09, wpb=64892, bsz=128, num_updates=4722, lr=9.99702e-05, gnorm=2.296, loss_scale=4, train_wall=10, gb_free=2.8, wall=53713 2021-06-19 09:34:10 | INFO | train_inner | epoch 002: 1753 / 3002 loss=2.802, ppl=6.98, wps=5814.5, ups=0.09, wpb=64742, bsz=128, num_updates=4723, lr=9.99702e-05, gnorm=2.234, loss_scale=4, train_wall=11, gb_free=2.8, wall=53724 2021-06-19 09:34:21 | INFO | train_inner | epoch 002: 1754 / 3002 loss=2.712, ppl=6.55, wps=5813.4, ups=0.09, wpb=64831, bsz=128, num_updates=4724, lr=9.99702e-05, gnorm=2.414, loss_scale=4, train_wall=11, gb_free=2.8, wall=53735 2021-06-19 09:34:32 | INFO | train_inner | epoch 002: 1755 / 3002 loss=2.761, ppl=6.78, wps=5746.3, ups=0.09, wpb=64706, bsz=128, num_updates=4725, lr=9.99702e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=53747 2021-06-19 09:34:43 | INFO | train_inner | epoch 002: 1756 / 3002 loss=2.615, ppl=6.13, wps=5851.7, ups=0.09, wpb=64832, bsz=128, num_updates=4726, lr=9.99702e-05, gnorm=2.999, loss_scale=4, train_wall=11, gb_free=2.8, wall=53758 2021-06-19 09:34:55 | INFO | train_inner | epoch 002: 1757 / 3002 loss=2.862, ppl=7.27, wps=5853, ups=0.09, wpb=64802, bsz=128, num_updates=4727, lr=9.99702e-05, gnorm=2.339, loss_scale=4, train_wall=11, gb_free=2.8, wall=53769 2021-06-19 09:35:06 | INFO | train_inner | epoch 002: 1758 / 3002 loss=2.728, ppl=6.63, wps=5884.6, ups=0.09, wpb=64809, bsz=128, num_updates=4728, lr=9.99702e-05, gnorm=2.246, loss_scale=4, train_wall=11, gb_free=2.8, wall=53780 2021-06-19 09:35:17 | INFO | train_inner | epoch 002: 1759 / 3002 loss=2.661, ppl=6.33, wps=5745, ups=0.09, wpb=64774, bsz=128, num_updates=4729, lr=9.99702e-05, gnorm=2.146, loss_scale=4, train_wall=11, gb_free=2.8, wall=53791 2021-06-19 09:35:28 | INFO | train_inner | epoch 002: 1760 / 3002 loss=2.83, ppl=7.11, wps=5915.9, ups=0.09, wpb=64765, bsz=128, num_updates=4730, lr=9.99702e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=53802 2021-06-19 09:35:39 | INFO | train_inner | epoch 002: 1761 / 3002 loss=2.684, ppl=6.43, wps=5813.8, ups=0.09, wpb=64820, bsz=128, num_updates=4731, lr=9.99701e-05, gnorm=2.267, loss_scale=4, train_wall=11, gb_free=2.8, wall=53813 2021-06-19 09:35:50 | INFO | train_inner | epoch 002: 1762 / 3002 loss=2.685, ppl=6.43, wps=5928.6, ups=0.09, wpb=64811, bsz=128, num_updates=4732, lr=9.99701e-05, gnorm=2.201, loss_scale=4, train_wall=10, gb_free=2.8, wall=53824 2021-06-19 09:36:01 | INFO | train_inner | epoch 002: 1763 / 3002 loss=2.619, ppl=6.14, wps=5923.2, ups=0.09, wpb=64890, bsz=128, num_updates=4733, lr=9.99701e-05, gnorm=2.335, loss_scale=4, train_wall=10, gb_free=2.8, wall=53835 2021-06-19 09:36:12 | INFO | train_inner | epoch 002: 1764 / 3002 loss=2.786, ppl=6.9, wps=5900.3, ups=0.09, wpb=64781, bsz=128, num_updates=4734, lr=9.99701e-05, gnorm=2.899, loss_scale=4, train_wall=11, gb_free=2.8, wall=53846 2021-06-19 09:36:23 | INFO | train_inner | epoch 002: 1765 / 3002 loss=2.732, ppl=6.64, wps=5852.7, ups=0.09, wpb=64736, bsz=128, num_updates=4735, lr=9.99701e-05, gnorm=2.24, loss_scale=4, train_wall=11, gb_free=2.8, wall=53857 2021-06-19 09:36:34 | INFO | train_inner | epoch 002: 1766 / 3002 loss=2.7, ppl=6.5, wps=5812.2, ups=0.09, wpb=64782, bsz=128, num_updates=4736, lr=9.99701e-05, gnorm=2.416, loss_scale=4, train_wall=11, gb_free=2.8, wall=53868 2021-06-19 09:36:45 | INFO | train_inner | epoch 002: 1767 / 3002 loss=3.043, ppl=8.24, wps=5833.7, ups=0.09, wpb=64676, bsz=128, num_updates=4737, lr=9.99701e-05, gnorm=2.477, loss_scale=4, train_wall=11, gb_free=2.8, wall=53879 2021-06-19 09:36:56 | INFO | train_inner | epoch 002: 1768 / 3002 loss=2.931, ppl=7.62, wps=5928.7, ups=0.09, wpb=64819, bsz=128, num_updates=4738, lr=9.99701e-05, gnorm=2.293, loss_scale=4, train_wall=10, gb_free=2.8, wall=53890 2021-06-19 09:37:07 | INFO | train_inner | epoch 002: 1769 / 3002 loss=2.748, ppl=6.72, wps=5945.7, ups=0.09, wpb=64774, bsz=128, num_updates=4739, lr=9.99701e-05, gnorm=2.199, loss_scale=4, train_wall=10, gb_free=2.8, wall=53901 2021-06-19 09:37:18 | INFO | train_inner | epoch 002: 1770 / 3002 loss=2.668, ppl=6.36, wps=5834.1, ups=0.09, wpb=64947, bsz=128, num_updates=4740, lr=9.99701e-05, gnorm=2.346, loss_scale=4, train_wall=11, gb_free=2.8, wall=53912 2021-06-19 09:37:29 | INFO | train_inner | epoch 002: 1771 / 3002 loss=2.841, ppl=7.16, wps=5829.9, ups=0.09, wpb=64857, bsz=128, num_updates=4741, lr=9.99701e-05, gnorm=2.326, loss_scale=4, train_wall=11, gb_free=2.8, wall=53924 2021-06-19 09:37:40 | INFO | train_inner | epoch 002: 1772 / 3002 loss=2.633, ppl=6.2, wps=5834.4, ups=0.09, wpb=64797, bsz=128, num_updates=4742, lr=9.99701e-05, gnorm=2.304, loss_scale=4, train_wall=11, gb_free=2.8, wall=53935 2021-06-19 09:37:51 | INFO | train_inner | epoch 002: 1773 / 3002 loss=2.946, ppl=7.7, wps=5816.5, ups=0.09, wpb=64846, bsz=128, num_updates=4743, lr=9.99701e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=53946 2021-06-19 09:38:02 | INFO | train_inner | epoch 002: 1774 / 3002 loss=3.005, ppl=8.03, wps=5969, ups=0.09, wpb=64739, bsz=128, num_updates=4744, lr=9.997e-05, gnorm=2.246, loss_scale=4, train_wall=10, gb_free=2.8, wall=53957 2021-06-19 09:38:13 | INFO | train_inner | epoch 002: 1775 / 3002 loss=2.772, ppl=6.83, wps=5840.5, ups=0.09, wpb=64867, bsz=128, num_updates=4745, lr=9.997e-05, gnorm=2.242, loss_scale=4, train_wall=11, gb_free=2.8, wall=53968 2021-06-19 09:38:25 | INFO | train_inner | epoch 002: 1776 / 3002 loss=2.758, ppl=6.77, wps=5842.3, ups=0.09, wpb=64855, bsz=128, num_updates=4746, lr=9.997e-05, gnorm=2.219, loss_scale=4, train_wall=11, gb_free=2.8, wall=53979 2021-06-19 09:38:36 | INFO | train_inner | epoch 002: 1777 / 3002 loss=2.73, ppl=6.63, wps=5876.6, ups=0.09, wpb=64918, bsz=128, num_updates=4747, lr=9.997e-05, gnorm=2.282, loss_scale=4, train_wall=11, gb_free=2.8, wall=53990 2021-06-19 09:38:46 | INFO | train_inner | epoch 002: 1778 / 3002 loss=2.851, ppl=7.22, wps=5922.6, ups=0.09, wpb=64845, bsz=128, num_updates=4748, lr=9.997e-05, gnorm=2.348, loss_scale=4, train_wall=11, gb_free=2.8, wall=54001 2021-06-19 09:38:58 | INFO | train_inner | epoch 002: 1779 / 3002 loss=2.746, ppl=6.71, wps=5781.7, ups=0.09, wpb=64804, bsz=128, num_updates=4749, lr=9.997e-05, gnorm=2.695, loss_scale=4, train_wall=11, gb_free=2.8, wall=54012 2021-06-19 09:39:09 | INFO | train_inner | epoch 002: 1780 / 3002 loss=2.771, ppl=6.83, wps=5824.4, ups=0.09, wpb=64791, bsz=128, num_updates=4750, lr=9.997e-05, gnorm=3.57, loss_scale=4, train_wall=11, gb_free=2.8, wall=54023 2021-06-19 09:39:20 | INFO | train_inner | epoch 002: 1781 / 3002 loss=2.714, ppl=6.56, wps=5751.1, ups=0.09, wpb=64833, bsz=128, num_updates=4751, lr=9.997e-05, gnorm=2.239, loss_scale=4, train_wall=11, gb_free=2.8, wall=54034 2021-06-19 09:39:31 | INFO | train_inner | epoch 002: 1782 / 3002 loss=2.75, ppl=6.73, wps=5753.6, ups=0.09, wpb=64923, bsz=128, num_updates=4752, lr=9.997e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=54046 2021-06-19 09:39:42 | INFO | train_inner | epoch 002: 1783 / 3002 loss=2.744, ppl=6.7, wps=5878.5, ups=0.09, wpb=64864, bsz=128, num_updates=4753, lr=9.997e-05, gnorm=2.275, loss_scale=4, train_wall=11, gb_free=2.8, wall=54057 2021-06-19 09:39:54 | INFO | train_inner | epoch 002: 1784 / 3002 loss=2.929, ppl=7.62, wps=5702.5, ups=0.09, wpb=64772, bsz=128, num_updates=4754, lr=9.997e-05, gnorm=2.232, loss_scale=4, train_wall=11, gb_free=2.8, wall=54068 2021-06-19 09:40:05 | INFO | train_inner | epoch 002: 1785 / 3002 loss=2.733, ppl=6.65, wps=5744.3, ups=0.09, wpb=64752, bsz=128, num_updates=4755, lr=9.997e-05, gnorm=2.305, loss_scale=4, train_wall=11, gb_free=2.8, wall=54079 2021-06-19 09:40:16 | INFO | train_inner | epoch 002: 1786 / 3002 loss=2.754, ppl=6.75, wps=5805.1, ups=0.09, wpb=64805, bsz=128, num_updates=4756, lr=9.99699e-05, gnorm=2.221, loss_scale=4, train_wall=11, gb_free=2.8, wall=54091 2021-06-19 09:40:28 | INFO | train_inner | epoch 002: 1787 / 3002 loss=2.888, ppl=7.4, wps=5715.4, ups=0.09, wpb=64825, bsz=128, num_updates=4757, lr=9.99699e-05, gnorm=2.355, loss_scale=4, train_wall=11, gb_free=2.8, wall=54102 2021-06-19 09:40:39 | INFO | train_inner | epoch 002: 1788 / 3002 loss=2.737, ppl=6.67, wps=5838.7, ups=0.09, wpb=64835, bsz=128, num_updates=4758, lr=9.99699e-05, gnorm=2.158, loss_scale=4, train_wall=11, gb_free=2.8, wall=54113 2021-06-19 09:40:50 | INFO | train_inner | epoch 002: 1789 / 3002 loss=2.958, ppl=7.77, wps=5816.3, ups=0.09, wpb=64791, bsz=128, num_updates=4759, lr=9.99699e-05, gnorm=2.299, loss_scale=4, train_wall=11, gb_free=2.8, wall=54124 2021-06-19 09:41:01 | INFO | train_inner | epoch 002: 1790 / 3002 loss=2.727, ppl=6.62, wps=5784.4, ups=0.09, wpb=64855, bsz=128, num_updates=4760, lr=9.99699e-05, gnorm=2.268, loss_scale=4, train_wall=11, gb_free=2.8, wall=54135 2021-06-19 09:41:12 | INFO | train_inner | epoch 002: 1791 / 3002 loss=2.701, ppl=6.5, wps=5772.7, ups=0.09, wpb=64732, bsz=128, num_updates=4761, lr=9.99699e-05, gnorm=2.309, loss_scale=4, train_wall=11, gb_free=2.8, wall=54147 2021-06-19 09:41:24 | INFO | train_inner | epoch 002: 1792 / 3002 loss=2.823, ppl=7.08, wps=5735.1, ups=0.09, wpb=64778, bsz=128, num_updates=4762, lr=9.99699e-05, gnorm=2.755, loss_scale=4, train_wall=11, gb_free=2.8, wall=54158 2021-06-19 09:41:35 | INFO | train_inner | epoch 002: 1793 / 3002 loss=2.667, ppl=6.35, wps=5847.8, ups=0.09, wpb=64802, bsz=128, num_updates=4763, lr=9.99699e-05, gnorm=2.582, loss_scale=4, train_wall=11, gb_free=2.8, wall=54169 2021-06-19 09:41:46 | INFO | train_inner | epoch 002: 1794 / 3002 loss=2.61, ppl=6.1, wps=5812.7, ups=0.09, wpb=64850, bsz=128, num_updates=4764, lr=9.99699e-05, gnorm=2.202, loss_scale=4, train_wall=11, gb_free=2.8, wall=54180 2021-06-19 09:41:57 | INFO | train_inner | epoch 002: 1795 / 3002 loss=2.722, ppl=6.6, wps=5777.1, ups=0.09, wpb=64796, bsz=128, num_updates=4765, lr=9.99699e-05, gnorm=2.422, loss_scale=4, train_wall=11, gb_free=2.8, wall=54191 2021-06-19 09:42:08 | INFO | train_inner | epoch 002: 1796 / 3002 loss=3.019, ppl=8.1, wps=5850.3, ups=0.09, wpb=64886, bsz=128, num_updates=4766, lr=9.99699e-05, gnorm=2.423, loss_scale=4, train_wall=11, gb_free=2.8, wall=54202 2021-06-19 09:42:19 | INFO | train_inner | epoch 002: 1797 / 3002 loss=2.741, ppl=6.69, wps=5874.6, ups=0.09, wpb=64818, bsz=128, num_updates=4767, lr=9.99699e-05, gnorm=4.149, loss_scale=4, train_wall=11, gb_free=2.8, wall=54213 2021-06-19 09:42:30 | INFO | train_inner | epoch 002: 1798 / 3002 loss=2.68, ppl=6.41, wps=5930.7, ups=0.09, wpb=64825, bsz=128, num_updates=4768, lr=9.99699e-05, gnorm=2.614, loss_scale=4, train_wall=10, gb_free=2.8, wall=54224 2021-06-19 09:42:41 | INFO | train_inner | epoch 002: 1799 / 3002 loss=2.735, ppl=6.66, wps=5840, ups=0.09, wpb=64757, bsz=128, num_updates=4769, lr=9.99698e-05, gnorm=2.271, loss_scale=4, train_wall=11, gb_free=2.8, wall=54235 2021-06-19 09:42:52 | INFO | train_inner | epoch 002: 1800 / 3002 loss=2.688, ppl=6.44, wps=5833.6, ups=0.09, wpb=64704, bsz=128, num_updates=4770, lr=9.99698e-05, gnorm=2.382, loss_scale=4, train_wall=11, gb_free=2.8, wall=54247 2021-06-19 09:43:03 | INFO | train_inner | epoch 002: 1801 / 3002 loss=2.867, ppl=7.29, wps=5934.1, ups=0.09, wpb=64738, bsz=128, num_updates=4771, lr=9.99698e-05, gnorm=2.825, loss_scale=4, train_wall=10, gb_free=2.8, wall=54257 2021-06-19 09:43:14 | INFO | train_inner | epoch 002: 1802 / 3002 loss=2.79, ppl=6.92, wps=5808.3, ups=0.09, wpb=64787, bsz=128, num_updates=4772, lr=9.99698e-05, gnorm=2.354, loss_scale=4, train_wall=11, gb_free=2.8, wall=54269 2021-06-19 09:43:25 | INFO | train_inner | epoch 002: 1803 / 3002 loss=2.696, ppl=6.48, wps=5821.3, ups=0.09, wpb=64850, bsz=128, num_updates=4773, lr=9.99698e-05, gnorm=2.489, loss_scale=4, train_wall=11, gb_free=2.8, wall=54280 2021-06-19 09:43:37 | INFO | train_inner | epoch 002: 1804 / 3002 loss=2.796, ppl=6.94, wps=5781.9, ups=0.09, wpb=64824, bsz=128, num_updates=4774, lr=9.99698e-05, gnorm=5.679, loss_scale=4, train_wall=11, gb_free=2.8, wall=54291 2021-06-19 09:43:48 | INFO | train_inner | epoch 002: 1805 / 3002 loss=2.719, ppl=6.59, wps=5770.7, ups=0.09, wpb=64800, bsz=128, num_updates=4775, lr=9.99698e-05, gnorm=6.844, loss_scale=4, train_wall=11, gb_free=2.8, wall=54302 2021-06-19 09:43:59 | INFO | train_inner | epoch 002: 1806 / 3002 loss=2.692, ppl=6.46, wps=5878, ups=0.09, wpb=64813, bsz=128, num_updates=4776, lr=9.99698e-05, gnorm=2.462, loss_scale=4, train_wall=11, gb_free=2.8, wall=54313 2021-06-19 09:44:10 | INFO | train_inner | epoch 002: 1807 / 3002 loss=2.865, ppl=7.29, wps=5800.1, ups=0.09, wpb=64771, bsz=128, num_updates=4777, lr=9.99698e-05, gnorm=2.39, loss_scale=4, train_wall=11, gb_free=2.8, wall=54324 2021-06-19 09:44:21 | INFO | train_inner | epoch 002: 1808 / 3002 loss=2.713, ppl=6.56, wps=5850.8, ups=0.09, wpb=64869, bsz=128, num_updates=4778, lr=9.99698e-05, gnorm=2.766, loss_scale=4, train_wall=11, gb_free=2.8, wall=54335 2021-06-19 09:44:32 | INFO | train_inner | epoch 002: 1809 / 3002 loss=2.917, ppl=7.55, wps=5912.2, ups=0.09, wpb=64744, bsz=128, num_updates=4779, lr=9.99698e-05, gnorm=2.31, loss_scale=4, train_wall=11, gb_free=2.8, wall=54346 2021-06-19 09:44:43 | INFO | train_inner | epoch 002: 1810 / 3002 loss=2.927, ppl=7.6, wps=5872.9, ups=0.09, wpb=64833, bsz=128, num_updates=4780, lr=9.99698e-05, gnorm=2.577, loss_scale=4, train_wall=11, gb_free=2.8, wall=54357 2021-06-19 09:44:54 | INFO | train_inner | epoch 002: 1811 / 3002 loss=2.562, ppl=5.9, wps=5847.3, ups=0.09, wpb=64889, bsz=128, num_updates=4781, lr=9.99697e-05, gnorm=2.387, loss_scale=4, train_wall=11, gb_free=2.8, wall=54369 2021-06-19 09:45:05 | INFO | train_inner | epoch 002: 1812 / 3002 loss=2.871, ppl=7.31, wps=5761, ups=0.09, wpb=64773, bsz=128, num_updates=4782, lr=9.99697e-05, gnorm=5.063, loss_scale=4, train_wall=11, gb_free=2.8, wall=54380 2021-06-19 09:45:17 | INFO | train_inner | epoch 002: 1813 / 3002 loss=2.74, ppl=6.68, wps=5832.6, ups=0.09, wpb=64750, bsz=128, num_updates=4783, lr=9.99697e-05, gnorm=2.374, loss_scale=4, train_wall=11, gb_free=2.8, wall=54391 2021-06-19 09:45:28 | INFO | train_inner | epoch 002: 1814 / 3002 loss=2.823, ppl=7.08, wps=5756.1, ups=0.09, wpb=64804, bsz=128, num_updates=4784, lr=9.99697e-05, gnorm=2.37, loss_scale=4, train_wall=11, gb_free=2.8, wall=54402 2021-06-19 09:45:39 | INFO | train_inner | epoch 002: 1815 / 3002 loss=2.719, ppl=6.59, wps=5847.8, ups=0.09, wpb=64856, bsz=128, num_updates=4785, lr=9.99697e-05, gnorm=2.482, loss_scale=4, train_wall=11, gb_free=2.8, wall=54413 2021-06-19 09:45:50 | INFO | train_inner | epoch 002: 1816 / 3002 loss=2.986, ppl=7.92, wps=5880.4, ups=0.09, wpb=64816, bsz=128, num_updates=4786, lr=9.99697e-05, gnorm=2.608, loss_scale=4, train_wall=11, gb_free=2.8, wall=54424 2021-06-19 09:46:01 | INFO | train_inner | epoch 002: 1817 / 3002 loss=2.918, ppl=7.56, wps=6030.4, ups=0.09, wpb=64805, bsz=128, num_updates=4787, lr=9.99697e-05, gnorm=2.593, loss_scale=4, train_wall=10, gb_free=2.8, wall=54435 2021-06-19 09:46:12 | INFO | train_inner | epoch 002: 1818 / 3002 loss=2.714, ppl=6.56, wps=5843.3, ups=0.09, wpb=64830, bsz=128, num_updates=4788, lr=9.99697e-05, gnorm=3.67, loss_scale=4, train_wall=11, gb_free=2.8, wall=54446 2021-06-19 09:46:23 | INFO | train_inner | epoch 002: 1819 / 3002 loss=2.828, ppl=7.1, wps=5871.7, ups=0.09, wpb=64874, bsz=128, num_updates=4789, lr=9.99697e-05, gnorm=2.451, loss_scale=4, train_wall=11, gb_free=2.8, wall=54457 2021-06-19 09:46:34 | INFO | train_inner | epoch 002: 1820 / 3002 loss=2.64, ppl=6.23, wps=5827.4, ups=0.09, wpb=64825, bsz=128, num_updates=4790, lr=9.99697e-05, gnorm=2.587, loss_scale=4, train_wall=11, gb_free=2.8, wall=54468 2021-06-19 09:46:45 | INFO | train_inner | epoch 002: 1821 / 3002 loss=2.72, ppl=6.59, wps=5896, ups=0.09, wpb=64814, bsz=128, num_updates=4791, lr=9.99697e-05, gnorm=3.029, loss_scale=4, train_wall=11, gb_free=2.8, wall=54479 2021-06-19 09:46:56 | INFO | train_inner | epoch 002: 1822 / 3002 loss=2.759, ppl=6.77, wps=5974.4, ups=0.09, wpb=64769, bsz=128, num_updates=4792, lr=9.99697e-05, gnorm=2.745, loss_scale=4, train_wall=10, gb_free=2.8, wall=54490 2021-06-19 09:47:07 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-19 09:47:18 | INFO | train_inner | epoch 002: 1824 / 3002 loss=2.858, ppl=7.25, wps=2937.8, ups=0.05, wpb=64830, bsz=128, num_updates=4793, lr=9.99697e-05, gnorm=2.443, loss_scale=2, train_wall=21, gb_free=2.8, wall=54512 2021-06-19 09:47:29 | INFO | train_inner | epoch 002: 1825 / 3002 loss=2.893, ppl=7.43, wps=5848.9, ups=0.09, wpb=64866, bsz=128, num_updates=4794, lr=9.99696e-05, gnorm=2.429, loss_scale=2, train_wall=11, gb_free=2.8, wall=54523 2021-06-19 09:47:40 | INFO | train_inner | epoch 002: 1826 / 3002 loss=2.709, ppl=6.54, wps=5769.5, ups=0.09, wpb=64812, bsz=128, num_updates=4795, lr=9.99696e-05, gnorm=2.36, loss_scale=2, train_wall=11, gb_free=2.8, wall=54535 2021-06-19 09:47:51 | INFO | train_inner | epoch 002: 1827 / 3002 loss=2.682, ppl=6.42, wps=5942.8, ups=0.09, wpb=64850, bsz=128, num_updates=4796, lr=9.99696e-05, gnorm=2.431, loss_scale=2, train_wall=10, gb_free=2.8, wall=54545 2021-06-19 09:48:02 | INFO | train_inner | epoch 002: 1828 / 3002 loss=2.596, ppl=6.04, wps=5941.3, ups=0.09, wpb=64844, bsz=128, num_updates=4797, lr=9.99696e-05, gnorm=2.362, loss_scale=2, train_wall=10, gb_free=2.8, wall=54556 2021-06-19 09:48:13 | INFO | train_inner | epoch 002: 1829 / 3002 loss=2.752, ppl=6.73, wps=5828.8, ups=0.09, wpb=64894, bsz=128, num_updates=4798, lr=9.99696e-05, gnorm=2.313, loss_scale=2, train_wall=11, gb_free=2.8, wall=54567 2021-06-19 09:48:24 | INFO | train_inner | epoch 002: 1830 / 3002 loss=2.826, ppl=7.09, wps=5861.8, ups=0.09, wpb=64749, bsz=128, num_updates=4799, lr=9.99696e-05, gnorm=2.92, loss_scale=2, train_wall=11, gb_free=2.8, wall=54579 2021-06-19 09:48:35 | INFO | train_inner | epoch 002: 1831 / 3002 loss=2.905, ppl=7.49, wps=5999.5, ups=0.09, wpb=64810, bsz=128, num_updates=4800, lr=9.99696e-05, gnorm=2.432, loss_scale=2, train_wall=10, gb_free=2.8, wall=54589 2021-06-19 09:48:46 | INFO | train_inner | epoch 002: 1832 / 3002 loss=2.648, ppl=6.27, wps=5938.2, ups=0.09, wpb=64957, bsz=128, num_updates=4801, lr=9.99696e-05, gnorm=2.609, loss_scale=2, train_wall=10, gb_free=2.8, wall=54600 2021-06-19 09:48:57 | INFO | train_inner | epoch 002: 1833 / 3002 loss=2.815, ppl=7.04, wps=5788.7, ups=0.09, wpb=64842, bsz=128, num_updates=4802, lr=9.99696e-05, gnorm=3.829, loss_scale=2, train_wall=11, gb_free=2.8, wall=54611 2021-06-19 09:49:08 | INFO | train_inner | epoch 002: 1834 / 3002 loss=2.888, ppl=7.4, wps=5965, ups=0.09, wpb=64779, bsz=128, num_updates=4803, lr=9.99696e-05, gnorm=2.297, loss_scale=2, train_wall=10, gb_free=2.8, wall=54622 2021-06-19 09:49:19 | INFO | train_inner | epoch 002: 1835 / 3002 loss=2.856, ppl=7.24, wps=5719, ups=0.09, wpb=64811, bsz=128, num_updates=4804, lr=9.99696e-05, gnorm=2.971, loss_scale=2, train_wall=11, gb_free=2.8, wall=54634 2021-06-19 09:49:30 | INFO | train_inner | epoch 002: 1836 / 3002 loss=2.769, ppl=6.82, wps=5821.7, ups=0.09, wpb=64743, bsz=128, num_updates=4805, lr=9.99696e-05, gnorm=2.698, loss_scale=2, train_wall=11, gb_free=2.8, wall=54645 2021-06-19 09:49:41 | INFO | train_inner | epoch 002: 1837 / 3002 loss=2.61, ppl=6.1, wps=5914.2, ups=0.09, wpb=64871, bsz=128, num_updates=4806, lr=9.99695e-05, gnorm=3.172, loss_scale=2, train_wall=11, gb_free=2.8, wall=54656 2021-06-19 09:49:53 | INFO | train_inner | epoch 002: 1838 / 3002 loss=2.806, ppl=7, wps=5833.1, ups=0.09, wpb=64889, bsz=128, num_updates=4807, lr=9.99695e-05, gnorm=2.341, loss_scale=2, train_wall=11, gb_free=2.8, wall=54667 2021-06-19 09:50:04 | INFO | train_inner | epoch 002: 1839 / 3002 loss=2.576, ppl=5.96, wps=5829, ups=0.09, wpb=64891, bsz=128, num_updates=4808, lr=9.99695e-05, gnorm=2.179, loss_scale=2, train_wall=11, gb_free=2.8, wall=54678 2021-06-19 09:50:15 | INFO | train_inner | epoch 002: 1840 / 3002 loss=2.784, ppl=6.89, wps=5786.8, ups=0.09, wpb=64773, bsz=128, num_updates=4809, lr=9.99695e-05, gnorm=2.284, loss_scale=2, train_wall=11, gb_free=2.8, wall=54689 2021-06-19 09:50:26 | INFO | train_inner | epoch 002: 1841 / 3002 loss=2.927, ppl=7.6, wps=5773, ups=0.09, wpb=64785, bsz=128, num_updates=4810, lr=9.99695e-05, gnorm=2.327, loss_scale=2, train_wall=11, gb_free=2.8, wall=54700 2021-06-19 09:50:37 | INFO | train_inner | epoch 002: 1842 / 3002 loss=2.705, ppl=6.52, wps=5810.8, ups=0.09, wpb=64842, bsz=128, num_updates=4811, lr=9.99695e-05, gnorm=2.568, loss_scale=2, train_wall=11, gb_free=2.8, wall=54712 2021-06-19 09:50:48 | INFO | train_inner | epoch 002: 1843 / 3002 loss=2.748, ppl=6.72, wps=5918.6, ups=0.09, wpb=64810, bsz=128, num_updates=4812, lr=9.99695e-05, gnorm=2.454, loss_scale=2, train_wall=10, gb_free=2.8, wall=54723 2021-06-19 09:50:59 | INFO | train_inner | epoch 002: 1844 / 3002 loss=2.895, ppl=7.44, wps=5931, ups=0.09, wpb=64820, bsz=128, num_updates=4813, lr=9.99695e-05, gnorm=2.541, loss_scale=2, train_wall=10, gb_free=2.8, wall=54733 2021-06-19 09:51:10 | INFO | train_inner | epoch 002: 1845 / 3002 loss=2.696, ppl=6.48, wps=5918.9, ups=0.09, wpb=64933, bsz=128, num_updates=4814, lr=9.99695e-05, gnorm=2.405, loss_scale=2, train_wall=10, gb_free=2.8, wall=54744 2021-06-19 09:51:21 | INFO | train_inner | epoch 002: 1846 / 3002 loss=2.712, ppl=6.55, wps=5801.7, ups=0.09, wpb=64904, bsz=128, num_updates=4815, lr=9.99695e-05, gnorm=2.231, loss_scale=2, train_wall=11, gb_free=2.8, wall=54756 2021-06-19 09:51:32 | INFO | train_inner | epoch 002: 1847 / 3002 loss=2.908, ppl=7.5, wps=5896.3, ups=0.09, wpb=64777, bsz=128, num_updates=4816, lr=9.99695e-05, gnorm=2.241, loss_scale=2, train_wall=11, gb_free=2.8, wall=54767 2021-06-19 09:51:43 | INFO | train_inner | epoch 002: 1848 / 3002 loss=2.914, ppl=7.54, wps=5860.6, ups=0.09, wpb=64781, bsz=128, num_updates=4817, lr=9.99695e-05, gnorm=2.251, loss_scale=2, train_wall=11, gb_free=2.8, wall=54778 2021-06-19 09:51:55 | INFO | train_inner | epoch 002: 1849 / 3002 loss=2.83, ppl=7.11, wps=5765.9, ups=0.09, wpb=64899, bsz=128, num_updates=4818, lr=9.99695e-05, gnorm=2.156, loss_scale=2, train_wall=11, gb_free=2.8, wall=54789 2021-06-19 09:52:06 | INFO | train_inner | epoch 002: 1850 / 3002 loss=2.822, ppl=7.07, wps=5735.9, ups=0.09, wpb=64811, bsz=128, num_updates=4819, lr=9.99694e-05, gnorm=2.264, loss_scale=2, train_wall=11, gb_free=2.8, wall=54800 2021-06-19 09:52:17 | INFO | train_inner | epoch 002: 1851 / 3002 loss=2.809, ppl=7.01, wps=5836.4, ups=0.09, wpb=64886, bsz=128, num_updates=4820, lr=9.99694e-05, gnorm=2.263, loss_scale=2, train_wall=11, gb_free=2.8, wall=54811 2021-06-19 09:52:28 | INFO | train_inner | epoch 002: 1852 / 3002 loss=2.942, ppl=7.68, wps=5793.9, ups=0.09, wpb=64865, bsz=128, num_updates=4821, lr=9.99694e-05, gnorm=2.381, loss_scale=2, train_wall=11, gb_free=2.8, wall=54823 2021-06-19 09:52:39 | INFO | train_inner | epoch 002: 1853 / 3002 loss=2.903, ppl=7.48, wps=5863.6, ups=0.09, wpb=64836, bsz=128, num_updates=4822, lr=9.99694e-05, gnorm=2.816, loss_scale=2, train_wall=11, gb_free=2.8, wall=54834 2021-06-19 09:52:50 | INFO | train_inner | epoch 002: 1854 / 3002 loss=2.694, ppl=6.47, wps=5798.6, ups=0.09, wpb=64887, bsz=128, num_updates=4823, lr=9.99694e-05, gnorm=2.486, loss_scale=2, train_wall=11, gb_free=2.8, wall=54845 2021-06-19 09:53:01 | INFO | train_inner | epoch 002: 1855 / 3002 loss=2.773, ppl=6.83, wps=6018.1, ups=0.09, wpb=64799, bsz=128, num_updates=4824, lr=9.99694e-05, gnorm=2.316, loss_scale=2, train_wall=10, gb_free=2.8, wall=54856 2021-06-19 09:53:12 | INFO | train_inner | epoch 002: 1856 / 3002 loss=2.697, ppl=6.48, wps=5881.6, ups=0.09, wpb=64797, bsz=128, num_updates=4825, lr=9.99694e-05, gnorm=2.301, loss_scale=2, train_wall=11, gb_free=2.8, wall=54867 2021-06-19 09:53:23 | INFO | train_inner | epoch 002: 1857 / 3002 loss=2.819, ppl=7.06, wps=5799.1, ups=0.09, wpb=64731, bsz=128, num_updates=4826, lr=9.99694e-05, gnorm=2.229, loss_scale=2, train_wall=11, gb_free=2.8, wall=54878 2021-06-19 09:53:34 | INFO | train_inner | epoch 002: 1858 / 3002 loss=2.829, ppl=7.11, wps=5952.2, ups=0.09, wpb=64848, bsz=128, num_updates=4827, lr=9.99694e-05, gnorm=2.703, loss_scale=2, train_wall=10, gb_free=2.8, wall=54889 2021-06-19 09:53:46 | INFO | train_inner | epoch 002: 1859 / 3002 loss=2.88, ppl=7.36, wps=5720.5, ups=0.09, wpb=64833, bsz=128, num_updates=4828, lr=9.99694e-05, gnorm=2.744, loss_scale=2, train_wall=11, gb_free=2.8, wall=54900 2021-06-19 09:53:57 | INFO | train_inner | epoch 002: 1860 / 3002 loss=2.783, ppl=6.88, wps=5963.1, ups=0.09, wpb=64867, bsz=128, num_updates=4829, lr=9.99694e-05, gnorm=2.313, loss_scale=2, train_wall=10, gb_free=2.8, wall=54911 2021-06-19 09:54:08 | INFO | train_inner | epoch 002: 1861 / 3002 loss=2.694, ppl=6.47, wps=5898.7, ups=0.09, wpb=64860, bsz=128, num_updates=4830, lr=9.99694e-05, gnorm=2.289, loss_scale=2, train_wall=11, gb_free=2.8, wall=54922 2021-06-19 09:54:19 | INFO | train_inner | epoch 002: 1862 / 3002 loss=2.808, ppl=7.01, wps=5803.9, ups=0.09, wpb=64778, bsz=128, num_updates=4831, lr=9.99693e-05, gnorm=2.41, loss_scale=2, train_wall=11, gb_free=2.8, wall=54933 2021-06-19 09:54:30 | INFO | train_inner | epoch 002: 1863 / 3002 loss=2.717, ppl=6.58, wps=5913.6, ups=0.09, wpb=64887, bsz=128, num_updates=4832, lr=9.99693e-05, gnorm=2.221, loss_scale=2, train_wall=11, gb_free=2.8, wall=54944 2021-06-19 09:54:41 | INFO | train_inner | epoch 002: 1864 / 3002 loss=2.914, ppl=7.54, wps=5973.6, ups=0.09, wpb=64899, bsz=128, num_updates=4833, lr=9.99693e-05, gnorm=2.274, loss_scale=2, train_wall=10, gb_free=2.8, wall=54955 2021-06-19 09:54:52 | INFO | train_inner | epoch 002: 1865 / 3002 loss=2.685, ppl=6.43, wps=5815.2, ups=0.09, wpb=64861, bsz=128, num_updates=4834, lr=9.99693e-05, gnorm=2.176, loss_scale=2, train_wall=11, gb_free=2.8, wall=54966 2021-06-19 09:55:03 | INFO | train_inner | epoch 002: 1866 / 3002 loss=2.772, ppl=6.83, wps=5796.2, ups=0.09, wpb=64793, bsz=128, num_updates=4835, lr=9.99693e-05, gnorm=2.255, loss_scale=2, train_wall=11, gb_free=2.8, wall=54977 2021-06-19 09:55:14 | INFO | train_inner | epoch 002: 1867 / 3002 loss=2.865, ppl=7.28, wps=5822, ups=0.09, wpb=64784, bsz=128, num_updates=4836, lr=9.99693e-05, gnorm=2.372, loss_scale=2, train_wall=11, gb_free=2.8, wall=54988 2021-06-19 09:55:25 | INFO | train_inner | epoch 002: 1868 / 3002 loss=2.717, ppl=6.58, wps=5898.8, ups=0.09, wpb=64850, bsz=128, num_updates=4837, lr=9.99693e-05, gnorm=2.199, loss_scale=2, train_wall=11, gb_free=2.8, wall=54999 2021-06-19 09:55:36 | INFO | train_inner | epoch 002: 1869 / 3002 loss=2.649, ppl=6.27, wps=5781.5, ups=0.09, wpb=64791, bsz=128, num_updates=4838, lr=9.99693e-05, gnorm=2.288, loss_scale=2, train_wall=11, gb_free=2.8, wall=55011 2021-06-19 09:55:47 | INFO | train_inner | epoch 002: 1870 / 3002 loss=2.671, ppl=6.37, wps=5842.6, ups=0.09, wpb=64761, bsz=128, num_updates=4839, lr=9.99693e-05, gnorm=2.183, loss_scale=2, train_wall=11, gb_free=2.8, wall=55022 2021-06-19 09:55:58 | INFO | train_inner | epoch 002: 1871 / 3002 loss=2.713, ppl=6.56, wps=5920.7, ups=0.09, wpb=64802, bsz=128, num_updates=4840, lr=9.99693e-05, gnorm=2.151, loss_scale=2, train_wall=10, gb_free=2.8, wall=55033 2021-06-19 09:56:09 | INFO | train_inner | epoch 002: 1872 / 3002 loss=2.68, ppl=6.41, wps=5931, ups=0.09, wpb=64803, bsz=128, num_updates=4841, lr=9.99693e-05, gnorm=2.381, loss_scale=2, train_wall=10, gb_free=2.8, wall=55043 2021-06-19 09:56:20 | INFO | train_inner | epoch 002: 1873 / 3002 loss=2.796, ppl=6.95, wps=5959.9, ups=0.09, wpb=64801, bsz=128, num_updates=4842, lr=9.99693e-05, gnorm=2.347, loss_scale=2, train_wall=10, gb_free=2.8, wall=55054 2021-06-19 09:56:31 | INFO | train_inner | epoch 002: 1874 / 3002 loss=2.715, ppl=6.56, wps=5909.9, ups=0.09, wpb=64846, bsz=128, num_updates=4843, lr=9.99693e-05, gnorm=2.265, loss_scale=2, train_wall=11, gb_free=2.8, wall=55065 2021-06-19 09:56:42 | INFO | train_inner | epoch 002: 1875 / 3002 loss=2.693, ppl=6.47, wps=5882.2, ups=0.09, wpb=64855, bsz=128, num_updates=4844, lr=9.99692e-05, gnorm=2.216, loss_scale=2, train_wall=11, gb_free=2.8, wall=55076 2021-06-19 09:56:53 | INFO | train_inner | epoch 002: 1876 / 3002 loss=2.782, ppl=6.88, wps=5837.3, ups=0.09, wpb=64752, bsz=128, num_updates=4845, lr=9.99692e-05, gnorm=2.28, loss_scale=2, train_wall=11, gb_free=2.8, wall=55087 2021-06-19 09:57:04 | INFO | train_inner | epoch 002: 1877 / 3002 loss=2.684, ppl=6.43, wps=5916.1, ups=0.09, wpb=64853, bsz=128, num_updates=4846, lr=9.99692e-05, gnorm=2.221, loss_scale=2, train_wall=10, gb_free=2.8, wall=55098 2021-06-19 09:57:15 | INFO | train_inner | epoch 002: 1878 / 3002 loss=2.842, ppl=7.17, wps=5821.7, ups=0.09, wpb=64796, bsz=128, num_updates=4847, lr=9.99692e-05, gnorm=2.163, loss_scale=2, train_wall=11, gb_free=2.8, wall=55110 2021-06-19 09:57:26 | INFO | train_inner | epoch 002: 1879 / 3002 loss=2.661, ppl=6.33, wps=5950.5, ups=0.09, wpb=64928, bsz=128, num_updates=4848, lr=9.99692e-05, gnorm=2.298, loss_scale=2, train_wall=10, gb_free=2.8, wall=55120 2021-06-19 09:57:37 | INFO | train_inner | epoch 002: 1880 / 3002 loss=2.674, ppl=6.38, wps=5842.5, ups=0.09, wpb=64871, bsz=128, num_updates=4849, lr=9.99692e-05, gnorm=2.251, loss_scale=2, train_wall=11, gb_free=2.8, wall=55132 2021-06-19 09:57:48 | INFO | train_inner | epoch 002: 1881 / 3002 loss=2.833, ppl=7.13, wps=5761, ups=0.09, wpb=64854, bsz=128, num_updates=4850, lr=9.99692e-05, gnorm=2.332, loss_scale=2, train_wall=11, gb_free=2.8, wall=55143 2021-06-19 09:58:00 | INFO | train_inner | epoch 002: 1882 / 3002 loss=2.633, ppl=6.2, wps=5847.8, ups=0.09, wpb=64802, bsz=128, num_updates=4851, lr=9.99692e-05, gnorm=2.274, loss_scale=2, train_wall=11, gb_free=2.8, wall=55154 2021-06-19 09:58:11 | INFO | train_inner | epoch 002: 1883 / 3002 loss=2.656, ppl=6.3, wps=5912.8, ups=0.09, wpb=64837, bsz=128, num_updates=4852, lr=9.99692e-05, gnorm=2.198, loss_scale=2, train_wall=11, gb_free=2.8, wall=55165 2021-06-19 09:58:22 | INFO | train_inner | epoch 002: 1884 / 3002 loss=2.925, ppl=7.59, wps=5794.1, ups=0.09, wpb=64829, bsz=128, num_updates=4853, lr=9.99692e-05, gnorm=2.463, loss_scale=2, train_wall=11, gb_free=2.8, wall=55176 2021-06-19 09:58:33 | INFO | train_inner | epoch 002: 1885 / 3002 loss=2.879, ppl=7.36, wps=5804.5, ups=0.09, wpb=64749, bsz=128, num_updates=4854, lr=9.99692e-05, gnorm=2.369, loss_scale=2, train_wall=11, gb_free=2.8, wall=55187 2021-06-19 09:58:44 | INFO | train_inner | epoch 002: 1886 / 3002 loss=2.625, ppl=6.17, wps=5827.6, ups=0.09, wpb=64882, bsz=128, num_updates=4855, lr=9.99692e-05, gnorm=2.598, loss_scale=2, train_wall=11, gb_free=2.8, wall=55198 2021-06-19 09:58:55 | INFO | train_inner | epoch 002: 1887 / 3002 loss=2.939, ppl=7.67, wps=5884.8, ups=0.09, wpb=64842, bsz=128, num_updates=4856, lr=9.99691e-05, gnorm=2.291, loss_scale=2, train_wall=11, gb_free=2.8, wall=55209 2021-06-19 09:59:06 | INFO | train_inner | epoch 002: 1888 / 3002 loss=2.799, ppl=6.96, wps=5864.2, ups=0.09, wpb=64859, bsz=128, num_updates=4857, lr=9.99691e-05, gnorm=2.233, loss_scale=2, train_wall=11, gb_free=2.8, wall=55220 2021-06-19 09:59:17 | INFO | train_inner | epoch 002: 1889 / 3002 loss=2.852, ppl=7.22, wps=5818.1, ups=0.09, wpb=64782, bsz=128, num_updates=4858, lr=9.99691e-05, gnorm=2.345, loss_scale=2, train_wall=11, gb_free=2.8, wall=55232 2021-06-19 09:59:28 | INFO | train_inner | epoch 002: 1890 / 3002 loss=2.846, ppl=7.19, wps=5885.7, ups=0.09, wpb=64828, bsz=128, num_updates=4859, lr=9.99691e-05, gnorm=2.571, loss_scale=2, train_wall=11, gb_free=2.8, wall=55243 2021-06-19 09:59:39 | INFO | train_inner | epoch 002: 1891 / 3002 loss=2.82, ppl=7.06, wps=5940.1, ups=0.09, wpb=64718, bsz=128, num_updates=4860, lr=9.99691e-05, gnorm=2.214, loss_scale=2, train_wall=10, gb_free=2.8, wall=55253 2021-06-19 09:59:50 | INFO | train_inner | epoch 002: 1892 / 3002 loss=2.668, ppl=6.36, wps=5981.4, ups=0.09, wpb=64913, bsz=128, num_updates=4861, lr=9.99691e-05, gnorm=2.722, loss_scale=2, train_wall=10, gb_free=2.8, wall=55264 2021-06-19 10:00:01 | INFO | train_inner | epoch 002: 1893 / 3002 loss=2.679, ppl=6.41, wps=5810, ups=0.09, wpb=64819, bsz=128, num_updates=4862, lr=9.99691e-05, gnorm=2.314, loss_scale=2, train_wall=11, gb_free=2.8, wall=55275 2021-06-19 10:00:12 | INFO | train_inner | epoch 002: 1894 / 3002 loss=2.802, ppl=6.98, wps=5812.1, ups=0.09, wpb=64759, bsz=128, num_updates=4863, lr=9.99691e-05, gnorm=2.321, loss_scale=2, train_wall=11, gb_free=2.8, wall=55287 2021-06-19 10:00:24 | INFO | train_inner | epoch 002: 1895 / 3002 loss=2.798, ppl=6.96, wps=5714.9, ups=0.09, wpb=64713, bsz=128, num_updates=4864, lr=9.99691e-05, gnorm=2.197, loss_scale=2, train_wall=11, gb_free=2.8, wall=55298 2021-06-19 10:00:35 | INFO | train_inner | epoch 002: 1896 / 3002 loss=2.783, ppl=6.88, wps=5931.6, ups=0.09, wpb=64833, bsz=128, num_updates=4865, lr=9.99691e-05, gnorm=2.233, loss_scale=2, train_wall=10, gb_free=2.8, wall=55309 2021-06-19 10:00:46 | INFO | train_inner | epoch 002: 1897 / 3002 loss=2.641, ppl=6.24, wps=5888.9, ups=0.09, wpb=64939, bsz=128, num_updates=4866, lr=9.99691e-05, gnorm=2.366, loss_scale=2, train_wall=11, gb_free=2.8, wall=55320 2021-06-19 10:00:57 | INFO | train_inner | epoch 002: 1898 / 3002 loss=2.756, ppl=6.75, wps=5813.2, ups=0.09, wpb=64842, bsz=128, num_updates=4867, lr=9.99691e-05, gnorm=2.272, loss_scale=2, train_wall=11, gb_free=2.8, wall=55331 2021-06-19 10:01:08 | INFO | train_inner | epoch 002: 1899 / 3002 loss=2.917, ppl=7.56, wps=5992, ups=0.09, wpb=64759, bsz=128, num_updates=4868, lr=9.99691e-05, gnorm=2.307, loss_scale=2, train_wall=10, gb_free=2.8, wall=55342 2021-06-19 10:01:18 | INFO | train_inner | epoch 002: 1900 / 3002 loss=2.664, ppl=6.34, wps=6007.3, ups=0.09, wpb=64839, bsz=128, num_updates=4869, lr=9.9969e-05, gnorm=2.474, loss_scale=2, train_wall=10, gb_free=2.8, wall=55353 2021-06-19 10:01:29 | INFO | train_inner | epoch 002: 1901 / 3002 loss=2.712, ppl=6.55, wps=5908.1, ups=0.09, wpb=64862, bsz=128, num_updates=4870, lr=9.9969e-05, gnorm=2.284, loss_scale=2, train_wall=11, gb_free=2.8, wall=55364 2021-06-19 10:01:40 | INFO | train_inner | epoch 002: 1902 / 3002 loss=2.745, ppl=6.71, wps=5844.1, ups=0.09, wpb=64890, bsz=128, num_updates=4871, lr=9.9969e-05, gnorm=2.309, loss_scale=2, train_wall=11, gb_free=2.8, wall=55375 2021-06-19 10:01:51 | INFO | train_inner | epoch 002: 1903 / 3002 loss=2.795, ppl=6.94, wps=5891.8, ups=0.09, wpb=64809, bsz=128, num_updates=4872, lr=9.9969e-05, gnorm=2.327, loss_scale=2, train_wall=11, gb_free=2.8, wall=55386 2021-06-19 10:02:02 | INFO | train_inner | epoch 002: 1904 / 3002 loss=2.633, ppl=6.2, wps=5918.3, ups=0.09, wpb=64829, bsz=128, num_updates=4873, lr=9.9969e-05, gnorm=2.223, loss_scale=2, train_wall=11, gb_free=2.8, wall=55397 2021-06-19 10:02:13 | INFO | train_inner | epoch 002: 1905 / 3002 loss=2.762, ppl=6.79, wps=5939.3, ups=0.09, wpb=64822, bsz=128, num_updates=4874, lr=9.9969e-05, gnorm=2.282, loss_scale=2, train_wall=10, gb_free=2.8, wall=55408 2021-06-19 10:02:24 | INFO | train_inner | epoch 002: 1906 / 3002 loss=2.903, ppl=7.48, wps=5790.6, ups=0.09, wpb=64699, bsz=128, num_updates=4875, lr=9.9969e-05, gnorm=2.335, loss_scale=2, train_wall=11, gb_free=2.8, wall=55419 2021-06-19 10:02:36 | INFO | train_inner | epoch 002: 1907 / 3002 loss=2.83, ppl=7.11, wps=5733.2, ups=0.09, wpb=64810, bsz=128, num_updates=4876, lr=9.9969e-05, gnorm=3.063, loss_scale=2, train_wall=11, gb_free=2.8, wall=55430 2021-06-19 10:02:47 | INFO | train_inner | epoch 002: 1908 / 3002 loss=2.762, ppl=6.78, wps=5871.6, ups=0.09, wpb=64874, bsz=128, num_updates=4877, lr=9.9969e-05, gnorm=2.251, loss_scale=2, train_wall=11, gb_free=2.8, wall=55441 2021-06-19 10:02:58 | INFO | train_inner | epoch 002: 1909 / 3002 loss=2.656, ppl=6.3, wps=5931.2, ups=0.09, wpb=64831, bsz=128, num_updates=4878, lr=9.9969e-05, gnorm=2.276, loss_scale=2, train_wall=10, gb_free=2.8, wall=55452 2021-06-19 10:03:09 | INFO | train_inner | epoch 002: 1910 / 3002 loss=2.755, ppl=6.75, wps=5967.4, ups=0.09, wpb=64743, bsz=128, num_updates=4879, lr=9.9969e-05, gnorm=2.353, loss_scale=2, train_wall=10, gb_free=2.8, wall=55463 2021-06-19 10:03:20 | INFO | train_inner | epoch 002: 1911 / 3002 loss=2.897, ppl=7.45, wps=5855.1, ups=0.09, wpb=64889, bsz=128, num_updates=4880, lr=9.9969e-05, gnorm=2.361, loss_scale=2, train_wall=11, gb_free=2.8, wall=55474 2021-06-19 10:03:31 | INFO | train_inner | epoch 002: 1912 / 3002 loss=2.663, ppl=6.34, wps=5840.2, ups=0.09, wpb=64890, bsz=128, num_updates=4881, lr=9.99689e-05, gnorm=2.247, loss_scale=2, train_wall=11, gb_free=2.8, wall=55485 2021-06-19 10:03:42 | INFO | train_inner | epoch 002: 1913 / 3002 loss=2.687, ppl=6.44, wps=5808, ups=0.09, wpb=64763, bsz=128, num_updates=4882, lr=9.99689e-05, gnorm=2.462, loss_scale=2, train_wall=11, gb_free=2.8, wall=55496 2021-06-19 10:03:53 | INFO | train_inner | epoch 002: 1914 / 3002 loss=2.87, ppl=7.31, wps=5841.2, ups=0.09, wpb=64883, bsz=128, num_updates=4883, lr=9.99689e-05, gnorm=2.318, loss_scale=2, train_wall=11, gb_free=2.8, wall=55507 2021-06-19 10:04:04 | INFO | train_inner | epoch 002: 1915 / 3002 loss=2.728, ppl=6.63, wps=5935, ups=0.09, wpb=64821, bsz=128, num_updates=4884, lr=9.99689e-05, gnorm=2.844, loss_scale=2, train_wall=10, gb_free=2.8, wall=55518 2021-06-19 10:04:15 | INFO | train_inner | epoch 002: 1916 / 3002 loss=2.776, ppl=6.85, wps=5857.3, ups=0.09, wpb=64823, bsz=128, num_updates=4885, lr=9.99689e-05, gnorm=2.316, loss_scale=2, train_wall=11, gb_free=2.8, wall=55529 2021-06-19 10:04:26 | INFO | train_inner | epoch 002: 1917 / 3002 loss=2.74, ppl=6.68, wps=5908.6, ups=0.09, wpb=64777, bsz=128, num_updates=4886, lr=9.99689e-05, gnorm=2.155, loss_scale=2, train_wall=10, gb_free=2.8, wall=55540 2021-06-19 10:04:37 | INFO | train_inner | epoch 002: 1918 / 3002 loss=2.913, ppl=7.53, wps=5855.5, ups=0.09, wpb=64774, bsz=128, num_updates=4887, lr=9.99689e-05, gnorm=4.223, loss_scale=2, train_wall=11, gb_free=2.8, wall=55551 2021-06-19 10:04:48 | INFO | train_inner | epoch 002: 1919 / 3002 loss=2.749, ppl=6.72, wps=5924.5, ups=0.09, wpb=64835, bsz=128, num_updates=4888, lr=9.99689e-05, gnorm=2.394, loss_scale=2, train_wall=11, gb_free=2.8, wall=55562 2021-06-19 10:04:59 | INFO | train_inner | epoch 002: 1920 / 3002 loss=2.871, ppl=7.32, wps=5802.6, ups=0.09, wpb=64794, bsz=128, num_updates=4889, lr=9.99689e-05, gnorm=2.116, loss_scale=2, train_wall=11, gb_free=2.8, wall=55573 2021-06-19 10:05:10 | INFO | train_inner | epoch 002: 1921 / 3002 loss=2.823, ppl=7.08, wps=5903.3, ups=0.09, wpb=64876, bsz=128, num_updates=4890, lr=9.99689e-05, gnorm=2.215, loss_scale=2, train_wall=11, gb_free=2.8, wall=55584 2021-06-19 10:05:21 | INFO | train_inner | epoch 002: 1922 / 3002 loss=3.014, ppl=8.08, wps=5724, ups=0.09, wpb=64830, bsz=128, num_updates=4891, lr=9.99689e-05, gnorm=2.297, loss_scale=2, train_wall=11, gb_free=2.8, wall=55596 2021-06-19 10:05:32 | INFO | train_inner | epoch 002: 1923 / 3002 loss=2.788, ppl=6.91, wps=5935.7, ups=0.09, wpb=64849, bsz=128, num_updates=4892, lr=9.99689e-05, gnorm=2.244, loss_scale=2, train_wall=10, gb_free=2.8, wall=55607 2021-06-19 10:05:43 | INFO | train_inner | epoch 002: 1924 / 3002 loss=2.863, ppl=7.28, wps=5967.6, ups=0.09, wpb=64815, bsz=128, num_updates=4893, lr=9.99689e-05, gnorm=2.178, loss_scale=2, train_wall=10, gb_free=2.8, wall=55618 2021-06-19 10:05:54 | INFO | train_inner | epoch 002: 1925 / 3002 loss=2.551, ppl=5.86, wps=5916.3, ups=0.09, wpb=64855, bsz=128, num_updates=4894, lr=9.99688e-05, gnorm=2.217, loss_scale=2, train_wall=10, gb_free=2.8, wall=55629 2021-06-19 10:06:06 | INFO | train_inner | epoch 002: 1926 / 3002 loss=2.796, ppl=6.95, wps=5720.4, ups=0.09, wpb=64780, bsz=128, num_updates=4895, lr=9.99688e-05, gnorm=2.38, loss_scale=2, train_wall=11, gb_free=2.8, wall=55640 2021-06-19 10:06:17 | INFO | train_inner | epoch 002: 1927 / 3002 loss=2.823, ppl=7.08, wps=5829.6, ups=0.09, wpb=64841, bsz=128, num_updates=4896, lr=9.99688e-05, gnorm=2.386, loss_scale=2, train_wall=11, gb_free=2.8, wall=55651 2021-06-19 10:06:28 | INFO | train_inner | epoch 002: 1928 / 3002 loss=2.697, ppl=6.48, wps=5883.1, ups=0.09, wpb=64863, bsz=128, num_updates=4897, lr=9.99688e-05, gnorm=2.22, loss_scale=2, train_wall=11, gb_free=2.8, wall=55662 2021-06-19 10:06:39 | INFO | train_inner | epoch 002: 1929 / 3002 loss=2.756, ppl=6.76, wps=5756.1, ups=0.09, wpb=64825, bsz=128, num_updates=4898, lr=9.99688e-05, gnorm=2.47, loss_scale=2, train_wall=11, gb_free=2.8, wall=55673 2021-06-19 10:06:50 | INFO | train_inner | epoch 002: 1930 / 3002 loss=2.587, ppl=6.01, wps=5749.3, ups=0.09, wpb=64903, bsz=128, num_updates=4899, lr=9.99688e-05, gnorm=2.168, loss_scale=2, train_wall=11, gb_free=2.8, wall=55685 2021-06-19 10:07:01 | INFO | train_inner | epoch 002: 1931 / 3002 loss=2.803, ppl=6.98, wps=5823.2, ups=0.09, wpb=64766, bsz=128, num_updates=4900, lr=9.99688e-05, gnorm=2.171, loss_scale=2, train_wall=11, gb_free=2.8, wall=55696 2021-06-19 10:07:12 | INFO | train_inner | epoch 002: 1932 / 3002 loss=2.519, ppl=5.73, wps=5948.2, ups=0.09, wpb=64826, bsz=128, num_updates=4901, lr=9.99688e-05, gnorm=2.233, loss_scale=2, train_wall=10, gb_free=2.8, wall=55707 2021-06-19 10:07:23 | INFO | train_inner | epoch 002: 1933 / 3002 loss=2.641, ppl=6.24, wps=5909.2, ups=0.09, wpb=64802, bsz=128, num_updates=4902, lr=9.99688e-05, gnorm=2.236, loss_scale=2, train_wall=11, gb_free=2.8, wall=55718 2021-06-19 10:07:34 | INFO | train_inner | epoch 002: 1934 / 3002 loss=2.863, ppl=7.27, wps=5903.5, ups=0.09, wpb=64943, bsz=128, num_updates=4903, lr=9.99688e-05, gnorm=2.335, loss_scale=2, train_wall=11, gb_free=2.8, wall=55729 2021-06-19 10:07:45 | INFO | train_inner | epoch 002: 1935 / 3002 loss=2.9, ppl=7.46, wps=5880.6, ups=0.09, wpb=64858, bsz=128, num_updates=4904, lr=9.99688e-05, gnorm=2.23, loss_scale=2, train_wall=11, gb_free=2.8, wall=55740 2021-06-19 10:07:56 | INFO | train_inner | epoch 002: 1936 / 3002 loss=2.764, ppl=6.79, wps=5918, ups=0.09, wpb=64844, bsz=128, num_updates=4905, lr=9.99688e-05, gnorm=2.378, loss_scale=2, train_wall=10, gb_free=2.8, wall=55751 2021-06-19 10:08:07 | INFO | train_inner | epoch 002: 1937 / 3002 loss=2.83, ppl=7.11, wps=5779.2, ups=0.09, wpb=64885, bsz=128, num_updates=4906, lr=9.99687e-05, gnorm=2.19, loss_scale=2, train_wall=11, gb_free=2.8, wall=55762 2021-06-19 10:08:18 | INFO | train_inner | epoch 002: 1938 / 3002 loss=2.86, ppl=7.26, wps=5897.7, ups=0.09, wpb=64765, bsz=128, num_updates=4907, lr=9.99687e-05, gnorm=2.314, loss_scale=2, train_wall=11, gb_free=2.8, wall=55773 2021-06-19 10:08:29 | INFO | train_inner | epoch 002: 1939 / 3002 loss=2.565, ppl=5.92, wps=5917.3, ups=0.09, wpb=64819, bsz=128, num_updates=4908, lr=9.99687e-05, gnorm=2.249, loss_scale=2, train_wall=10, gb_free=2.8, wall=55784 2021-06-19 10:08:40 | INFO | train_inner | epoch 002: 1940 / 3002 loss=2.676, ppl=6.39, wps=5942.2, ups=0.09, wpb=64888, bsz=128, num_updates=4909, lr=9.99687e-05, gnorm=2.149, loss_scale=2, train_wall=10, gb_free=2.8, wall=55795 2021-06-19 10:08:51 | INFO | train_inner | epoch 002: 1941 / 3002 loss=2.721, ppl=6.59, wps=5970, ups=0.09, wpb=64937, bsz=128, num_updates=4910, lr=9.99687e-05, gnorm=2.17, loss_scale=2, train_wall=10, gb_free=2.8, wall=55805 2021-06-19 10:09:02 | INFO | train_inner | epoch 002: 1942 / 3002 loss=2.746, ppl=6.71, wps=5892.2, ups=0.09, wpb=64800, bsz=128, num_updates=4911, lr=9.99687e-05, gnorm=2.248, loss_scale=2, train_wall=11, gb_free=2.8, wall=55816 2021-06-19 10:09:13 | INFO | train_inner | epoch 002: 1943 / 3002 loss=2.808, ppl=7, wps=5900.8, ups=0.09, wpb=64789, bsz=128, num_updates=4912, lr=9.99687e-05, gnorm=4.291, loss_scale=2, train_wall=11, gb_free=2.8, wall=55827 2021-06-19 10:09:24 | INFO | train_inner | epoch 002: 1944 / 3002 loss=2.798, ppl=6.95, wps=5827, ups=0.09, wpb=64741, bsz=128, num_updates=4913, lr=9.99687e-05, gnorm=2.304, loss_scale=2, train_wall=11, gb_free=2.8, wall=55839 2021-06-19 10:09:35 | INFO | train_inner | epoch 002: 1945 / 3002 loss=2.682, ppl=6.42, wps=5876, ups=0.09, wpb=64802, bsz=128, num_updates=4914, lr=9.99687e-05, gnorm=2.199, loss_scale=2, train_wall=11, gb_free=2.8, wall=55850 2021-06-19 10:09:46 | INFO | train_inner | epoch 002: 1946 / 3002 loss=2.774, ppl=6.84, wps=5958.8, ups=0.09, wpb=64799, bsz=128, num_updates=4915, lr=9.99687e-05, gnorm=2.695, loss_scale=2, train_wall=10, gb_free=2.8, wall=55860 2021-06-19 10:09:57 | INFO | train_inner | epoch 002: 1947 / 3002 loss=2.811, ppl=7.02, wps=5831.9, ups=0.09, wpb=64800, bsz=128, num_updates=4916, lr=9.99687e-05, gnorm=38.292, loss_scale=2, train_wall=11, gb_free=2.8, wall=55872 2021-06-19 10:10:08 | INFO | train_inner | epoch 002: 1948 / 3002 loss=2.707, ppl=6.53, wps=5879.1, ups=0.09, wpb=64805, bsz=128, num_updates=4917, lr=9.99687e-05, gnorm=2.31, loss_scale=2, train_wall=11, gb_free=2.8, wall=55883 2021-06-19 10:10:19 | INFO | train_inner | epoch 002: 1949 / 3002 loss=2.743, ppl=6.69, wps=5835.4, ups=0.09, wpb=64803, bsz=128, num_updates=4918, lr=9.99687e-05, gnorm=2.216, loss_scale=2, train_wall=11, gb_free=2.8, wall=55894 2021-06-19 10:10:31 | INFO | train_inner | epoch 002: 1950 / 3002 loss=2.76, ppl=6.78, wps=5738.2, ups=0.09, wpb=64757, bsz=128, num_updates=4919, lr=9.99686e-05, gnorm=2.291, loss_scale=2, train_wall=11, gb_free=2.8, wall=55905 2021-06-19 10:10:42 | INFO | train_inner | epoch 002: 1951 / 3002 loss=2.776, ppl=6.85, wps=5898.8, ups=0.09, wpb=64845, bsz=128, num_updates=4920, lr=9.99686e-05, gnorm=2.285, loss_scale=4, train_wall=11, gb_free=2.8, wall=55916 2021-06-19 10:10:53 | INFO | train_inner | epoch 002: 1952 / 3002 loss=2.776, ppl=6.85, wps=5908.8, ups=0.09, wpb=64805, bsz=128, num_updates=4921, lr=9.99686e-05, gnorm=2.338, loss_scale=4, train_wall=11, gb_free=2.8, wall=55927 2021-06-19 10:11:04 | INFO | train_inner | epoch 002: 1953 / 3002 loss=2.773, ppl=6.83, wps=5810.2, ups=0.09, wpb=64867, bsz=128, num_updates=4922, lr=9.99686e-05, gnorm=5.091, loss_scale=4, train_wall=11, gb_free=2.8, wall=55938 2021-06-19 10:11:15 | INFO | train_inner | epoch 002: 1954 / 3002 loss=2.793, ppl=6.93, wps=5855.9, ups=0.09, wpb=64798, bsz=128, num_updates=4923, lr=9.99686e-05, gnorm=2.823, loss_scale=4, train_wall=11, gb_free=2.8, wall=55949 2021-06-19 10:11:26 | INFO | train_inner | epoch 002: 1955 / 3002 loss=2.829, ppl=7.1, wps=5800.7, ups=0.09, wpb=64753, bsz=128, num_updates=4924, lr=9.99686e-05, gnorm=2.487, loss_scale=4, train_wall=11, gb_free=2.8, wall=55960 2021-06-19 10:11:37 | INFO | train_inner | epoch 002: 1956 / 3002 loss=2.816, ppl=7.04, wps=5844.6, ups=0.09, wpb=64854, bsz=128, num_updates=4925, lr=9.99686e-05, gnorm=2.286, loss_scale=4, train_wall=11, gb_free=2.8, wall=55971 2021-06-19 10:11:48 | INFO | train_inner | epoch 002: 1957 / 3002 loss=2.982, ppl=7.9, wps=5798.9, ups=0.09, wpb=64814, bsz=128, num_updates=4926, lr=9.99686e-05, gnorm=2.263, loss_scale=4, train_wall=11, gb_free=2.8, wall=55983 2021-06-19 10:11:59 | INFO | train_inner | epoch 002: 1958 / 3002 loss=2.705, ppl=6.52, wps=5805.9, ups=0.09, wpb=64753, bsz=128, num_updates=4927, lr=9.99686e-05, gnorm=2.406, loss_scale=4, train_wall=11, gb_free=2.8, wall=55994 2021-06-19 10:12:10 | INFO | train_inner | epoch 002: 1959 / 3002 loss=2.687, ppl=6.44, wps=5882.8, ups=0.09, wpb=64869, bsz=128, num_updates=4928, lr=9.99686e-05, gnorm=2.343, loss_scale=4, train_wall=11, gb_free=2.8, wall=56005 2021-06-19 10:12:22 | INFO | train_inner | epoch 002: 1960 / 3002 loss=2.832, ppl=7.12, wps=5889.6, ups=0.09, wpb=64889, bsz=128, num_updates=4929, lr=9.99686e-05, gnorm=2.518, loss_scale=4, train_wall=11, gb_free=2.8, wall=56016 2021-06-19 10:12:33 | INFO | train_inner | epoch 002: 1961 / 3002 loss=2.77, ppl=6.82, wps=5849.7, ups=0.09, wpb=64873, bsz=128, num_updates=4930, lr=9.99686e-05, gnorm=2.494, loss_scale=4, train_wall=11, gb_free=2.8, wall=56027 2021-06-19 10:12:44 | INFO | train_inner | epoch 002: 1962 / 3002 loss=2.843, ppl=7.17, wps=5838.2, ups=0.09, wpb=64879, bsz=128, num_updates=4931, lr=9.99685e-05, gnorm=8.994, loss_scale=4, train_wall=11, gb_free=2.8, wall=56038 2021-06-19 10:12:55 | INFO | train_inner | epoch 002: 1963 / 3002 loss=2.587, ppl=6.01, wps=5806.4, ups=0.09, wpb=64892, bsz=128, num_updates=4932, lr=9.99685e-05, gnorm=8.169, loss_scale=4, train_wall=11, gb_free=2.8, wall=56049 2021-06-19 10:13:06 | INFO | train_inner | epoch 002: 1964 / 3002 loss=2.81, ppl=7.01, wps=5834.3, ups=0.09, wpb=64869, bsz=128, num_updates=4933, lr=9.99685e-05, gnorm=2.4, loss_scale=4, train_wall=11, gb_free=2.8, wall=56060 2021-06-19 10:13:17 | INFO | train_inner | epoch 002: 1965 / 3002 loss=2.713, ppl=6.56, wps=5698.1, ups=0.09, wpb=64775, bsz=128, num_updates=4934, lr=9.99685e-05, gnorm=2.455, loss_scale=4, train_wall=11, gb_free=2.8, wall=56072 2021-06-19 10:13:28 | INFO | train_inner | epoch 002: 1966 / 3002 loss=2.691, ppl=6.46, wps=5857.5, ups=0.09, wpb=64812, bsz=128, num_updates=4935, lr=9.99685e-05, gnorm=2.887, loss_scale=4, train_wall=11, gb_free=2.8, wall=56083 2021-06-19 10:13:40 | INFO | train_inner | epoch 002: 1967 / 3002 loss=2.879, ppl=7.36, wps=5758.9, ups=0.09, wpb=64825, bsz=128, num_updates=4936, lr=9.99685e-05, gnorm=2.278, loss_scale=4, train_wall=11, gb_free=2.8, wall=56094 2021-06-19 10:13:51 | INFO | train_inner | epoch 002: 1968 / 3002 loss=2.787, ppl=6.9, wps=5870.6, ups=0.09, wpb=64760, bsz=128, num_updates=4937, lr=9.99685e-05, gnorm=2.225, loss_scale=4, train_wall=11, gb_free=2.8, wall=56105 2021-06-19 10:14:02 | INFO | train_inner | epoch 002: 1969 / 3002 loss=2.741, ppl=6.68, wps=5717.2, ups=0.09, wpb=64784, bsz=128, num_updates=4938, lr=9.99685e-05, gnorm=2.303, loss_scale=4, train_wall=11, gb_free=2.8, wall=56116 2021-06-19 10:14:13 | INFO | train_inner | epoch 002: 1970 / 3002 loss=2.902, ppl=7.47, wps=5892.8, ups=0.09, wpb=64808, bsz=128, num_updates=4939, lr=9.99685e-05, gnorm=2.363, loss_scale=4, train_wall=11, gb_free=2.8, wall=56127 2021-06-19 10:14:24 | INFO | train_inner | epoch 002: 1971 / 3002 loss=2.754, ppl=6.75, wps=5809.4, ups=0.09, wpb=64781, bsz=128, num_updates=4940, lr=9.99685e-05, gnorm=2.832, loss_scale=4, train_wall=11, gb_free=2.8, wall=56139 2021-06-19 10:14:35 | INFO | train_inner | epoch 002: 1972 / 3002 loss=2.617, ppl=6.13, wps=6003.1, ups=0.09, wpb=64854, bsz=128, num_updates=4941, lr=9.99685e-05, gnorm=2.351, loss_scale=4, train_wall=10, gb_free=2.8, wall=56149 2021-06-19 10:14:46 | INFO | train_inner | epoch 002: 1973 / 3002 loss=2.813, ppl=7.03, wps=5818.7, ups=0.09, wpb=64786, bsz=128, num_updates=4942, lr=9.99685e-05, gnorm=2.525, loss_scale=4, train_wall=11, gb_free=2.8, wall=56160 2021-06-19 10:14:57 | INFO | train_inner | epoch 002: 1974 / 3002 loss=2.768, ppl=6.81, wps=5917, ups=0.09, wpb=64841, bsz=128, num_updates=4943, lr=9.99685e-05, gnorm=2.535, loss_scale=4, train_wall=10, gb_free=2.8, wall=56171 2021-06-19 10:15:08 | INFO | train_inner | epoch 002: 1975 / 3002 loss=2.911, ppl=7.52, wps=5922.3, ups=0.09, wpb=64847, bsz=128, num_updates=4944, lr=9.99684e-05, gnorm=2.271, loss_scale=4, train_wall=10, gb_free=2.8, wall=56182 2021-06-19 10:15:19 | INFO | train_inner | epoch 002: 1976 / 3002 loss=2.736, ppl=6.66, wps=5732.7, ups=0.09, wpb=64830, bsz=128, num_updates=4945, lr=9.99684e-05, gnorm=2.32, loss_scale=4, train_wall=11, gb_free=2.8, wall=56194 2021-06-19 10:15:30 | INFO | train_inner | epoch 002: 1977 / 3002 loss=2.809, ppl=7.01, wps=5848.8, ups=0.09, wpb=64806, bsz=128, num_updates=4946, lr=9.99684e-05, gnorm=2.494, loss_scale=4, train_wall=11, gb_free=2.8, wall=56205 2021-06-19 10:15:42 | INFO | train_inner | epoch 002: 1978 / 3002 loss=2.594, ppl=6.04, wps=5835.1, ups=0.09, wpb=64833, bsz=128, num_updates=4947, lr=9.99684e-05, gnorm=3.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=56216 2021-06-19 10:15:53 | INFO | train_inner | epoch 002: 1979 / 3002 loss=2.859, ppl=7.26, wps=5750.4, ups=0.09, wpb=64830, bsz=128, num_updates=4948, lr=9.99684e-05, gnorm=2.199, loss_scale=4, train_wall=11, gb_free=2.8, wall=56227 2021-06-19 10:16:04 | INFO | train_inner | epoch 002: 1980 / 3002 loss=2.798, ppl=6.95, wps=5795.2, ups=0.09, wpb=64777, bsz=128, num_updates=4949, lr=9.99684e-05, gnorm=3.738, loss_scale=4, train_wall=11, gb_free=2.8, wall=56238 2021-06-19 10:16:15 | INFO | train_inner | epoch 002: 1981 / 3002 loss=2.789, ppl=6.91, wps=5754.1, ups=0.09, wpb=64839, bsz=128, num_updates=4950, lr=9.99684e-05, gnorm=12.572, loss_scale=4, train_wall=11, gb_free=2.8, wall=56250 2021-06-19 10:16:26 | INFO | train_inner | epoch 002: 1982 / 3002 loss=2.665, ppl=6.34, wps=5838.3, ups=0.09, wpb=64878, bsz=128, num_updates=4951, lr=9.99684e-05, gnorm=2.775, loss_scale=4, train_wall=11, gb_free=2.8, wall=56261 2021-06-19 10:16:37 | INFO | train_inner | epoch 002: 1983 / 3002 loss=2.638, ppl=6.22, wps=5889.8, ups=0.09, wpb=64836, bsz=128, num_updates=4952, lr=9.99684e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=56272 2021-06-19 10:16:49 | INFO | train_inner | epoch 002: 1984 / 3002 loss=2.754, ppl=6.74, wps=5828, ups=0.09, wpb=64834, bsz=128, num_updates=4953, lr=9.99684e-05, gnorm=2.586, loss_scale=4, train_wall=11, gb_free=2.8, wall=56283 2021-06-19 10:16:59 | INFO | train_inner | epoch 002: 1985 / 3002 loss=2.686, ppl=6.43, wps=5964.7, ups=0.09, wpb=64859, bsz=128, num_updates=4954, lr=9.99684e-05, gnorm=2.249, loss_scale=4, train_wall=10, gb_free=2.8, wall=56294 2021-06-19 10:17:11 | INFO | train_inner | epoch 002: 1986 / 3002 loss=2.721, ppl=6.59, wps=5841.6, ups=0.09, wpb=64883, bsz=128, num_updates=4955, lr=9.99684e-05, gnorm=2.245, loss_scale=4, train_wall=11, gb_free=2.8, wall=56305 2021-06-19 10:17:22 | INFO | train_inner | epoch 002: 1987 / 3002 loss=2.739, ppl=6.68, wps=5809.3, ups=0.09, wpb=64855, bsz=128, num_updates=4956, lr=9.99683e-05, gnorm=2.413, loss_scale=4, train_wall=11, gb_free=2.8, wall=56316 2021-06-19 10:17:33 | INFO | train_inner | epoch 002: 1988 / 3002 loss=2.755, ppl=6.75, wps=5817.5, ups=0.09, wpb=64845, bsz=128, num_updates=4957, lr=9.99683e-05, gnorm=2.297, loss_scale=4, train_wall=11, gb_free=2.8, wall=56327 2021-06-19 10:17:44 | INFO | train_inner | epoch 002: 1989 / 3002 loss=2.724, ppl=6.61, wps=5814.7, ups=0.09, wpb=64825, bsz=128, num_updates=4958, lr=9.99683e-05, gnorm=2.367, loss_scale=4, train_wall=11, gb_free=2.8, wall=56338 2021-06-19 10:17:55 | INFO | train_inner | epoch 002: 1990 / 3002 loss=2.552, ppl=5.86, wps=5759, ups=0.09, wpb=64833, bsz=128, num_updates=4959, lr=9.99683e-05, gnorm=2.41, loss_scale=4, train_wall=11, gb_free=2.8, wall=56350 2021-06-19 10:18:06 | INFO | train_inner | epoch 002: 1991 / 3002 loss=2.764, ppl=6.79, wps=5829.4, ups=0.09, wpb=64839, bsz=128, num_updates=4960, lr=9.99683e-05, gnorm=2.535, loss_scale=4, train_wall=11, gb_free=2.8, wall=56361 2021-06-19 10:18:17 | INFO | train_inner | epoch 002: 1992 / 3002 loss=2.783, ppl=6.88, wps=5835.9, ups=0.09, wpb=64991, bsz=128, num_updates=4961, lr=9.99683e-05, gnorm=2.414, loss_scale=4, train_wall=11, gb_free=2.8, wall=56372 2021-06-19 10:18:29 | INFO | train_inner | epoch 002: 1993 / 3002 loss=2.773, ppl=6.83, wps=5741.6, ups=0.09, wpb=64863, bsz=128, num_updates=4962, lr=9.99683e-05, gnorm=2.292, loss_scale=4, train_wall=11, gb_free=2.8, wall=56383 2021-06-19 10:18:40 | INFO | train_inner | epoch 002: 1994 / 3002 loss=2.799, ppl=6.96, wps=5902.5, ups=0.09, wpb=64830, bsz=128, num_updates=4963, lr=9.99683e-05, gnorm=2.918, loss_scale=4, train_wall=11, gb_free=2.8, wall=56394 2021-06-19 10:18:51 | INFO | train_inner | epoch 002: 1995 / 3002 loss=2.76, ppl=6.77, wps=5853.4, ups=0.09, wpb=64837, bsz=128, num_updates=4964, lr=9.99683e-05, gnorm=2.471, loss_scale=4, train_wall=11, gb_free=2.8, wall=56405 2021-06-19 10:19:02 | INFO | train_inner | epoch 002: 1996 / 3002 loss=2.833, ppl=7.12, wps=5904.5, ups=0.09, wpb=64878, bsz=128, num_updates=4965, lr=9.99683e-05, gnorm=4.567, loss_scale=4, train_wall=11, gb_free=2.8, wall=56416 2021-06-19 10:19:13 | INFO | train_inner | epoch 002: 1997 / 3002 loss=2.66, ppl=6.32, wps=5850.6, ups=0.09, wpb=64792, bsz=128, num_updates=4966, lr=9.99683e-05, gnorm=2.397, loss_scale=4, train_wall=11, gb_free=2.8, wall=56427 2021-06-19 10:19:24 | INFO | train_inner | epoch 002: 1998 / 3002 loss=2.745, ppl=6.71, wps=5895.4, ups=0.09, wpb=64796, bsz=128, num_updates=4967, lr=9.99683e-05, gnorm=2.377, loss_scale=4, train_wall=11, gb_free=2.8, wall=56438 2021-06-19 10:19:35 | INFO | train_inner | epoch 002: 1999 / 3002 loss=2.816, ppl=7.04, wps=5889.8, ups=0.09, wpb=64743, bsz=128, num_updates=4968, lr=9.99683e-05, gnorm=3.047, loss_scale=4, train_wall=11, gb_free=2.8, wall=56449 2021-06-19 10:19:46 | INFO | train_inner | epoch 002: 2000 / 3002 loss=2.825, ppl=7.09, wps=5788.4, ups=0.09, wpb=64834, bsz=128, num_updates=4969, lr=9.99682e-05, gnorm=4.528, loss_scale=4, train_wall=11, gb_free=2.8, wall=56460 2021-06-19 10:19:57 | INFO | train_inner | epoch 002: 2001 / 3002 loss=2.791, ppl=6.92, wps=5822.9, ups=0.09, wpb=64783, bsz=128, num_updates=4970, lr=9.99682e-05, gnorm=2.49, loss_scale=4, train_wall=11, gb_free=2.8, wall=56472 2021-06-19 10:20:08 | INFO | train_inner | epoch 002: 2002 / 3002 loss=2.793, ppl=6.93, wps=5821.3, ups=0.09, wpb=64845, bsz=128, num_updates=4971, lr=9.99682e-05, gnorm=2.355, loss_scale=4, train_wall=11, gb_free=2.8, wall=56483 2021-06-19 10:20:19 | INFO | train_inner | epoch 002: 2003 / 3002 loss=2.852, ppl=7.22, wps=5862.5, ups=0.09, wpb=64779, bsz=128, num_updates=4972, lr=9.99682e-05, gnorm=2.236, loss_scale=4, train_wall=11, gb_free=2.8, wall=56494 2021-06-19 10:20:30 | INFO | train_inner | epoch 002: 2004 / 3002 loss=2.724, ppl=6.61, wps=5941.5, ups=0.09, wpb=64808, bsz=128, num_updates=4973, lr=9.99682e-05, gnorm=6.989, loss_scale=4, train_wall=10, gb_free=2.8, wall=56505 2021-06-19 10:20:41 | INFO | train_inner | epoch 002: 2005 / 3002 loss=2.684, ppl=6.43, wps=5894.8, ups=0.09, wpb=64829, bsz=128, num_updates=4974, lr=9.99682e-05, gnorm=2.616, loss_scale=4, train_wall=11, gb_free=2.8, wall=56516 2021-06-19 10:20:53 | INFO | train_inner | epoch 002: 2006 / 3002 loss=2.632, ppl=6.2, wps=5749.7, ups=0.09, wpb=64822, bsz=128, num_updates=4975, lr=9.99682e-05, gnorm=2.432, loss_scale=4, train_wall=11, gb_free=2.8, wall=56527 2021-06-19 10:21:04 | INFO | train_inner | epoch 002: 2007 / 3002 loss=2.732, ppl=6.64, wps=5880.5, ups=0.09, wpb=64857, bsz=128, num_updates=4976, lr=9.99682e-05, gnorm=2.497, loss_scale=4, train_wall=11, gb_free=2.8, wall=56538 2021-06-19 10:21:15 | INFO | train_inner | epoch 002: 2008 / 3002 loss=2.681, ppl=6.42, wps=5802, ups=0.09, wpb=64812, bsz=128, num_updates=4977, lr=9.99682e-05, gnorm=2.419, loss_scale=4, train_wall=11, gb_free=2.8, wall=56549 2021-06-19 10:21:26 | INFO | train_inner | epoch 002: 2009 / 3002 loss=2.609, ppl=6.1, wps=5776.4, ups=0.09, wpb=64892, bsz=128, num_updates=4978, lr=9.99682e-05, gnorm=2.515, loss_scale=4, train_wall=11, gb_free=2.8, wall=56560 2021-06-19 10:21:37 | INFO | train_inner | epoch 002: 2010 / 3002 loss=2.852, ppl=7.22, wps=5915.6, ups=0.09, wpb=64860, bsz=128, num_updates=4979, lr=9.99682e-05, gnorm=2.775, loss_scale=4, train_wall=11, gb_free=2.8, wall=56571 2021-06-19 10:21:48 | INFO | train_inner | epoch 002: 2011 / 3002 loss=2.796, ppl=6.95, wps=5924.9, ups=0.09, wpb=64733, bsz=128, num_updates=4980, lr=9.99682e-05, gnorm=2.765, loss_scale=4, train_wall=10, gb_free=2.8, wall=56582 2021-06-19 10:21:59 | INFO | train_inner | epoch 002: 2012 / 3002 loss=2.694, ppl=6.47, wps=5863.9, ups=0.09, wpb=64909, bsz=128, num_updates=4981, lr=9.99681e-05, gnorm=2.502, loss_scale=4, train_wall=11, gb_free=2.8, wall=56593 2021-06-19 10:22:10 | INFO | train_inner | epoch 002: 2013 / 3002 loss=2.733, ppl=6.65, wps=5849.6, ups=0.09, wpb=64866, bsz=128, num_updates=4982, lr=9.99681e-05, gnorm=3.029, loss_scale=4, train_wall=11, gb_free=2.8, wall=56604 2021-06-19 10:22:21 | INFO | train_inner | epoch 002: 2014 / 3002 loss=2.662, ppl=6.33, wps=5835.7, ups=0.09, wpb=64880, bsz=128, num_updates=4983, lr=9.99681e-05, gnorm=2.277, loss_scale=4, train_wall=11, gb_free=2.8, wall=56616 2021-06-19 10:22:32 | INFO | train_inner | epoch 002: 2015 / 3002 loss=2.743, ppl=6.69, wps=5893.7, ups=0.09, wpb=64848, bsz=128, num_updates=4984, lr=9.99681e-05, gnorm=2.399, loss_scale=4, train_wall=11, gb_free=2.8, wall=56627 2021-06-19 10:22:43 | INFO | train_inner | epoch 002: 2016 / 3002 loss=2.701, ppl=6.5, wps=5939.3, ups=0.09, wpb=64846, bsz=128, num_updates=4985, lr=9.99681e-05, gnorm=2.472, loss_scale=4, train_wall=10, gb_free=2.8, wall=56637 2021-06-19 10:22:54 | INFO | train_inner | epoch 002: 2017 / 3002 loss=2.669, ppl=6.36, wps=5818.2, ups=0.09, wpb=64864, bsz=128, num_updates=4986, lr=9.99681e-05, gnorm=2.435, loss_scale=4, train_wall=11, gb_free=2.8, wall=56649 2021-06-19 10:23:05 | INFO | train_inner | epoch 002: 2018 / 3002 loss=2.733, ppl=6.65, wps=5783.3, ups=0.09, wpb=64806, bsz=128, num_updates=4987, lr=9.99681e-05, gnorm=2.641, loss_scale=4, train_wall=11, gb_free=2.8, wall=56660 2021-06-19 10:23:17 | INFO | train_inner | epoch 002: 2019 / 3002 loss=2.642, ppl=6.24, wps=5776.9, ups=0.09, wpb=64749, bsz=128, num_updates=4988, lr=9.99681e-05, gnorm=2.843, loss_scale=4, train_wall=11, gb_free=2.8, wall=56671 2021-06-19 10:23:28 | INFO | train_inner | epoch 002: 2020 / 3002 loss=2.677, ppl=6.4, wps=5921.2, ups=0.09, wpb=64874, bsz=128, num_updates=4989, lr=9.99681e-05, gnorm=2.314, loss_scale=4, train_wall=10, gb_free=2.8, wall=56682 2021-06-19 10:23:39 | INFO | train_inner | epoch 002: 2021 / 3002 loss=2.826, ppl=7.09, wps=5862.3, ups=0.09, wpb=64916, bsz=128, num_updates=4990, lr=9.99681e-05, gnorm=8.448, loss_scale=4, train_wall=11, gb_free=2.8, wall=56693 2021-06-19 10:23:50 | INFO | train_inner | epoch 002: 2022 / 3002 loss=2.748, ppl=6.72, wps=5759.7, ups=0.09, wpb=64821, bsz=128, num_updates=4991, lr=9.99681e-05, gnorm=2.299, loss_scale=4, train_wall=11, gb_free=2.8, wall=56704 2021-06-19 10:24:01 | INFO | train_inner | epoch 002: 2023 / 3002 loss=2.711, ppl=6.55, wps=5845.1, ups=0.09, wpb=64885, bsz=128, num_updates=4992, lr=9.99681e-05, gnorm=3.882, loss_scale=4, train_wall=11, gb_free=2.8, wall=56715 2021-06-19 10:24:12 | INFO | train_inner | epoch 002: 2024 / 3002 loss=2.859, ppl=7.26, wps=5939.2, ups=0.09, wpb=64901, bsz=128, num_updates=4993, lr=9.99681e-05, gnorm=2.614, loss_scale=4, train_wall=10, gb_free=2.8, wall=56726 2021-06-19 10:24:23 | INFO | train_inner | epoch 002: 2025 / 3002 loss=2.736, ppl=6.66, wps=5904.7, ups=0.09, wpb=64857, bsz=128, num_updates=4994, lr=9.9968e-05, gnorm=2.792, loss_scale=4, train_wall=11, gb_free=2.8, wall=56737 2021-06-19 10:24:34 | INFO | train_inner | epoch 002: 2026 / 3002 loss=2.669, ppl=6.36, wps=5889.3, ups=0.09, wpb=64793, bsz=128, num_updates=4995, lr=9.9968e-05, gnorm=2.275, loss_scale=4, train_wall=11, gb_free=2.8, wall=56748 2021-06-19 10:24:45 | INFO | train_inner | epoch 002: 2027 / 3002 loss=2.614, ppl=6.12, wps=5960.6, ups=0.09, wpb=64822, bsz=128, num_updates=4996, lr=9.9968e-05, gnorm=2.572, loss_scale=4, train_wall=10, gb_free=2.8, wall=56759 2021-06-19 10:24:56 | INFO | train_inner | epoch 002: 2028 / 3002 loss=2.773, ppl=6.84, wps=5738.9, ups=0.09, wpb=64754, bsz=128, num_updates=4997, lr=9.9968e-05, gnorm=4.472, loss_scale=4, train_wall=11, gb_free=2.8, wall=56770 2021-06-19 10:25:07 | INFO | train_inner | epoch 002: 2029 / 3002 loss=2.899, ppl=7.46, wps=5862, ups=0.09, wpb=64807, bsz=128, num_updates=4998, lr=9.9968e-05, gnorm=2.47, loss_scale=4, train_wall=11, gb_free=2.8, wall=56782 2021-06-19 10:25:18 | INFO | train_inner | epoch 002: 2030 / 3002 loss=2.856, ppl=7.24, wps=5946.6, ups=0.09, wpb=64942, bsz=128, num_updates=4999, lr=9.9968e-05, gnorm=8.935, loss_scale=4, train_wall=10, gb_free=2.8, wall=56792 2021-06-19 10:25:29 | INFO | train_inner | epoch 002: 2031 / 3002 loss=2.819, ppl=7.05, wps=5810.9, ups=0.09, wpb=64835, bsz=128, num_updates=5000, lr=9.9968e-05, gnorm=2.618, loss_scale=4, train_wall=11, gb_free=2.8, wall=56804 2021-06-19 10:25:40 | INFO | train_inner | epoch 002: 2032 / 3002 loss=2.722, ppl=6.6, wps=5812.9, ups=0.09, wpb=64738, bsz=128, num_updates=5001, lr=9.9968e-05, gnorm=3.183, loss_scale=4, train_wall=11, gb_free=2.8, wall=56815 2021-06-19 10:25:52 | INFO | train_inner | epoch 002: 2033 / 3002 loss=2.655, ppl=6.3, wps=5832.8, ups=0.09, wpb=64813, bsz=128, num_updates=5002, lr=9.9968e-05, gnorm=2.513, loss_scale=4, train_wall=11, gb_free=2.8, wall=56826 2021-06-19 10:26:02 | INFO | train_inner | epoch 002: 2034 / 3002 loss=2.597, ppl=6.05, wps=5934.4, ups=0.09, wpb=64840, bsz=128, num_updates=5003, lr=9.9968e-05, gnorm=2.379, loss_scale=4, train_wall=10, gb_free=2.8, wall=56837 2021-06-19 10:26:14 | INFO | train_inner | epoch 002: 2035 / 3002 loss=2.725, ppl=6.61, wps=5754.6, ups=0.09, wpb=64852, bsz=128, num_updates=5004, lr=9.9968e-05, gnorm=4.15, loss_scale=4, train_wall=11, gb_free=2.8, wall=56848 2021-06-19 10:26:25 | INFO | train_inner | epoch 002: 2036 / 3002 loss=2.569, ppl=5.93, wps=5905.1, ups=0.09, wpb=64865, bsz=128, num_updates=5005, lr=9.9968e-05, gnorm=2.624, loss_scale=4, train_wall=11, gb_free=2.8, wall=56859 2021-06-19 10:26:36 | INFO | train_inner | epoch 002: 2037 / 3002 loss=2.626, ppl=6.17, wps=5802.4, ups=0.09, wpb=64768, bsz=128, num_updates=5006, lr=9.99679e-05, gnorm=2.568, loss_scale=4, train_wall=11, gb_free=2.8, wall=56870 2021-06-19 10:26:47 | INFO | train_inner | epoch 002: 2038 / 3002 loss=2.766, ppl=6.8, wps=5877.5, ups=0.09, wpb=64789, bsz=128, num_updates=5007, lr=9.99679e-05, gnorm=2.498, loss_scale=4, train_wall=11, gb_free=2.8, wall=56881 2021-06-19 10:26:58 | INFO | train_inner | epoch 002: 2039 / 3002 loss=2.661, ppl=6.32, wps=5823.1, ups=0.09, wpb=64785, bsz=128, num_updates=5008, lr=9.99679e-05, gnorm=2.414, loss_scale=4, train_wall=11, gb_free=2.8, wall=56892 2021-06-19 10:27:09 | INFO | train_inner | epoch 002: 2040 / 3002 loss=2.632, ppl=6.2, wps=5895.9, ups=0.09, wpb=64868, bsz=128, num_updates=5009, lr=9.99679e-05, gnorm=2.335, loss_scale=4, train_wall=11, gb_free=2.8, wall=56903 2021-06-19 10:27:20 | INFO | train_inner | epoch 002: 2041 / 3002 loss=2.823, ppl=7.08, wps=5869.1, ups=0.09, wpb=64782, bsz=128, num_updates=5010, lr=9.99679e-05, gnorm=5.294, loss_scale=4, train_wall=11, gb_free=2.8, wall=56914 2021-06-19 10:27:31 | INFO | train_inner | epoch 002: 2042 / 3002 loss=2.841, ppl=7.16, wps=5860.6, ups=0.09, wpb=64818, bsz=128, num_updates=5011, lr=9.99679e-05, gnorm=2.23, loss_scale=4, train_wall=11, gb_free=2.8, wall=56925 2021-06-19 10:27:42 | INFO | train_inner | epoch 002: 2043 / 3002 loss=2.629, ppl=6.19, wps=5884.2, ups=0.09, wpb=64857, bsz=128, num_updates=5012, lr=9.99679e-05, gnorm=2.246, loss_scale=4, train_wall=11, gb_free=2.8, wall=56936 2021-06-19 10:27:53 | INFO | train_inner | epoch 002: 2044 / 3002 loss=2.666, ppl=6.35, wps=5904.5, ups=0.09, wpb=64876, bsz=128, num_updates=5013, lr=9.99679e-05, gnorm=2.378, loss_scale=4, train_wall=11, gb_free=2.8, wall=56947 2021-06-19 10:28:04 | INFO | train_inner | epoch 002: 2045 / 3002 loss=2.767, ppl=6.81, wps=5883.7, ups=0.09, wpb=64823, bsz=128, num_updates=5014, lr=9.99679e-05, gnorm=2.327, loss_scale=4, train_wall=11, gb_free=2.8, wall=56958 2021-06-19 10:28:15 | INFO | train_inner | epoch 002: 2046 / 3002 loss=2.717, ppl=6.57, wps=5924, ups=0.09, wpb=64817, bsz=128, num_updates=5015, lr=9.99679e-05, gnorm=2.452, loss_scale=4, train_wall=11, gb_free=2.8, wall=56969 2021-06-19 10:28:26 | INFO | train_inner | epoch 002: 2047 / 3002 loss=2.675, ppl=6.39, wps=5949.7, ups=0.09, wpb=64824, bsz=128, num_updates=5016, lr=9.99679e-05, gnorm=2.409, loss_scale=4, train_wall=10, gb_free=2.8, wall=56980 2021-06-19 10:28:37 | INFO | train_inner | epoch 002: 2048 / 3002 loss=2.652, ppl=6.28, wps=5807.4, ups=0.09, wpb=64814, bsz=128, num_updates=5017, lr=9.99679e-05, gnorm=2.331, loss_scale=4, train_wall=11, gb_free=2.8, wall=56991 2021-06-19 10:28:49 | INFO | train_inner | epoch 002: 2049 / 3002 loss=2.771, ppl=6.83, wps=5688.4, ups=0.09, wpb=64837, bsz=128, num_updates=5018, lr=9.99679e-05, gnorm=2.284, loss_scale=4, train_wall=11, gb_free=2.8, wall=57003 2021-06-19 10:28:59 | INFO | train_inner | epoch 002: 2050 / 3002 loss=2.591, ppl=6.03, wps=5919.8, ups=0.09, wpb=64884, bsz=128, num_updates=5019, lr=9.99678e-05, gnorm=2.645, loss_scale=4, train_wall=10, gb_free=2.8, wall=57014 2021-06-19 10:29:11 | INFO | train_inner | epoch 002: 2051 / 3002 loss=2.643, ppl=6.25, wps=5862.5, ups=0.09, wpb=64783, bsz=128, num_updates=5020, lr=9.99678e-05, gnorm=2.313, loss_scale=4, train_wall=11, gb_free=2.8, wall=57025 2021-06-19 10:29:22 | INFO | train_inner | epoch 002: 2052 / 3002 loss=2.674, ppl=6.38, wps=5785.9, ups=0.09, wpb=64827, bsz=128, num_updates=5021, lr=9.99678e-05, gnorm=2.251, loss_scale=4, train_wall=11, gb_free=2.8, wall=57036 2021-06-19 10:29:33 | INFO | train_inner | epoch 002: 2053 / 3002 loss=2.725, ppl=6.61, wps=5719.5, ups=0.09, wpb=64811, bsz=128, num_updates=5022, lr=9.99678e-05, gnorm=2.316, loss_scale=4, train_wall=11, gb_free=2.8, wall=57047 2021-06-19 10:29:44 | INFO | train_inner | epoch 002: 2054 / 3002 loss=2.912, ppl=7.53, wps=5879.1, ups=0.09, wpb=64774, bsz=128, num_updates=5023, lr=9.99678e-05, gnorm=2.178, loss_scale=4, train_wall=11, gb_free=2.8, wall=57058 2021-06-19 10:29:55 | INFO | train_inner | epoch 002: 2055 / 3002 loss=2.719, ppl=6.59, wps=5806.9, ups=0.09, wpb=64745, bsz=128, num_updates=5024, lr=9.99678e-05, gnorm=2.389, loss_scale=4, train_wall=11, gb_free=2.8, wall=57070 2021-06-19 10:30:06 | INFO | train_inner | epoch 002: 2056 / 3002 loss=2.766, ppl=6.8, wps=5870.7, ups=0.09, wpb=64737, bsz=128, num_updates=5025, lr=9.99678e-05, gnorm=2.324, loss_scale=4, train_wall=11, gb_free=2.8, wall=57081 2021-06-19 10:30:17 | INFO | train_inner | epoch 002: 2057 / 3002 loss=2.887, ppl=7.4, wps=5775.4, ups=0.09, wpb=64790, bsz=128, num_updates=5026, lr=9.99678e-05, gnorm=2.308, loss_scale=4, train_wall=11, gb_free=2.8, wall=57092 2021-06-19 10:30:29 | INFO | train_inner | epoch 002: 2058 / 3002 loss=2.589, ppl=6.02, wps=5782.6, ups=0.09, wpb=64837, bsz=128, num_updates=5027, lr=9.99678e-05, gnorm=2.634, loss_scale=4, train_wall=11, gb_free=2.8, wall=57103 2021-06-19 10:30:40 | INFO | train_inner | epoch 002: 2059 / 3002 loss=2.678, ppl=6.4, wps=5908, ups=0.09, wpb=64903, bsz=128, num_updates=5028, lr=9.99678e-05, gnorm=2.315, loss_scale=4, train_wall=11, gb_free=2.8, wall=57114 2021-06-19 10:30:51 | INFO | train_inner | epoch 002: 2060 / 3002 loss=2.715, ppl=6.56, wps=5727, ups=0.09, wpb=64885, bsz=128, num_updates=5029, lr=9.99678e-05, gnorm=2.222, loss_scale=4, train_wall=11, gb_free=2.8, wall=57125 2021-06-19 10:31:02 | INFO | train_inner | epoch 002: 2061 / 3002 loss=2.684, ppl=6.43, wps=5741.1, ups=0.09, wpb=64826, bsz=128, num_updates=5030, lr=9.99678e-05, gnorm=2.691, loss_scale=4, train_wall=11, gb_free=2.8, wall=57137 2021-06-19 10:31:13 | INFO | train_inner | epoch 002: 2062 / 3002 loss=2.954, ppl=7.75, wps=5841.4, ups=0.09, wpb=64824, bsz=128, num_updates=5031, lr=9.99677e-05, gnorm=2.538, loss_scale=4, train_wall=11, gb_free=2.8, wall=57148 2021-06-19 10:31:24 | INFO | train_inner | epoch 002: 2063 / 3002 loss=2.906, ppl=7.5, wps=5926.4, ups=0.09, wpb=64879, bsz=128, num_updates=5032, lr=9.99677e-05, gnorm=3.189, loss_scale=4, train_wall=11, gb_free=2.8, wall=57159 2021-06-19 10:31:35 | INFO | train_inner | epoch 002: 2064 / 3002 loss=2.708, ppl=6.53, wps=5922, ups=0.09, wpb=64861, bsz=128, num_updates=5033, lr=9.99677e-05, gnorm=2.329, loss_scale=4, train_wall=10, gb_free=2.8, wall=57170 2021-06-19 10:31:46 | INFO | train_inner | epoch 002: 2065 / 3002 loss=2.639, ppl=6.23, wps=5975.7, ups=0.09, wpb=64800, bsz=128, num_updates=5034, lr=9.99677e-05, gnorm=2.239, loss_scale=4, train_wall=10, gb_free=2.8, wall=57180 2021-06-19 10:31:57 | INFO | train_inner | epoch 002: 2066 / 3002 loss=2.816, ppl=7.04, wps=5862.8, ups=0.09, wpb=64851, bsz=128, num_updates=5035, lr=9.99677e-05, gnorm=2.253, loss_scale=4, train_wall=11, gb_free=2.8, wall=57192 2021-06-19 10:32:08 | INFO | train_inner | epoch 002: 2067 / 3002 loss=2.696, ppl=6.48, wps=5945.8, ups=0.09, wpb=64939, bsz=128, num_updates=5036, lr=9.99677e-05, gnorm=2.471, loss_scale=4, train_wall=10, gb_free=2.8, wall=57202 2021-06-19 10:32:19 | INFO | train_inner | epoch 002: 2068 / 3002 loss=2.741, ppl=6.69, wps=5822.7, ups=0.09, wpb=64861, bsz=128, num_updates=5037, lr=9.99677e-05, gnorm=2.272, loss_scale=4, train_wall=11, gb_free=2.8, wall=57214 2021-06-19 10:32:30 | INFO | train_inner | epoch 002: 2069 / 3002 loss=2.587, ppl=6.01, wps=6059.6, ups=0.09, wpb=64898, bsz=128, num_updates=5038, lr=9.99677e-05, gnorm=2.312, loss_scale=4, train_wall=10, gb_free=2.8, wall=57224 2021-06-19 10:32:41 | INFO | train_inner | epoch 002: 2070 / 3002 loss=2.796, ppl=6.95, wps=5848, ups=0.09, wpb=64858, bsz=128, num_updates=5039, lr=9.99677e-05, gnorm=2.379, loss_scale=4, train_wall=11, gb_free=2.8, wall=57235 2021-06-19 10:32:52 | INFO | train_inner | epoch 002: 2071 / 3002 loss=2.718, ppl=6.58, wps=5897.7, ups=0.09, wpb=64915, bsz=128, num_updates=5040, lr=9.99677e-05, gnorm=2.182, loss_scale=4, train_wall=11, gb_free=2.8, wall=57246 2021-06-19 10:33:03 | INFO | train_inner | epoch 002: 2072 / 3002 loss=2.651, ppl=6.28, wps=5941.7, ups=0.09, wpb=64879, bsz=128, num_updates=5041, lr=9.99677e-05, gnorm=2.225, loss_scale=4, train_wall=10, gb_free=2.8, wall=57257 2021-06-19 10:33:14 | INFO | train_inner | epoch 002: 2073 / 3002 loss=2.73, ppl=6.64, wps=5839.7, ups=0.09, wpb=64807, bsz=128, num_updates=5042, lr=9.99677e-05, gnorm=2.563, loss_scale=4, train_wall=11, gb_free=2.8, wall=57268 2021-06-19 10:33:25 | INFO | train_inner | epoch 002: 2074 / 3002 loss=2.726, ppl=6.62, wps=5752.1, ups=0.09, wpb=64793, bsz=128, num_updates=5043, lr=9.99677e-05, gnorm=2.182, loss_scale=4, train_wall=11, gb_free=2.8, wall=57280 2021-06-19 10:33:37 | INFO | train_inner | epoch 002: 2075 / 3002 loss=2.615, ppl=6.13, wps=5806.7, ups=0.09, wpb=64810, bsz=128, num_updates=5044, lr=9.99676e-05, gnorm=2.587, loss_scale=4, train_wall=11, gb_free=2.8, wall=57291 2021-06-19 10:33:48 | INFO | train_inner | epoch 002: 2076 / 3002 loss=2.789, ppl=6.91, wps=5706.6, ups=0.09, wpb=64809, bsz=128, num_updates=5045, lr=9.99676e-05, gnorm=2.229, loss_scale=4, train_wall=11, gb_free=2.8, wall=57302 2021-06-19 10:33:59 | INFO | train_inner | epoch 002: 2077 / 3002 loss=2.786, ppl=6.9, wps=5796.3, ups=0.09, wpb=64745, bsz=128, num_updates=5046, lr=9.99676e-05, gnorm=2.335, loss_scale=4, train_wall=11, gb_free=2.8, wall=57313 2021-06-19 10:34:10 | INFO | train_inner | epoch 002: 2078 / 3002 loss=2.966, ppl=7.81, wps=5808.9, ups=0.09, wpb=64821, bsz=128, num_updates=5047, lr=9.99676e-05, gnorm=2.321, loss_scale=4, train_wall=11, gb_free=2.8, wall=57325 2021-06-19 10:34:21 | INFO | train_inner | epoch 002: 2079 / 3002 loss=2.62, ppl=6.15, wps=5858.2, ups=0.09, wpb=64840, bsz=128, num_updates=5048, lr=9.99676e-05, gnorm=2.272, loss_scale=8, train_wall=11, gb_free=2.8, wall=57336 2021-06-19 10:34:32 | INFO | train_inner | epoch 002: 2080 / 3002 loss=2.667, ppl=6.35, wps=5866, ups=0.09, wpb=64827, bsz=128, num_updates=5049, lr=9.99676e-05, gnorm=2.224, loss_scale=8, train_wall=11, gb_free=2.8, wall=57347 2021-06-19 10:34:44 | INFO | train_inner | epoch 002: 2081 / 3002 loss=2.802, ppl=6.97, wps=5794.4, ups=0.09, wpb=64814, bsz=128, num_updates=5050, lr=9.99676e-05, gnorm=2.283, loss_scale=8, train_wall=11, gb_free=2.8, wall=57358 2021-06-19 10:34:54 | INFO | train_inner | epoch 002: 2082 / 3002 loss=2.703, ppl=6.51, wps=5971, ups=0.09, wpb=64880, bsz=128, num_updates=5051, lr=9.99676e-05, gnorm=2.386, loss_scale=8, train_wall=10, gb_free=2.8, wall=57369 2021-06-19 10:35:06 | INFO | train_inner | epoch 002: 2083 / 3002 loss=2.767, ppl=6.81, wps=5731.5, ups=0.09, wpb=64868, bsz=128, num_updates=5052, lr=9.99676e-05, gnorm=2.317, loss_scale=8, train_wall=11, gb_free=2.8, wall=57380 2021-06-19 10:35:17 | INFO | train_inner | epoch 002: 2084 / 3002 loss=2.669, ppl=6.36, wps=5780.6, ups=0.09, wpb=64818, bsz=128, num_updates=5053, lr=9.99676e-05, gnorm=2.627, loss_scale=8, train_wall=11, gb_free=2.8, wall=57391 2021-06-19 10:35:28 | INFO | train_inner | epoch 002: 2085 / 3002 loss=2.895, ppl=7.44, wps=5952.8, ups=0.09, wpb=64915, bsz=128, num_updates=5054, lr=9.99676e-05, gnorm=2.215, loss_scale=8, train_wall=10, gb_free=2.8, wall=57402 2021-06-19 10:35:39 | INFO | train_inner | epoch 002: 2086 / 3002 loss=2.751, ppl=6.73, wps=5903.4, ups=0.09, wpb=64880, bsz=128, num_updates=5055, lr=9.99676e-05, gnorm=2.271, loss_scale=8, train_wall=11, gb_free=2.8, wall=57413 2021-06-19 10:35:50 | INFO | train_inner | epoch 002: 2087 / 3002 loss=2.605, ppl=6.08, wps=5865.1, ups=0.09, wpb=64852, bsz=128, num_updates=5056, lr=9.99675e-05, gnorm=2.329, loss_scale=8, train_wall=11, gb_free=2.8, wall=57424 2021-06-19 10:36:01 | INFO | train_inner | epoch 002: 2088 / 3002 loss=2.74, ppl=6.68, wps=5759.8, ups=0.09, wpb=64817, bsz=128, num_updates=5057, lr=9.99675e-05, gnorm=2.235, loss_scale=8, train_wall=11, gb_free=2.8, wall=57435 2021-06-19 10:36:12 | INFO | train_inner | epoch 002: 2089 / 3002 loss=2.894, ppl=7.43, wps=5852.2, ups=0.09, wpb=64841, bsz=128, num_updates=5058, lr=9.99675e-05, gnorm=2.265, loss_scale=8, train_wall=11, gb_free=2.8, wall=57447 2021-06-19 10:36:23 | INFO | train_inner | epoch 002: 2090 / 3002 loss=2.766, ppl=6.8, wps=5889.4, ups=0.09, wpb=64812, bsz=128, num_updates=5059, lr=9.99675e-05, gnorm=2.171, loss_scale=8, train_wall=11, gb_free=2.8, wall=57458 2021-06-19 10:36:34 | INFO | train_inner | epoch 002: 2091 / 3002 loss=2.889, ppl=7.41, wps=5755, ups=0.09, wpb=64892, bsz=128, num_updates=5060, lr=9.99675e-05, gnorm=2.268, loss_scale=8, train_wall=11, gb_free=2.8, wall=57469 2021-06-19 10:36:46 | INFO | train_inner | epoch 002: 2092 / 3002 loss=2.814, ppl=7.03, wps=5835.6, ups=0.09, wpb=64779, bsz=128, num_updates=5061, lr=9.99675e-05, gnorm=2.228, loss_scale=8, train_wall=11, gb_free=2.8, wall=57480 2021-06-19 10:36:57 | INFO | train_inner | epoch 002: 2093 / 3002 loss=2.704, ppl=6.52, wps=5893.3, ups=0.09, wpb=64769, bsz=128, num_updates=5062, lr=9.99675e-05, gnorm=2.495, loss_scale=8, train_wall=11, gb_free=2.8, wall=57491 2021-06-19 10:37:08 | INFO | train_inner | epoch 002: 2094 / 3002 loss=2.707, ppl=6.53, wps=5817.5, ups=0.09, wpb=64772, bsz=128, num_updates=5063, lr=9.99675e-05, gnorm=2.355, loss_scale=8, train_wall=11, gb_free=2.8, wall=57502 2021-06-19 10:37:19 | INFO | train_inner | epoch 002: 2095 / 3002 loss=2.688, ppl=6.44, wps=5824.9, ups=0.09, wpb=64791, bsz=128, num_updates=5064, lr=9.99675e-05, gnorm=2.469, loss_scale=8, train_wall=11, gb_free=2.8, wall=57513 2021-06-19 10:37:30 | INFO | train_inner | epoch 002: 2096 / 3002 loss=2.648, ppl=6.27, wps=5848.8, ups=0.09, wpb=64892, bsz=128, num_updates=5065, lr=9.99675e-05, gnorm=2.25, loss_scale=8, train_wall=11, gb_free=2.8, wall=57524 2021-06-19 10:37:41 | INFO | train_inner | epoch 002: 2097 / 3002 loss=2.762, ppl=6.78, wps=5835.6, ups=0.09, wpb=64842, bsz=128, num_updates=5066, lr=9.99675e-05, gnorm=2.51, loss_scale=8, train_wall=11, gb_free=2.8, wall=57535 2021-06-19 10:37:52 | INFO | train_inner | epoch 002: 2098 / 3002 loss=2.832, ppl=7.12, wps=5851.7, ups=0.09, wpb=64800, bsz=128, num_updates=5067, lr=9.99675e-05, gnorm=2.353, loss_scale=8, train_wall=11, gb_free=2.8, wall=57546 2021-06-19 10:38:03 | INFO | train_inner | epoch 002: 2099 / 3002 loss=2.744, ppl=6.7, wps=5873.8, ups=0.09, wpb=64846, bsz=128, num_updates=5068, lr=9.99675e-05, gnorm=2.274, loss_scale=8, train_wall=11, gb_free=2.8, wall=57557 2021-06-19 10:38:14 | INFO | train_inner | epoch 002: 2100 / 3002 loss=2.604, ppl=6.08, wps=5819.9, ups=0.09, wpb=64843, bsz=128, num_updates=5069, lr=9.99674e-05, gnorm=2.411, loss_scale=8, train_wall=11, gb_free=2.8, wall=57569 2021-06-19 10:38:25 | INFO | train_inner | epoch 002: 2101 / 3002 loss=2.685, ppl=6.43, wps=5846, ups=0.09, wpb=64845, bsz=128, num_updates=5070, lr=9.99674e-05, gnorm=2.228, loss_scale=8, train_wall=11, gb_free=2.8, wall=57580 2021-06-19 10:38:37 | INFO | train_inner | epoch 002: 2102 / 3002 loss=2.707, ppl=6.53, wps=5805.3, ups=0.09, wpb=64848, bsz=128, num_updates=5071, lr=9.99674e-05, gnorm=2.619, loss_scale=8, train_wall=11, gb_free=2.8, wall=57591 2021-06-19 10:38:48 | INFO | train_inner | epoch 002: 2103 / 3002 loss=2.758, ppl=6.77, wps=5776.9, ups=0.09, wpb=64774, bsz=128, num_updates=5072, lr=9.99674e-05, gnorm=2.315, loss_scale=8, train_wall=11, gb_free=2.8, wall=57602 2021-06-19 10:38:59 | INFO | train_inner | epoch 002: 2104 / 3002 loss=2.881, ppl=7.37, wps=5864.4, ups=0.09, wpb=64881, bsz=128, num_updates=5073, lr=9.99674e-05, gnorm=2.23, loss_scale=8, train_wall=11, gb_free=2.8, wall=57613 2021-06-19 10:39:10 | INFO | train_inner | epoch 002: 2105 / 3002 loss=2.686, ppl=6.44, wps=5771.4, ups=0.09, wpb=64783, bsz=128, num_updates=5074, lr=9.99674e-05, gnorm=2.18, loss_scale=8, train_wall=11, gb_free=2.8, wall=57624 2021-06-19 10:39:21 | INFO | train_inner | epoch 002: 2106 / 3002 loss=2.822, ppl=7.07, wps=5878.5, ups=0.09, wpb=64772, bsz=128, num_updates=5075, lr=9.99674e-05, gnorm=2.248, loss_scale=8, train_wall=11, gb_free=2.8, wall=57635 2021-06-19 10:39:32 | INFO | train_inner | epoch 002: 2107 / 3002 loss=2.869, ppl=7.31, wps=5922.3, ups=0.09, wpb=64813, bsz=128, num_updates=5076, lr=9.99674e-05, gnorm=2.154, loss_scale=8, train_wall=10, gb_free=2.8, wall=57646 2021-06-19 10:39:43 | INFO | train_inner | epoch 002: 2108 / 3002 loss=2.751, ppl=6.73, wps=5827.1, ups=0.09, wpb=64832, bsz=128, num_updates=5077, lr=9.99674e-05, gnorm=5.683, loss_scale=8, train_wall=11, gb_free=2.8, wall=57657 2021-06-19 10:39:54 | INFO | train_inner | epoch 002: 2109 / 3002 loss=2.777, ppl=6.85, wps=5793.1, ups=0.09, wpb=64751, bsz=128, num_updates=5078, lr=9.99674e-05, gnorm=2.202, loss_scale=8, train_wall=11, gb_free=2.8, wall=57669 2021-06-19 10:40:05 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 2021-06-19 10:40:17 | INFO | train_inner | epoch 002: 2111 / 3002 loss=2.678, ppl=6.4, wps=2924.4, ups=0.05, wpb=64853, bsz=128, num_updates=5079, lr=9.99674e-05, gnorm=2.155, loss_scale=4, train_wall=21, gb_free=2.8, wall=57691 2021-06-19 10:40:28 | INFO | train_inner | epoch 002: 2112 / 3002 loss=2.741, ppl=6.69, wps=5774.1, ups=0.09, wpb=64821, bsz=128, num_updates=5080, lr=9.99674e-05, gnorm=2.309, loss_scale=4, train_wall=11, gb_free=2.8, wall=57702 2021-06-19 10:40:39 | INFO | train_inner | epoch 002: 2113 / 3002 loss=2.645, ppl=6.26, wps=5888, ups=0.09, wpb=64773, bsz=128, num_updates=5081, lr=9.99673e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=57713 2021-06-19 10:40:50 | INFO | train_inner | epoch 002: 2114 / 3002 loss=2.485, ppl=5.6, wps=5860.5, ups=0.09, wpb=64900, bsz=128, num_updates=5082, lr=9.99673e-05, gnorm=2.196, loss_scale=4, train_wall=11, gb_free=2.8, wall=57724 2021-06-19 10:41:01 | INFO | train_inner | epoch 002: 2115 / 3002 loss=2.919, ppl=7.56, wps=5933.6, ups=0.09, wpb=64812, bsz=128, num_updates=5083, lr=9.99673e-05, gnorm=2.261, loss_scale=4, train_wall=10, gb_free=2.8, wall=57735 2021-06-19 10:41:12 | INFO | train_inner | epoch 002: 2116 / 3002 loss=2.731, ppl=6.64, wps=5858.7, ups=0.09, wpb=64815, bsz=128, num_updates=5084, lr=9.99673e-05, gnorm=2.24, loss_scale=4, train_wall=11, gb_free=2.8, wall=57746 2021-06-19 10:41:23 | INFO | train_inner | epoch 002: 2117 / 3002 loss=2.693, ppl=6.47, wps=5844, ups=0.09, wpb=64798, bsz=128, num_updates=5085, lr=9.99673e-05, gnorm=2.159, loss_scale=4, train_wall=11, gb_free=2.8, wall=57757 2021-06-19 10:41:34 | INFO | train_inner | epoch 002: 2118 / 3002 loss=2.711, ppl=6.55, wps=6017.1, ups=0.09, wpb=64905, bsz=128, num_updates=5086, lr=9.99673e-05, gnorm=2.176, loss_scale=4, train_wall=10, gb_free=2.8, wall=57768 2021-06-19 10:41:45 | INFO | train_inner | epoch 002: 2119 / 3002 loss=2.647, ppl=6.26, wps=5842.5, ups=0.09, wpb=64869, bsz=128, num_updates=5087, lr=9.99673e-05, gnorm=2.197, loss_scale=4, train_wall=11, gb_free=2.8, wall=57779 2021-06-19 10:41:56 | INFO | train_inner | epoch 002: 2120 / 3002 loss=2.79, ppl=6.92, wps=5899.4, ups=0.09, wpb=64853, bsz=128, num_updates=5088, lr=9.99673e-05, gnorm=2.219, loss_scale=4, train_wall=11, gb_free=2.8, wall=57790 2021-06-19 10:42:07 | INFO | train_inner | epoch 002: 2121 / 3002 loss=2.853, ppl=7.22, wps=5944, ups=0.09, wpb=64805, bsz=128, num_updates=5089, lr=9.99673e-05, gnorm=2.269, loss_scale=4, train_wall=10, gb_free=2.8, wall=57801 2021-06-19 10:42:18 | INFO | train_inner | epoch 002: 2122 / 3002 loss=2.865, ppl=7.29, wps=5887.2, ups=0.09, wpb=64860, bsz=128, num_updates=5090, lr=9.99673e-05, gnorm=2.133, loss_scale=4, train_wall=11, gb_free=2.8, wall=57812 2021-06-19 10:42:29 | INFO | train_inner | epoch 002: 2123 / 3002 loss=2.791, ppl=6.92, wps=5941, ups=0.09, wpb=64792, bsz=128, num_updates=5091, lr=9.99673e-05, gnorm=2.226, loss_scale=4, train_wall=10, gb_free=2.8, wall=57823 2021-06-19 10:42:40 | INFO | train_inner | epoch 002: 2124 / 3002 loss=2.701, ppl=6.5, wps=5793, ups=0.09, wpb=64806, bsz=128, num_updates=5092, lr=9.99673e-05, gnorm=2.647, loss_scale=4, train_wall=11, gb_free=2.8, wall=57834 2021-06-19 10:42:51 | INFO | train_inner | epoch 002: 2125 / 3002 loss=2.739, ppl=6.68, wps=5771.3, ups=0.09, wpb=64817, bsz=128, num_updates=5093, lr=9.99673e-05, gnorm=2.302, loss_scale=4, train_wall=11, gb_free=2.8, wall=57845 2021-06-19 10:43:02 | INFO | train_inner | epoch 002: 2126 / 3002 loss=2.716, ppl=6.57, wps=5836.6, ups=0.09, wpb=64820, bsz=128, num_updates=5094, lr=9.99672e-05, gnorm=2.243, loss_scale=4, train_wall=11, gb_free=2.8, wall=57856 2021-06-19 10:43:13 | INFO | train_inner | epoch 002: 2127 / 3002 loss=2.673, ppl=6.38, wps=5761.2, ups=0.09, wpb=64718, bsz=128, num_updates=5095, lr=9.99672e-05, gnorm=2.109, loss_scale=4, train_wall=11, gb_free=2.8, wall=57868 2021-06-19 10:43:24 | INFO | train_inner | epoch 002: 2128 / 3002 loss=2.784, ppl=6.89, wps=5838.3, ups=0.09, wpb=64788, bsz=128, num_updates=5096, lr=9.99672e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=57879 2021-06-19 10:43:35 | INFO | train_inner | epoch 002: 2129 / 3002 loss=2.69, ppl=6.45, wps=5937.5, ups=0.09, wpb=64892, bsz=128, num_updates=5097, lr=9.99672e-05, gnorm=2.183, loss_scale=4, train_wall=10, gb_free=2.8, wall=57890 2021-06-19 10:43:46 | INFO | train_inner | epoch 002: 2130 / 3002 loss=2.711, ppl=6.55, wps=5835.5, ups=0.09, wpb=64832, bsz=128, num_updates=5098, lr=9.99672e-05, gnorm=2.229, loss_scale=4, train_wall=11, gb_free=2.8, wall=57901 2021-06-19 10:43:58 | INFO | train_inner | epoch 002: 2131 / 3002 loss=2.606, ppl=6.09, wps=5866.4, ups=0.09, wpb=64864, bsz=128, num_updates=5099, lr=9.99672e-05, gnorm=2.533, loss_scale=4, train_wall=11, gb_free=2.8, wall=57912 2021-06-19 10:44:09 | INFO | train_inner | epoch 002: 2132 / 3002 loss=2.694, ppl=6.47, wps=5736, ups=0.09, wpb=64832, bsz=128, num_updates=5100, lr=9.99672e-05, gnorm=2.269, loss_scale=4, train_wall=11, gb_free=2.8, wall=57923 2021-06-19 10:44:20 | INFO | train_inner | epoch 002: 2133 / 3002 loss=2.635, ppl=6.21, wps=5859.6, ups=0.09, wpb=64831, bsz=128, num_updates=5101, lr=9.99672e-05, gnorm=2.224, loss_scale=4, train_wall=11, gb_free=2.8, wall=57934 2021-06-19 10:44:31 | INFO | train_inner | epoch 002: 2134 / 3002 loss=2.835, ppl=7.13, wps=5744.2, ups=0.09, wpb=64852, bsz=128, num_updates=5102, lr=9.99672e-05, gnorm=2.494, loss_scale=4, train_wall=11, gb_free=2.8, wall=57946 2021-06-19 10:44:42 | INFO | train_inner | epoch 002: 2135 / 3002 loss=2.67, ppl=6.37, wps=5915.6, ups=0.09, wpb=64807, bsz=128, num_updates=5103, lr=9.99672e-05, gnorm=2.282, loss_scale=4, train_wall=11, gb_free=2.8, wall=57956 2021-06-19 10:44:53 | INFO | train_inner | epoch 002: 2136 / 3002 loss=2.774, ppl=6.84, wps=5861.1, ups=0.09, wpb=64724, bsz=128, num_updates=5104, lr=9.99672e-05, gnorm=2.408, loss_scale=4, train_wall=11, gb_free=2.8, wall=57968 2021-06-19 10:45:04 | INFO | train_inner | epoch 002: 2137 / 3002 loss=2.74, ppl=6.68, wps=5848.9, ups=0.09, wpb=64819, bsz=128, num_updates=5105, lr=9.99672e-05, gnorm=2.133, loss_scale=4, train_wall=11, gb_free=2.8, wall=57979 2021-06-19 10:45:15 | INFO | train_inner | epoch 002: 2138 / 3002 loss=2.585, ppl=6, wps=5824.7, ups=0.09, wpb=64777, bsz=128, num_updates=5106, lr=9.99671e-05, gnorm=2.231, loss_scale=4, train_wall=11, gb_free=2.8, wall=57990 2021-06-19 10:45:27 | INFO | train_inner | epoch 002: 2139 / 3002 loss=2.633, ppl=6.2, wps=5830.3, ups=0.09, wpb=64806, bsz=128, num_updates=5107, lr=9.99671e-05, gnorm=2.209, loss_scale=4, train_wall=11, gb_free=2.8, wall=58001 2021-06-19 10:45:38 | INFO | train_inner | epoch 002: 2140 / 3002 loss=2.632, ppl=6.2, wps=5837.4, ups=0.09, wpb=64881, bsz=128, num_updates=5108, lr=9.99671e-05, gnorm=2.115, loss_scale=4, train_wall=11, gb_free=2.8, wall=58012 2021-06-19 10:45:49 | INFO | train_inner | epoch 002: 2141 / 3002 loss=2.633, ppl=6.2, wps=5759.5, ups=0.09, wpb=64877, bsz=128, num_updates=5109, lr=9.99671e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=58023 2021-06-19 10:46:00 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-19 10:46:11 | INFO | train_inner | epoch 002: 2143 / 3002 loss=2.727, ppl=6.62, wps=2942.4, ups=0.05, wpb=64778, bsz=128, num_updates=5110, lr=9.99671e-05, gnorm=2.269, loss_scale=2, train_wall=21, gb_free=2.8, wall=58045 2021-06-19 10:46:22 | INFO | train_inner | epoch 002: 2144 / 3002 loss=2.702, ppl=6.51, wps=5821.1, ups=0.09, wpb=64858, bsz=128, num_updates=5111, lr=9.99671e-05, gnorm=2.221, loss_scale=2, train_wall=11, gb_free=2.8, wall=58056 2021-06-19 10:46:33 | INFO | train_inner | epoch 002: 2145 / 3002 loss=2.839, ppl=7.16, wps=5893.5, ups=0.09, wpb=64813, bsz=128, num_updates=5112, lr=9.99671e-05, gnorm=2.239, loss_scale=2, train_wall=11, gb_free=2.8, wall=58067 2021-06-19 10:46:44 | INFO | train_inner | epoch 002: 2146 / 3002 loss=2.674, ppl=6.38, wps=5936.9, ups=0.09, wpb=64831, bsz=128, num_updates=5113, lr=9.99671e-05, gnorm=2.25, loss_scale=2, train_wall=10, gb_free=2.8, wall=58078 2021-06-19 10:46:55 | INFO | train_inner | epoch 002: 2147 / 3002 loss=2.726, ppl=6.62, wps=5994.8, ups=0.09, wpb=64859, bsz=128, num_updates=5114, lr=9.99671e-05, gnorm=2.19, loss_scale=2, train_wall=10, gb_free=2.8, wall=58089 2021-06-19 10:47:06 | INFO | train_inner | epoch 002: 2148 / 3002 loss=2.731, ppl=6.64, wps=5856.7, ups=0.09, wpb=64781, bsz=128, num_updates=5115, lr=9.99671e-05, gnorm=2.284, loss_scale=2, train_wall=11, gb_free=2.8, wall=58100 2021-06-19 10:47:17 | INFO | train_inner | epoch 002: 2149 / 3002 loss=2.744, ppl=6.7, wps=5947.5, ups=0.09, wpb=64932, bsz=128, num_updates=5116, lr=9.99671e-05, gnorm=2.302, loss_scale=2, train_wall=10, gb_free=2.8, wall=58111 2021-06-19 10:47:28 | INFO | train_inner | epoch 002: 2150 / 3002 loss=2.572, ppl=5.95, wps=5850.6, ups=0.09, wpb=64789, bsz=128, num_updates=5117, lr=9.99671e-05, gnorm=2.209, loss_scale=2, train_wall=11, gb_free=2.8, wall=58122 2021-06-19 10:47:39 | INFO | train_inner | epoch 002: 2151 / 3002 loss=2.596, ppl=6.05, wps=5940.7, ups=0.09, wpb=64859, bsz=128, num_updates=5118, lr=9.99671e-05, gnorm=2.331, loss_scale=2, train_wall=10, gb_free=2.8, wall=58133 2021-06-19 10:47:50 | INFO | train_inner | epoch 002: 2152 / 3002 loss=2.81, ppl=7.01, wps=5905.9, ups=0.09, wpb=64822, bsz=128, num_updates=5119, lr=9.9967e-05, gnorm=2.34, loss_scale=2, train_wall=11, gb_free=2.8, wall=58144 2021-06-19 10:48:01 | INFO | train_inner | epoch 002: 2153 / 3002 loss=2.764, ppl=6.79, wps=5740.4, ups=0.09, wpb=64819, bsz=128, num_updates=5120, lr=9.9967e-05, gnorm=2.251, loss_scale=2, train_wall=11, gb_free=2.8, wall=58155 2021-06-19 10:48:12 | INFO | train_inner | epoch 002: 2154 / 3002 loss=2.708, ppl=6.53, wps=5853.4, ups=0.09, wpb=64896, bsz=128, num_updates=5121, lr=9.9967e-05, gnorm=2.402, loss_scale=2, train_wall=11, gb_free=2.8, wall=58166 2021-06-19 10:48:23 | INFO | train_inner | epoch 002: 2155 / 3002 loss=2.689, ppl=6.45, wps=5738.8, ups=0.09, wpb=64808, bsz=128, num_updates=5122, lr=9.9967e-05, gnorm=2.225, loss_scale=2, train_wall=11, gb_free=2.8, wall=58178 2021-06-19 10:48:35 | INFO | train_inner | epoch 002: 2156 / 3002 loss=2.836, ppl=7.14, wps=5806.2, ups=0.09, wpb=64828, bsz=128, num_updates=5123, lr=9.9967e-05, gnorm=2.234, loss_scale=2, train_wall=11, gb_free=2.8, wall=58189 2021-06-19 10:48:46 | INFO | train_inner | epoch 002: 2157 / 3002 loss=2.819, ppl=7.05, wps=5937.1, ups=0.09, wpb=64859, bsz=128, num_updates=5124, lr=9.9967e-05, gnorm=2.207, loss_scale=2, train_wall=10, gb_free=2.8, wall=58200 2021-06-19 10:48:57 | INFO | train_inner | epoch 002: 2158 / 3002 loss=2.654, ppl=6.29, wps=5833.9, ups=0.09, wpb=65003, bsz=128, num_updates=5125, lr=9.9967e-05, gnorm=2.228, loss_scale=2, train_wall=11, gb_free=2.8, wall=58211 2021-06-19 10:49:08 | INFO | train_inner | epoch 002: 2159 / 3002 loss=2.702, ppl=6.51, wps=5836.3, ups=0.09, wpb=64922, bsz=128, num_updates=5126, lr=9.9967e-05, gnorm=2.241, loss_scale=2, train_wall=11, gb_free=2.8, wall=58222 2021-06-19 10:49:19 | INFO | train_inner | epoch 002: 2160 / 3002 loss=2.705, ppl=6.52, wps=5876.2, ups=0.09, wpb=64884, bsz=128, num_updates=5127, lr=9.9967e-05, gnorm=2.287, loss_scale=2, train_wall=11, gb_free=2.8, wall=58233 2021-06-19 10:49:30 | INFO | train_inner | epoch 002: 2161 / 3002 loss=2.799, ppl=6.96, wps=5817.8, ups=0.09, wpb=64742, bsz=128, num_updates=5128, lr=9.9967e-05, gnorm=2.277, loss_scale=2, train_wall=11, gb_free=2.8, wall=58244 2021-06-19 10:49:41 | INFO | train_inner | epoch 002: 2162 / 3002 loss=2.707, ppl=6.53, wps=5843.9, ups=0.09, wpb=64793, bsz=128, num_updates=5129, lr=9.9967e-05, gnorm=2.201, loss_scale=2, train_wall=11, gb_free=2.8, wall=58255 2021-06-19 10:49:52 | INFO | train_inner | epoch 002: 2163 / 3002 loss=2.903, ppl=7.48, wps=5920.8, ups=0.09, wpb=64814, bsz=128, num_updates=5130, lr=9.9967e-05, gnorm=2.254, loss_scale=2, train_wall=10, gb_free=2.8, wall=58266 2021-06-19 10:50:03 | INFO | train_inner | epoch 002: 2164 / 3002 loss=2.752, ppl=6.74, wps=5995, ups=0.09, wpb=64805, bsz=128, num_updates=5131, lr=9.99669e-05, gnorm=2.605, loss_scale=2, train_wall=10, gb_free=2.8, wall=58277 2021-06-19 10:50:14 | INFO | train_inner | epoch 002: 2165 / 3002 loss=2.738, ppl=6.67, wps=5851.8, ups=0.09, wpb=64890, bsz=128, num_updates=5132, lr=9.99669e-05, gnorm=2.352, loss_scale=2, train_wall=11, gb_free=2.8, wall=58288 2021-06-19 10:50:25 | INFO | train_inner | epoch 002: 2166 / 3002 loss=2.752, ppl=6.73, wps=5857, ups=0.09, wpb=64814, bsz=128, num_updates=5133, lr=9.99669e-05, gnorm=2.561, loss_scale=2, train_wall=11, gb_free=2.8, wall=58299 2021-06-19 10:50:36 | INFO | train_inner | epoch 002: 2167 / 3002 loss=2.781, ppl=6.88, wps=5862.2, ups=0.09, wpb=64858, bsz=128, num_updates=5134, lr=9.99669e-05, gnorm=2.25, loss_scale=2, train_wall=11, gb_free=2.8, wall=58310 2021-06-19 10:50:47 | INFO | train_inner | epoch 002: 2168 / 3002 loss=2.556, ppl=5.88, wps=5800, ups=0.09, wpb=64872, bsz=128, num_updates=5135, lr=9.99669e-05, gnorm=2.202, loss_scale=2, train_wall=11, gb_free=2.8, wall=58322 2021-06-19 10:50:58 | INFO | train_inner | epoch 002: 2169 / 3002 loss=2.738, ppl=6.67, wps=5867, ups=0.09, wpb=64857, bsz=128, num_updates=5136, lr=9.99669e-05, gnorm=2.551, loss_scale=2, train_wall=11, gb_free=2.8, wall=58333 2021-06-19 10:51:09 | INFO | train_inner | epoch 002: 2170 / 3002 loss=2.812, ppl=7.02, wps=5867, ups=0.09, wpb=64719, bsz=128, num_updates=5137, lr=9.99669e-05, gnorm=2.341, loss_scale=2, train_wall=11, gb_free=2.8, wall=58344 2021-06-19 10:51:20 | INFO | train_inner | epoch 002: 2171 / 3002 loss=2.705, ppl=6.52, wps=5796.7, ups=0.09, wpb=64859, bsz=128, num_updates=5138, lr=9.99669e-05, gnorm=2.889, loss_scale=2, train_wall=11, gb_free=2.8, wall=58355 2021-06-19 10:51:32 | INFO | train_inner | epoch 002: 2172 / 3002 loss=2.602, ppl=6.07, wps=5837.8, ups=0.09, wpb=64915, bsz=128, num_updates=5139, lr=9.99669e-05, gnorm=2.2, loss_scale=2, train_wall=11, gb_free=2.8, wall=58366 2021-06-19 10:51:43 | INFO | train_inner | epoch 002: 2173 / 3002 loss=2.741, ppl=6.69, wps=5802.3, ups=0.09, wpb=64854, bsz=128, num_updates=5140, lr=9.99669e-05, gnorm=5.739, loss_scale=2, train_wall=11, gb_free=2.8, wall=58377 2021-06-19 10:51:54 | INFO | train_inner | epoch 002: 2174 / 3002 loss=2.679, ppl=6.4, wps=5844, ups=0.09, wpb=64869, bsz=128, num_updates=5141, lr=9.99669e-05, gnorm=2.26, loss_scale=2, train_wall=11, gb_free=2.8, wall=58388 2021-06-19 10:52:05 | INFO | train_inner | epoch 002: 2175 / 3002 loss=2.796, ppl=6.95, wps=5924.6, ups=0.09, wpb=64900, bsz=128, num_updates=5142, lr=9.99669e-05, gnorm=2.228, loss_scale=2, train_wall=10, gb_free=2.8, wall=58399 2021-06-19 10:52:16 | INFO | train_inner | epoch 002: 2176 / 3002 loss=2.835, ppl=7.13, wps=5907.8, ups=0.09, wpb=64902, bsz=128, num_updates=5143, lr=9.99669e-05, gnorm=2.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=58410 2021-06-19 10:52:27 | INFO | train_inner | epoch 002: 2177 / 3002 loss=2.69, ppl=6.45, wps=5968.7, ups=0.09, wpb=64865, bsz=128, num_updates=5144, lr=9.99668e-05, gnorm=3.029, loss_scale=2, train_wall=10, gb_free=2.8, wall=58421 2021-06-19 10:52:38 | INFO | train_inner | epoch 002: 2178 / 3002 loss=2.605, ppl=6.08, wps=5761.2, ups=0.09, wpb=64761, bsz=128, num_updates=5145, lr=9.99668e-05, gnorm=2.245, loss_scale=2, train_wall=11, gb_free=2.8, wall=58432 2021-06-19 10:52:49 | INFO | train_inner | epoch 002: 2179 / 3002 loss=2.794, ppl=6.94, wps=5785.1, ups=0.09, wpb=64824, bsz=128, num_updates=5146, lr=9.99668e-05, gnorm=2.237, loss_scale=2, train_wall=11, gb_free=2.8, wall=58443 2021-06-19 10:53:00 | INFO | train_inner | epoch 002: 2180 / 3002 loss=2.782, ppl=6.88, wps=5880.7, ups=0.09, wpb=64897, bsz=128, num_updates=5147, lr=9.99668e-05, gnorm=2.174, loss_scale=2, train_wall=11, gb_free=2.8, wall=58454 2021-06-19 10:53:11 | INFO | train_inner | epoch 002: 2181 / 3002 loss=2.627, ppl=6.18, wps=5852.8, ups=0.09, wpb=64822, bsz=128, num_updates=5148, lr=9.99668e-05, gnorm=2.251, loss_scale=2, train_wall=11, gb_free=2.8, wall=58466 2021-06-19 10:53:22 | INFO | train_inner | epoch 002: 2182 / 3002 loss=2.722, ppl=6.6, wps=5887.9, ups=0.09, wpb=64776, bsz=128, num_updates=5149, lr=9.99668e-05, gnorm=2.201, loss_scale=2, train_wall=11, gb_free=2.8, wall=58477 2021-06-19 10:53:33 | INFO | train_inner | epoch 002: 2183 / 3002 loss=2.805, ppl=6.99, wps=5801.7, ups=0.09, wpb=64830, bsz=128, num_updates=5150, lr=9.99668e-05, gnorm=2.367, loss_scale=2, train_wall=11, gb_free=2.8, wall=58488 2021-06-19 10:53:45 | INFO | train_inner | epoch 002: 2184 / 3002 loss=2.61, ppl=6.11, wps=5783, ups=0.09, wpb=64808, bsz=128, num_updates=5151, lr=9.99668e-05, gnorm=2.521, loss_scale=2, train_wall=11, gb_free=2.8, wall=58499 2021-06-19 10:53:56 | INFO | train_inner | epoch 002: 2185 / 3002 loss=2.716, ppl=6.57, wps=5807, ups=0.09, wpb=64852, bsz=128, num_updates=5152, lr=9.99668e-05, gnorm=2.265, loss_scale=2, train_wall=11, gb_free=2.8, wall=58510 2021-06-19 10:54:07 | INFO | train_inner | epoch 002: 2186 / 3002 loss=2.785, ppl=6.89, wps=5837.2, ups=0.09, wpb=64802, bsz=128, num_updates=5153, lr=9.99668e-05, gnorm=3.566, loss_scale=2, train_wall=11, gb_free=2.8, wall=58521 2021-06-19 10:54:18 | INFO | train_inner | epoch 002: 2187 / 3002 loss=2.748, ppl=6.72, wps=5747.1, ups=0.09, wpb=64816, bsz=128, num_updates=5154, lr=9.99668e-05, gnorm=2.197, loss_scale=2, train_wall=11, gb_free=2.8, wall=58533 2021-06-19 10:54:29 | INFO | train_inner | epoch 002: 2188 / 3002 loss=2.745, ppl=6.7, wps=5855.3, ups=0.09, wpb=64815, bsz=128, num_updates=5155, lr=9.99668e-05, gnorm=2.253, loss_scale=2, train_wall=11, gb_free=2.8, wall=58544 2021-06-19 10:54:40 | INFO | train_inner | epoch 002: 2189 / 3002 loss=2.795, ppl=6.94, wps=5894.7, ups=0.09, wpb=64832, bsz=128, num_updates=5156, lr=9.99667e-05, gnorm=2.328, loss_scale=2, train_wall=11, gb_free=2.8, wall=58555 2021-06-19 10:54:51 | INFO | train_inner | epoch 002: 2190 / 3002 loss=2.876, ppl=7.34, wps=5786.1, ups=0.09, wpb=64912, bsz=128, num_updates=5157, lr=9.99667e-05, gnorm=2.28, loss_scale=2, train_wall=11, gb_free=2.8, wall=58566 2021-06-19 10:55:02 | INFO | train_inner | epoch 002: 2191 / 3002 loss=2.657, ppl=6.31, wps=5954.7, ups=0.09, wpb=64846, bsz=128, num_updates=5158, lr=9.99667e-05, gnorm=2.165, loss_scale=2, train_wall=10, gb_free=2.8, wall=58577 2021-06-19 10:55:13 | INFO | train_inner | epoch 002: 2192 / 3002 loss=2.788, ppl=6.91, wps=5889.6, ups=0.09, wpb=64795, bsz=128, num_updates=5159, lr=9.99667e-05, gnorm=2.285, loss_scale=2, train_wall=11, gb_free=2.8, wall=58588 2021-06-19 10:55:24 | INFO | train_inner | epoch 002: 2193 / 3002 loss=2.565, ppl=5.92, wps=5858.4, ups=0.09, wpb=64787, bsz=128, num_updates=5160, lr=9.99667e-05, gnorm=2.349, loss_scale=2, train_wall=11, gb_free=2.8, wall=58599 2021-06-19 10:55:35 | INFO | train_inner | epoch 002: 2194 / 3002 loss=2.753, ppl=6.74, wps=5862.6, ups=0.09, wpb=64838, bsz=128, num_updates=5161, lr=9.99667e-05, gnorm=7.029, loss_scale=2, train_wall=11, gb_free=2.8, wall=58610 2021-06-19 10:55:47 | INFO | train_inner | epoch 002: 2195 / 3002 loss=2.749, ppl=6.72, wps=5871.9, ups=0.09, wpb=64796, bsz=128, num_updates=5162, lr=9.99667e-05, gnorm=2.287, loss_scale=2, train_wall=11, gb_free=2.8, wall=58621 2021-06-19 10:55:58 | INFO | train_inner | epoch 002: 2196 / 3002 loss=2.737, ppl=6.67, wps=5834.5, ups=0.09, wpb=64861, bsz=128, num_updates=5163, lr=9.99667e-05, gnorm=2.573, loss_scale=2, train_wall=11, gb_free=2.8, wall=58632 2021-06-19 10:56:09 | INFO | train_inner | epoch 002: 2197 / 3002 loss=2.68, ppl=6.41, wps=5787.2, ups=0.09, wpb=64920, bsz=128, num_updates=5164, lr=9.99667e-05, gnorm=2.318, loss_scale=2, train_wall=11, gb_free=2.8, wall=58643 2021-06-19 10:56:20 | INFO | train_inner | epoch 002: 2198 / 3002 loss=2.804, ppl=6.98, wps=5906.8, ups=0.09, wpb=64791, bsz=128, num_updates=5165, lr=9.99667e-05, gnorm=2.404, loss_scale=2, train_wall=11, gb_free=2.8, wall=58654 2021-06-19 10:56:31 | INFO | train_inner | epoch 002: 2199 / 3002 loss=2.769, ppl=6.82, wps=5899.7, ups=0.09, wpb=64732, bsz=128, num_updates=5166, lr=9.99667e-05, gnorm=2.31, loss_scale=2, train_wall=11, gb_free=2.8, wall=58665 2021-06-19 10:56:42 | INFO | train_inner | epoch 002: 2200 / 3002 loss=2.812, ppl=7.02, wps=5772.8, ups=0.09, wpb=64793, bsz=128, num_updates=5167, lr=9.99667e-05, gnorm=3.729, loss_scale=2, train_wall=11, gb_free=2.8, wall=58676 2021-06-19 10:56:53 | INFO | train_inner | epoch 002: 2201 / 3002 loss=2.692, ppl=6.46, wps=5921.8, ups=0.09, wpb=64839, bsz=128, num_updates=5168, lr=9.99667e-05, gnorm=2.291, loss_scale=2, train_wall=10, gb_free=2.8, wall=58687 2021-06-19 10:57:04 | INFO | train_inner | epoch 002: 2202 / 3002 loss=2.8, ppl=6.96, wps=5907.3, ups=0.09, wpb=64789, bsz=128, num_updates=5169, lr=9.99666e-05, gnorm=2.519, loss_scale=2, train_wall=10, gb_free=2.8, wall=58698 2021-06-19 10:57:15 | INFO | train_inner | epoch 002: 2203 / 3002 loss=2.798, ppl=6.95, wps=5794.7, ups=0.09, wpb=64809, bsz=128, num_updates=5170, lr=9.99666e-05, gnorm=2.417, loss_scale=2, train_wall=11, gb_free=2.8, wall=58709 2021-06-19 10:57:26 | INFO | train_inner | epoch 002: 2204 / 3002 loss=2.785, ppl=6.89, wps=5857.9, ups=0.09, wpb=64787, bsz=128, num_updates=5171, lr=9.99666e-05, gnorm=2.479, loss_scale=2, train_wall=11, gb_free=2.8, wall=58721 2021-06-19 10:57:37 | INFO | train_inner | epoch 002: 2205 / 3002 loss=2.661, ppl=6.33, wps=5878.5, ups=0.09, wpb=64778, bsz=128, num_updates=5172, lr=9.99666e-05, gnorm=2.479, loss_scale=2, train_wall=11, gb_free=2.8, wall=58732 2021-06-19 10:57:48 | INFO | train_inner | epoch 002: 2206 / 3002 loss=2.774, ppl=6.84, wps=5904.5, ups=0.09, wpb=64888, bsz=128, num_updates=5173, lr=9.99666e-05, gnorm=2.277, loss_scale=2, train_wall=11, gb_free=2.8, wall=58743 2021-06-19 10:57:59 | INFO | train_inner | epoch 002: 2207 / 3002 loss=2.779, ppl=6.86, wps=5815.8, ups=0.09, wpb=64777, bsz=128, num_updates=5174, lr=9.99666e-05, gnorm=2.383, loss_scale=2, train_wall=11, gb_free=2.8, wall=58754 2021-06-19 10:58:11 | INFO | train_inner | epoch 002: 2208 / 3002 loss=2.768, ppl=6.81, wps=5756.9, ups=0.09, wpb=64816, bsz=128, num_updates=5175, lr=9.99666e-05, gnorm=2.327, loss_scale=2, train_wall=11, gb_free=2.8, wall=58765 2021-06-19 10:58:22 | INFO | train_inner | epoch 002: 2209 / 3002 loss=2.661, ppl=6.32, wps=5878.2, ups=0.09, wpb=64790, bsz=128, num_updates=5176, lr=9.99666e-05, gnorm=2.371, loss_scale=2, train_wall=11, gb_free=2.8, wall=58776 2021-06-19 10:58:33 | INFO | train_inner | epoch 002: 2210 / 3002 loss=2.882, ppl=7.37, wps=5776.5, ups=0.09, wpb=64711, bsz=128, num_updates=5177, lr=9.99666e-05, gnorm=2.374, loss_scale=2, train_wall=11, gb_free=2.8, wall=58787 2021-06-19 10:58:44 | INFO | train_inner | epoch 002: 2211 / 3002 loss=2.8, ppl=6.97, wps=5898, ups=0.09, wpb=64857, bsz=128, num_updates=5178, lr=9.99666e-05, gnorm=13.997, loss_scale=2, train_wall=11, gb_free=2.8, wall=58798 2021-06-19 10:58:55 | INFO | train_inner | epoch 002: 2212 / 3002 loss=2.793, ppl=6.93, wps=5811, ups=0.09, wpb=64770, bsz=128, num_updates=5179, lr=9.99666e-05, gnorm=4.25, loss_scale=2, train_wall=11, gb_free=2.8, wall=58809 2021-06-19 10:59:06 | INFO | train_inner | epoch 002: 2213 / 3002 loss=2.817, ppl=7.05, wps=5859.9, ups=0.09, wpb=64874, bsz=128, num_updates=5180, lr=9.99666e-05, gnorm=2.301, loss_scale=2, train_wall=11, gb_free=2.8, wall=58820 2021-06-19 10:59:17 | INFO | train_inner | epoch 002: 2214 / 3002 loss=2.789, ppl=6.91, wps=5884.7, ups=0.09, wpb=64864, bsz=128, num_updates=5181, lr=9.99665e-05, gnorm=2.322, loss_scale=2, train_wall=11, gb_free=2.8, wall=58831 2021-06-19 10:59:28 | INFO | train_inner | epoch 002: 2215 / 3002 loss=2.811, ppl=7.02, wps=5893.5, ups=0.09, wpb=64831, bsz=128, num_updates=5182, lr=9.99665e-05, gnorm=29.735, loss_scale=2, train_wall=11, gb_free=2.8, wall=58842 2021-06-19 10:59:39 | INFO | train_inner | epoch 002: 2216 / 3002 loss=2.724, ppl=6.61, wps=5746.8, ups=0.09, wpb=64750, bsz=128, num_updates=5183, lr=9.99665e-05, gnorm=2.397, loss_scale=2, train_wall=11, gb_free=2.8, wall=58854 2021-06-19 10:59:50 | INFO | train_inner | epoch 002: 2217 / 3002 loss=2.879, ppl=7.36, wps=5821.2, ups=0.09, wpb=64823, bsz=128, num_updates=5184, lr=9.99665e-05, gnorm=2.509, loss_scale=2, train_wall=11, gb_free=2.8, wall=58865 2021-06-19 11:00:02 | INFO | train_inner | epoch 002: 2218 / 3002 loss=2.728, ppl=6.63, wps=5775.7, ups=0.09, wpb=64763, bsz=128, num_updates=5185, lr=9.99665e-05, gnorm=2.389, loss_scale=2, train_wall=11, gb_free=2.8, wall=58876 2021-06-19 11:00:13 | INFO | train_inner | epoch 002: 2219 / 3002 loss=2.687, ppl=6.44, wps=5823.1, ups=0.09, wpb=64737, bsz=128, num_updates=5186, lr=9.99665e-05, gnorm=2.394, loss_scale=2, train_wall=11, gb_free=2.8, wall=58887 2021-06-19 11:00:24 | INFO | train_inner | epoch 002: 2220 / 3002 loss=2.552, ppl=5.86, wps=5853.4, ups=0.09, wpb=64803, bsz=128, num_updates=5187, lr=9.99665e-05, gnorm=2.317, loss_scale=2, train_wall=11, gb_free=2.8, wall=58898 2021-06-19 11:00:35 | INFO | train_inner | epoch 002: 2221 / 3002 loss=2.582, ppl=5.99, wps=5930.9, ups=0.09, wpb=64824, bsz=128, num_updates=5188, lr=9.99665e-05, gnorm=2.411, loss_scale=2, train_wall=10, gb_free=2.8, wall=58909 2021-06-19 11:00:46 | INFO | train_inner | epoch 002: 2222 / 3002 loss=2.897, ppl=7.45, wps=5750.2, ups=0.09, wpb=64788, bsz=128, num_updates=5189, lr=9.99665e-05, gnorm=5.022, loss_scale=2, train_wall=11, gb_free=2.8, wall=58920 2021-06-19 11:00:57 | INFO | train_inner | epoch 002: 2223 / 3002 loss=2.839, ppl=7.15, wps=5871.7, ups=0.09, wpb=64781, bsz=128, num_updates=5190, lr=9.99665e-05, gnorm=2.4, loss_scale=2, train_wall=11, gb_free=2.8, wall=58931 2021-06-19 11:01:08 | INFO | train_inner | epoch 002: 2224 / 3002 loss=2.883, ppl=7.38, wps=5890.7, ups=0.09, wpb=64855, bsz=128, num_updates=5191, lr=9.99665e-05, gnorm=2.439, loss_scale=2, train_wall=11, gb_free=2.8, wall=58942 2021-06-19 11:01:19 | INFO | train_inner | epoch 002: 2225 / 3002 loss=2.807, ppl=7, wps=5875.9, ups=0.09, wpb=64789, bsz=128, num_updates=5192, lr=9.99665e-05, gnorm=2.482, loss_scale=2, train_wall=11, gb_free=2.8, wall=58953 2021-06-19 11:01:30 | INFO | train_inner | epoch 002: 2226 / 3002 loss=2.665, ppl=6.34, wps=5848, ups=0.09, wpb=64732, bsz=128, num_updates=5193, lr=9.99665e-05, gnorm=2.305, loss_scale=2, train_wall=11, gb_free=2.8, wall=58965 2021-06-19 11:01:41 | INFO | train_inner | epoch 002: 2227 / 3002 loss=2.876, ppl=7.34, wps=5846.3, ups=0.09, wpb=64902, bsz=128, num_updates=5194, lr=9.99664e-05, gnorm=2.24, loss_scale=2, train_wall=11, gb_free=2.8, wall=58976 2021-06-19 11:01:52 | INFO | train_inner | epoch 002: 2228 / 3002 loss=2.719, ppl=6.59, wps=5831.1, ups=0.09, wpb=64780, bsz=128, num_updates=5195, lr=9.99664e-05, gnorm=2.982, loss_scale=2, train_wall=11, gb_free=2.8, wall=58987 2021-06-19 11:02:03 | INFO | train_inner | epoch 002: 2229 / 3002 loss=2.882, ppl=7.37, wps=5894.2, ups=0.09, wpb=64879, bsz=128, num_updates=5196, lr=9.99664e-05, gnorm=2.321, loss_scale=2, train_wall=11, gb_free=2.8, wall=58998 2021-06-19 11:02:14 | INFO | train_inner | epoch 002: 2230 / 3002 loss=2.906, ppl=7.49, wps=5930.5, ups=0.09, wpb=64693, bsz=128, num_updates=5197, lr=9.99664e-05, gnorm=2.305, loss_scale=2, train_wall=10, gb_free=2.8, wall=59009 2021-06-19 11:02:25 | INFO | train_inner | epoch 002: 2231 / 3002 loss=2.693, ppl=6.47, wps=5843.4, ups=0.09, wpb=64800, bsz=128, num_updates=5198, lr=9.99664e-05, gnorm=2.509, loss_scale=2, train_wall=11, gb_free=2.8, wall=59020 2021-06-19 11:02:36 | INFO | train_inner | epoch 002: 2232 / 3002 loss=2.786, ppl=6.9, wps=5939.1, ups=0.09, wpb=64860, bsz=128, num_updates=5199, lr=9.99664e-05, gnorm=2.387, loss_scale=2, train_wall=10, gb_free=2.8, wall=59031 2021-06-19 11:02:47 | INFO | train_inner | epoch 002: 2233 / 3002 loss=2.704, ppl=6.52, wps=5795.1, ups=0.09, wpb=64773, bsz=128, num_updates=5200, lr=9.99664e-05, gnorm=6.271, loss_scale=2, train_wall=11, gb_free=2.8, wall=59042 2021-06-19 11:02:59 | INFO | train_inner | epoch 002: 2234 / 3002 loss=2.784, ppl=6.89, wps=5869.4, ups=0.09, wpb=64782, bsz=128, num_updates=5201, lr=9.99664e-05, gnorm=2.265, loss_scale=2, train_wall=11, gb_free=2.8, wall=59053 2021-06-19 11:03:09 | INFO | train_inner | epoch 002: 2235 / 3002 loss=2.602, ppl=6.07, wps=5935.8, ups=0.09, wpb=64813, bsz=128, num_updates=5202, lr=9.99664e-05, gnorm=2.327, loss_scale=2, train_wall=10, gb_free=2.8, wall=59064 2021-06-19 11:03:21 | INFO | train_inner | epoch 002: 2236 / 3002 loss=2.761, ppl=6.78, wps=5814.6, ups=0.09, wpb=64827, bsz=128, num_updates=5203, lr=9.99664e-05, gnorm=2.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=59075 2021-06-19 11:03:32 | INFO | train_inner | epoch 002: 2237 / 3002 loss=2.824, ppl=7.08, wps=5890.2, ups=0.09, wpb=64851, bsz=128, num_updates=5204, lr=9.99664e-05, gnorm=2.314, loss_scale=2, train_wall=11, gb_free=2.8, wall=59086 2021-06-19 11:03:43 | INFO | train_inner | epoch 002: 2238 / 3002 loss=2.725, ppl=6.61, wps=5858.4, ups=0.09, wpb=64939, bsz=128, num_updates=5205, lr=9.99664e-05, gnorm=2.369, loss_scale=2, train_wall=11, gb_free=2.8, wall=59097 2021-06-19 11:03:54 | INFO | train_inner | epoch 002: 2239 / 3002 loss=2.823, ppl=7.08, wps=5837.8, ups=0.09, wpb=64869, bsz=128, num_updates=5206, lr=9.99663e-05, gnorm=2.328, loss_scale=2, train_wall=11, gb_free=2.8, wall=59108 2021-06-19 11:04:05 | INFO | train_inner | epoch 002: 2240 / 3002 loss=2.606, ppl=6.09, wps=5759.4, ups=0.09, wpb=64785, bsz=128, num_updates=5207, lr=9.99663e-05, gnorm=2.263, loss_scale=2, train_wall=11, gb_free=2.8, wall=59119 2021-06-19 11:04:16 | INFO | train_inner | epoch 002: 2241 / 3002 loss=2.64, ppl=6.23, wps=5742.7, ups=0.09, wpb=64837, bsz=128, num_updates=5208, lr=9.99663e-05, gnorm=2.197, loss_scale=2, train_wall=11, gb_free=2.8, wall=59131 2021-06-19 11:04:28 | INFO | train_inner | epoch 002: 2242 / 3002 loss=2.833, ppl=7.13, wps=5797, ups=0.09, wpb=64754, bsz=128, num_updates=5209, lr=9.99663e-05, gnorm=2.433, loss_scale=2, train_wall=11, gb_free=2.8, wall=59142 2021-06-19 11:04:39 | INFO | train_inner | epoch 002: 2243 / 3002 loss=2.805, ppl=6.99, wps=5877.6, ups=0.09, wpb=64752, bsz=128, num_updates=5210, lr=9.99663e-05, gnorm=2.298, loss_scale=2, train_wall=11, gb_free=2.8, wall=59153 2021-06-19 11:04:49 | INFO | train_inner | epoch 002: 2244 / 3002 loss=2.883, ppl=7.37, wps=5931.5, ups=0.09, wpb=64852, bsz=128, num_updates=5211, lr=9.99663e-05, gnorm=2.304, loss_scale=2, train_wall=10, gb_free=2.8, wall=59164 2021-06-19 11:05:01 | INFO | train_inner | epoch 002: 2245 / 3002 loss=2.711, ppl=6.55, wps=5797, ups=0.09, wpb=64790, bsz=128, num_updates=5212, lr=9.99663e-05, gnorm=2.317, loss_scale=2, train_wall=11, gb_free=2.8, wall=59175 2021-06-19 11:05:12 | INFO | train_inner | epoch 002: 2246 / 3002 loss=2.852, ppl=7.22, wps=5803.7, ups=0.09, wpb=64770, bsz=128, num_updates=5213, lr=9.99663e-05, gnorm=2.203, loss_scale=2, train_wall=11, gb_free=2.8, wall=59186 2021-06-19 11:05:23 | INFO | train_inner | epoch 002: 2247 / 3002 loss=2.633, ppl=6.2, wps=5717.4, ups=0.09, wpb=64784, bsz=128, num_updates=5214, lr=9.99663e-05, gnorm=14.736, loss_scale=2, train_wall=11, gb_free=2.8, wall=59197 2021-06-19 11:05:34 | INFO | train_inner | epoch 002: 2248 / 3002 loss=2.581, ppl=5.98, wps=5782.6, ups=0.09, wpb=64877, bsz=128, num_updates=5215, lr=9.99663e-05, gnorm=2.326, loss_scale=2, train_wall=11, gb_free=2.8, wall=59209 2021-06-19 11:05:46 | INFO | train_inner | epoch 002: 2249 / 3002 loss=2.758, ppl=6.76, wps=5774.9, ups=0.09, wpb=64809, bsz=128, num_updates=5216, lr=9.99663e-05, gnorm=2.262, loss_scale=2, train_wall=11, gb_free=2.8, wall=59220 2021-06-19 11:05:57 | INFO | train_inner | epoch 002: 2250 / 3002 loss=2.686, ppl=6.43, wps=5832.1, ups=0.09, wpb=64864, bsz=128, num_updates=5217, lr=9.99663e-05, gnorm=3.166, loss_scale=2, train_wall=11, gb_free=2.8, wall=59231 2021-06-19 11:06:08 | INFO | train_inner | epoch 002: 2251 / 3002 loss=2.812, ppl=7.02, wps=5769.2, ups=0.09, wpb=64798, bsz=128, num_updates=5218, lr=9.99663e-05, gnorm=2.312, loss_scale=2, train_wall=11, gb_free=2.8, wall=59242 2021-06-19 11:06:19 | INFO | train_inner | epoch 002: 2252 / 3002 loss=2.714, ppl=6.56, wps=5903.3, ups=0.09, wpb=64906, bsz=128, num_updates=5219, lr=9.99662e-05, gnorm=2.454, loss_scale=2, train_wall=11, gb_free=2.8, wall=59253 2021-06-19 11:06:30 | INFO | train_inner | epoch 002: 2253 / 3002 loss=2.811, ppl=7.02, wps=5720.7, ups=0.09, wpb=64864, bsz=128, num_updates=5220, lr=9.99662e-05, gnorm=2.349, loss_scale=2, train_wall=11, gb_free=2.8, wall=59265 2021-06-19 11:06:41 | INFO | train_inner | epoch 002: 2254 / 3002 loss=2.682, ppl=6.42, wps=5777, ups=0.09, wpb=64816, bsz=128, num_updates=5221, lr=9.99662e-05, gnorm=2.289, loss_scale=2, train_wall=11, gb_free=2.8, wall=59276 2021-06-19 11:06:53 | INFO | train_inner | epoch 002: 2255 / 3002 loss=2.781, ppl=6.87, wps=5866.2, ups=0.09, wpb=64803, bsz=128, num_updates=5222, lr=9.99662e-05, gnorm=2.285, loss_scale=2, train_wall=11, gb_free=2.8, wall=59287 2021-06-19 11:07:03 | INFO | train_inner | epoch 002: 2256 / 3002 loss=2.839, ppl=7.15, wps=5923.9, ups=0.09, wpb=64837, bsz=128, num_updates=5223, lr=9.99662e-05, gnorm=2.257, loss_scale=2, train_wall=10, gb_free=2.8, wall=59298 2021-06-19 11:07:15 | INFO | train_inner | epoch 002: 2257 / 3002 loss=2.931, ppl=7.63, wps=5870.5, ups=0.09, wpb=64781, bsz=128, num_updates=5224, lr=9.99662e-05, gnorm=2.426, loss_scale=2, train_wall=11, gb_free=2.8, wall=59309 2021-06-19 11:07:25 | INFO | train_inner | epoch 002: 2258 / 3002 loss=2.913, ppl=7.53, wps=5913.1, ups=0.09, wpb=64812, bsz=128, num_updates=5225, lr=9.99662e-05, gnorm=2.399, loss_scale=2, train_wall=10, gb_free=2.8, wall=59320 2021-06-19 11:07:36 | INFO | train_inner | epoch 002: 2259 / 3002 loss=2.782, ppl=6.88, wps=5988.4, ups=0.09, wpb=64769, bsz=128, num_updates=5226, lr=9.99662e-05, gnorm=2.195, loss_scale=2, train_wall=10, gb_free=2.8, wall=59331 2021-06-19 11:07:47 | INFO | train_inner | epoch 002: 2260 / 3002 loss=2.986, ppl=7.92, wps=5906.5, ups=0.09, wpb=64832, bsz=128, num_updates=5227, lr=9.99662e-05, gnorm=2.207, loss_scale=2, train_wall=11, gb_free=2.8, wall=59342 2021-06-19 11:07:58 | INFO | train_inner | epoch 002: 2261 / 3002 loss=2.763, ppl=6.79, wps=5815.2, ups=0.09, wpb=64878, bsz=128, num_updates=5228, lr=9.99662e-05, gnorm=2.219, loss_scale=2, train_wall=11, gb_free=2.8, wall=59353 2021-06-19 11:08:09 | INFO | train_inner | epoch 002: 2262 / 3002 loss=2.706, ppl=6.53, wps=5898.3, ups=0.09, wpb=64827, bsz=128, num_updates=5229, lr=9.99662e-05, gnorm=2.338, loss_scale=2, train_wall=11, gb_free=2.8, wall=59364 2021-06-19 11:08:20 | INFO | train_inner | epoch 002: 2263 / 3002 loss=2.802, ppl=6.97, wps=5990.7, ups=0.09, wpb=64820, bsz=128, num_updates=5230, lr=9.99662e-05, gnorm=2.258, loss_scale=2, train_wall=10, gb_free=2.8, wall=59375 2021-06-19 11:08:31 | INFO | train_inner | epoch 002: 2264 / 3002 loss=2.731, ppl=6.64, wps=5903.7, ups=0.09, wpb=64912, bsz=128, num_updates=5231, lr=9.99661e-05, gnorm=2.325, loss_scale=2, train_wall=11, gb_free=2.8, wall=59386 2021-06-19 11:08:42 | INFO | train_inner | epoch 002: 2265 / 3002 loss=2.654, ppl=6.29, wps=5882.2, ups=0.09, wpb=64820, bsz=128, num_updates=5232, lr=9.99661e-05, gnorm=2.238, loss_scale=2, train_wall=11, gb_free=2.8, wall=59397 2021-06-19 11:08:53 | INFO | train_inner | epoch 002: 2266 / 3002 loss=2.803, ppl=6.98, wps=6079.6, ups=0.09, wpb=64887, bsz=128, num_updates=5233, lr=9.99661e-05, gnorm=2.242, loss_scale=2, train_wall=10, gb_free=2.8, wall=59407 2021-06-19 11:09:04 | INFO | train_inner | epoch 002: 2267 / 3002 loss=2.766, ppl=6.8, wps=5844.4, ups=0.09, wpb=64913, bsz=128, num_updates=5234, lr=9.99661e-05, gnorm=2.294, loss_scale=2, train_wall=11, gb_free=2.8, wall=59418 2021-06-19 11:09:15 | INFO | train_inner | epoch 002: 2268 / 3002 loss=2.804, ppl=6.98, wps=5868.2, ups=0.09, wpb=64788, bsz=128, num_updates=5235, lr=9.99661e-05, gnorm=2.223, loss_scale=2, train_wall=11, gb_free=2.8, wall=59429 2021-06-19 11:09:26 | INFO | train_inner | epoch 002: 2269 / 3002 loss=2.781, ppl=6.88, wps=5790.7, ups=0.09, wpb=64787, bsz=128, num_updates=5236, lr=9.99661e-05, gnorm=2.183, loss_scale=2, train_wall=11, gb_free=2.8, wall=59441 2021-06-19 11:09:38 | INFO | train_inner | epoch 002: 2270 / 3002 loss=2.673, ppl=6.38, wps=5718.3, ups=0.09, wpb=64798, bsz=128, num_updates=5237, lr=9.99661e-05, gnorm=2.255, loss_scale=4, train_wall=11, gb_free=2.8, wall=59452 2021-06-19 11:09:49 | INFO | train_inner | epoch 002: 2271 / 3002 loss=2.76, ppl=6.77, wps=5777.3, ups=0.09, wpb=64789, bsz=128, num_updates=5238, lr=9.99661e-05, gnorm=2.303, loss_scale=4, train_wall=11, gb_free=2.8, wall=59463 2021-06-19 11:10:00 | INFO | train_inner | epoch 002: 2272 / 3002 loss=2.631, ppl=6.19, wps=5807.8, ups=0.09, wpb=64865, bsz=128, num_updates=5239, lr=9.99661e-05, gnorm=2.082, loss_scale=4, train_wall=11, gb_free=2.8, wall=59474 2021-06-19 11:10:11 | INFO | train_inner | epoch 002: 2273 / 3002 loss=2.571, ppl=5.94, wps=5840.7, ups=0.09, wpb=64824, bsz=128, num_updates=5240, lr=9.99661e-05, gnorm=2.259, loss_scale=4, train_wall=11, gb_free=2.8, wall=59485 2021-06-19 11:10:22 | INFO | train_inner | epoch 002: 2274 / 3002 loss=2.778, ppl=6.86, wps=5948.8, ups=0.09, wpb=64857, bsz=128, num_updates=5241, lr=9.99661e-05, gnorm=2.236, loss_scale=4, train_wall=10, gb_free=2.8, wall=59496 2021-06-19 11:10:33 | INFO | train_inner | epoch 002: 2275 / 3002 loss=2.8, ppl=6.96, wps=5783.6, ups=0.09, wpb=64848, bsz=128, num_updates=5242, lr=9.99661e-05, gnorm=2.21, loss_scale=4, train_wall=11, gb_free=2.8, wall=59508 2021-06-19 11:10:44 | INFO | train_inner | epoch 002: 2276 / 3002 loss=2.622, ppl=6.15, wps=5864.7, ups=0.09, wpb=64811, bsz=128, num_updates=5243, lr=9.99661e-05, gnorm=2.275, loss_scale=4, train_wall=11, gb_free=2.8, wall=59519 2021-06-19 11:10:55 | INFO | train_inner | epoch 002: 2277 / 3002 loss=2.775, ppl=6.85, wps=5871, ups=0.09, wpb=64884, bsz=128, num_updates=5244, lr=9.9966e-05, gnorm=2.269, loss_scale=4, train_wall=11, gb_free=2.8, wall=59530 2021-06-19 11:11:06 | INFO | train_inner | epoch 002: 2278 / 3002 loss=2.819, ppl=7.05, wps=5947.1, ups=0.09, wpb=64776, bsz=128, num_updates=5245, lr=9.9966e-05, gnorm=2.256, loss_scale=4, train_wall=10, gb_free=2.8, wall=59541 2021-06-19 11:11:17 | INFO | train_inner | epoch 002: 2279 / 3002 loss=2.649, ppl=6.27, wps=5935.8, ups=0.09, wpb=64815, bsz=128, num_updates=5246, lr=9.9966e-05, gnorm=2.213, loss_scale=4, train_wall=10, gb_free=2.8, wall=59551 2021-06-19 11:11:28 | INFO | train_inner | epoch 002: 2280 / 3002 loss=2.722, ppl=6.6, wps=5887.4, ups=0.09, wpb=64810, bsz=128, num_updates=5247, lr=9.9966e-05, gnorm=2.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=59562 2021-06-19 11:11:39 | INFO | train_inner | epoch 002: 2281 / 3002 loss=2.682, ppl=6.42, wps=5915, ups=0.09, wpb=64793, bsz=128, num_updates=5248, lr=9.9966e-05, gnorm=2.253, loss_scale=4, train_wall=11, gb_free=2.8, wall=59573 2021-06-19 11:11:50 | INFO | train_inner | epoch 002: 2282 / 3002 loss=2.786, ppl=6.9, wps=5878.5, ups=0.09, wpb=64883, bsz=128, num_updates=5249, lr=9.9966e-05, gnorm=2.223, loss_scale=4, train_wall=11, gb_free=2.8, wall=59584 2021-06-19 11:12:01 | INFO | train_inner | epoch 002: 2283 / 3002 loss=2.758, ppl=6.77, wps=5925.5, ups=0.09, wpb=64792, bsz=128, num_updates=5250, lr=9.9966e-05, gnorm=2.166, loss_scale=4, train_wall=10, gb_free=2.8, wall=59595 2021-06-19 11:12:12 | INFO | train_inner | epoch 002: 2284 / 3002 loss=2.774, ppl=6.84, wps=5863.8, ups=0.09, wpb=64848, bsz=128, num_updates=5251, lr=9.9966e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=59606 2021-06-19 11:12:23 | INFO | train_inner | epoch 002: 2285 / 3002 loss=2.673, ppl=6.38, wps=5779.8, ups=0.09, wpb=64815, bsz=128, num_updates=5252, lr=9.9966e-05, gnorm=10.724, loss_scale=4, train_wall=11, gb_free=2.8, wall=59618 2021-06-19 11:12:34 | INFO | train_inner | epoch 002: 2286 / 3002 loss=2.709, ppl=6.54, wps=5835.6, ups=0.09, wpb=64787, bsz=128, num_updates=5253, lr=9.9966e-05, gnorm=2.318, loss_scale=4, train_wall=11, gb_free=2.8, wall=59629 2021-06-19 11:12:45 | INFO | train_inner | epoch 002: 2287 / 3002 loss=2.661, ppl=6.32, wps=5889.4, ups=0.09, wpb=64865, bsz=128, num_updates=5254, lr=9.9966e-05, gnorm=2.169, loss_scale=4, train_wall=11, gb_free=2.8, wall=59640 2021-06-19 11:12:56 | INFO | train_inner | epoch 002: 2288 / 3002 loss=2.74, ppl=6.68, wps=5890.7, ups=0.09, wpb=64881, bsz=128, num_updates=5255, lr=9.9966e-05, gnorm=2.212, loss_scale=4, train_wall=11, gb_free=2.8, wall=59651 2021-06-19 11:13:07 | INFO | train_inner | epoch 002: 2289 / 3002 loss=2.885, ppl=7.39, wps=5935.5, ups=0.09, wpb=64879, bsz=128, num_updates=5256, lr=9.99659e-05, gnorm=2.371, loss_scale=4, train_wall=10, gb_free=2.8, wall=59662 2021-06-19 11:13:18 | INFO | train_inner | epoch 002: 2290 / 3002 loss=2.614, ppl=6.12, wps=5857.3, ups=0.09, wpb=64692, bsz=128, num_updates=5257, lr=9.99659e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=59673 2021-06-19 11:13:30 | INFO | train_inner | epoch 002: 2291 / 3002 loss=2.759, ppl=6.77, wps=5839.1, ups=0.09, wpb=64784, bsz=128, num_updates=5258, lr=9.99659e-05, gnorm=2.757, loss_scale=4, train_wall=11, gb_free=2.8, wall=59684 2021-06-19 11:13:41 | INFO | train_inner | epoch 002: 2292 / 3002 loss=2.853, ppl=7.23, wps=5875.1, ups=0.09, wpb=64745, bsz=128, num_updates=5259, lr=9.99659e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=59695 2021-06-19 11:13:52 | INFO | train_inner | epoch 002: 2293 / 3002 loss=2.752, ppl=6.74, wps=5703.4, ups=0.09, wpb=64856, bsz=128, num_updates=5260, lr=9.99659e-05, gnorm=2.717, loss_scale=4, train_wall=11, gb_free=2.8, wall=59706 2021-06-19 11:14:03 | INFO | train_inner | epoch 002: 2294 / 3002 loss=2.767, ppl=6.81, wps=5730.7, ups=0.09, wpb=64842, bsz=128, num_updates=5261, lr=9.99659e-05, gnorm=2.198, loss_scale=4, train_wall=11, gb_free=2.8, wall=59718 2021-06-19 11:14:14 | INFO | train_inner | epoch 002: 2295 / 3002 loss=2.814, ppl=7.03, wps=5799.3, ups=0.09, wpb=64844, bsz=128, num_updates=5262, lr=9.99659e-05, gnorm=2.234, loss_scale=4, train_wall=11, gb_free=2.8, wall=59729 2021-06-19 11:14:25 | INFO | train_inner | epoch 002: 2296 / 3002 loss=2.717, ppl=6.58, wps=5881.9, ups=0.09, wpb=64769, bsz=128, num_updates=5263, lr=9.99659e-05, gnorm=2.126, loss_scale=4, train_wall=11, gb_free=2.8, wall=59740 2021-06-19 11:14:37 | INFO | train_inner | epoch 002: 2297 / 3002 loss=2.912, ppl=7.53, wps=5806.8, ups=0.09, wpb=64756, bsz=128, num_updates=5264, lr=9.99659e-05, gnorm=2.261, loss_scale=4, train_wall=11, gb_free=2.8, wall=59751 2021-06-19 11:14:48 | INFO | train_inner | epoch 002: 2298 / 3002 loss=2.813, ppl=7.03, wps=5894.7, ups=0.09, wpb=64793, bsz=128, num_updates=5265, lr=9.99659e-05, gnorm=2.311, loss_scale=4, train_wall=11, gb_free=2.8, wall=59762 2021-06-19 11:14:59 | INFO | train_inner | epoch 002: 2299 / 3002 loss=2.942, ppl=7.68, wps=5904.2, ups=0.09, wpb=64896, bsz=128, num_updates=5266, lr=9.99659e-05, gnorm=2.448, loss_scale=4, train_wall=11, gb_free=2.8, wall=59773 2021-06-19 11:15:10 | INFO | train_inner | epoch 002: 2300 / 3002 loss=2.704, ppl=6.52, wps=5828, ups=0.09, wpb=64792, bsz=128, num_updates=5267, lr=9.99659e-05, gnorm=2.253, loss_scale=4, train_wall=11, gb_free=2.8, wall=59784 2021-06-19 11:15:21 | INFO | train_inner | epoch 002: 2301 / 3002 loss=2.798, ppl=6.95, wps=5738.8, ups=0.09, wpb=64880, bsz=128, num_updates=5268, lr=9.99659e-05, gnorm=2.208, loss_scale=4, train_wall=11, gb_free=2.8, wall=59795 2021-06-19 11:15:32 | INFO | train_inner | epoch 002: 2302 / 3002 loss=2.74, ppl=6.68, wps=5872, ups=0.09, wpb=64904, bsz=128, num_updates=5269, lr=9.99658e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=59806 2021-06-19 11:15:43 | INFO | train_inner | epoch 002: 2303 / 3002 loss=2.757, ppl=6.76, wps=5877.6, ups=0.09, wpb=64857, bsz=128, num_updates=5270, lr=9.99658e-05, gnorm=2.224, loss_scale=4, train_wall=11, gb_free=2.8, wall=59817 2021-06-19 11:15:54 | INFO | train_inner | epoch 002: 2304 / 3002 loss=2.737, ppl=6.67, wps=5674.3, ups=0.09, wpb=64826, bsz=128, num_updates=5271, lr=9.99658e-05, gnorm=2.133, loss_scale=4, train_wall=11, gb_free=2.8, wall=59829 2021-06-19 11:16:06 | INFO | train_inner | epoch 002: 2305 / 3002 loss=2.729, ppl=6.63, wps=5850.4, ups=0.09, wpb=64860, bsz=128, num_updates=5272, lr=9.99658e-05, gnorm=2.156, loss_scale=4, train_wall=11, gb_free=2.8, wall=59840 2021-06-19 11:16:17 | INFO | train_inner | epoch 002: 2306 / 3002 loss=2.674, ppl=6.38, wps=5913.9, ups=0.09, wpb=64890, bsz=128, num_updates=5273, lr=9.99658e-05, gnorm=2.197, loss_scale=4, train_wall=10, gb_free=2.8, wall=59851 2021-06-19 11:16:28 | INFO | train_inner | epoch 002: 2307 / 3002 loss=2.829, ppl=7.11, wps=5838.5, ups=0.09, wpb=64873, bsz=128, num_updates=5274, lr=9.99658e-05, gnorm=2.251, loss_scale=4, train_wall=11, gb_free=2.8, wall=59862 2021-06-19 11:16:39 | INFO | train_inner | epoch 002: 2308 / 3002 loss=2.609, ppl=6.1, wps=5840.6, ups=0.09, wpb=64789, bsz=128, num_updates=5275, lr=9.99658e-05, gnorm=2.208, loss_scale=4, train_wall=11, gb_free=2.8, wall=59873 2021-06-19 11:16:50 | INFO | train_inner | epoch 002: 2309 / 3002 loss=2.735, ppl=6.66, wps=5910.4, ups=0.09, wpb=64851, bsz=128, num_updates=5276, lr=9.99658e-05, gnorm=2.267, loss_scale=4, train_wall=11, gb_free=2.8, wall=59884 2021-06-19 11:17:01 | INFO | train_inner | epoch 002: 2310 / 3002 loss=2.862, ppl=7.27, wps=5786, ups=0.09, wpb=64792, bsz=128, num_updates=5277, lr=9.99658e-05, gnorm=2.286, loss_scale=4, train_wall=11, gb_free=2.8, wall=59895 2021-06-19 11:17:12 | INFO | train_inner | epoch 002: 2311 / 3002 loss=2.801, ppl=6.97, wps=5816.7, ups=0.09, wpb=64788, bsz=128, num_updates=5278, lr=9.99658e-05, gnorm=2.313, loss_scale=4, train_wall=11, gb_free=2.8, wall=59906 2021-06-19 11:17:23 | INFO | train_inner | epoch 002: 2312 / 3002 loss=2.74, ppl=6.68, wps=5857.5, ups=0.09, wpb=64727, bsz=128, num_updates=5279, lr=9.99658e-05, gnorm=2.171, loss_scale=4, train_wall=11, gb_free=2.8, wall=59917 2021-06-19 11:17:34 | INFO | train_inner | epoch 002: 2313 / 3002 loss=2.725, ppl=6.61, wps=5876, ups=0.09, wpb=64785, bsz=128, num_updates=5280, lr=9.99658e-05, gnorm=2.138, loss_scale=4, train_wall=11, gb_free=2.8, wall=59928 2021-06-19 11:17:45 | INFO | train_inner | epoch 002: 2314 / 3002 loss=2.688, ppl=6.44, wps=5912.1, ups=0.09, wpb=64932, bsz=128, num_updates=5281, lr=9.99657e-05, gnorm=2.191, loss_scale=4, train_wall=11, gb_free=2.8, wall=59939 2021-06-19 11:17:56 | INFO | train_inner | epoch 002: 2315 / 3002 loss=2.567, ppl=5.93, wps=5791.3, ups=0.09, wpb=64801, bsz=128, num_updates=5282, lr=9.99657e-05, gnorm=2.105, loss_scale=4, train_wall=11, gb_free=2.8, wall=59951 2021-06-19 11:18:07 | INFO | train_inner | epoch 002: 2316 / 3002 loss=2.639, ppl=6.23, wps=5920.5, ups=0.09, wpb=64839, bsz=128, num_updates=5283, lr=9.99657e-05, gnorm=2.114, loss_scale=4, train_wall=11, gb_free=2.8, wall=59962 2021-06-19 11:18:18 | INFO | train_inner | epoch 002: 2317 / 3002 loss=2.824, ppl=7.08, wps=5894.4, ups=0.09, wpb=64838, bsz=128, num_updates=5284, lr=9.99657e-05, gnorm=2.227, loss_scale=4, train_wall=11, gb_free=2.8, wall=59973 2021-06-19 11:18:29 | INFO | train_inner | epoch 002: 2318 / 3002 loss=2.793, ppl=6.93, wps=5960.8, ups=0.09, wpb=64941, bsz=128, num_updates=5285, lr=9.99657e-05, gnorm=2.253, loss_scale=4, train_wall=10, gb_free=2.8, wall=59983 2021-06-19 11:18:40 | INFO | train_inner | epoch 002: 2319 / 3002 loss=2.746, ppl=6.71, wps=5984.2, ups=0.09, wpb=64892, bsz=128, num_updates=5286, lr=9.99657e-05, gnorm=2.246, loss_scale=4, train_wall=10, gb_free=2.8, wall=59994 2021-06-19 11:18:51 | INFO | train_inner | epoch 002: 2320 / 3002 loss=2.708, ppl=6.53, wps=5889.4, ups=0.09, wpb=64811, bsz=128, num_updates=5287, lr=9.99657e-05, gnorm=2.247, loss_scale=4, train_wall=11, gb_free=2.8, wall=60005 2021-06-19 11:19:02 | INFO | train_inner | epoch 002: 2321 / 3002 loss=2.707, ppl=6.53, wps=5948, ups=0.09, wpb=64846, bsz=128, num_updates=5288, lr=9.99657e-05, gnorm=2.354, loss_scale=4, train_wall=10, gb_free=2.8, wall=60016 2021-06-19 11:19:13 | INFO | train_inner | epoch 002: 2322 / 3002 loss=2.575, ppl=5.96, wps=5863.3, ups=0.09, wpb=64865, bsz=128, num_updates=5289, lr=9.99657e-05, gnorm=2.211, loss_scale=4, train_wall=11, gb_free=2.8, wall=60027 2021-06-19 11:19:24 | INFO | train_inner | epoch 002: 2323 / 3002 loss=2.623, ppl=6.16, wps=5872.4, ups=0.09, wpb=64796, bsz=128, num_updates=5290, lr=9.99657e-05, gnorm=2.819, loss_scale=4, train_wall=11, gb_free=2.8, wall=60038 2021-06-19 11:19:35 | INFO | train_inner | epoch 002: 2324 / 3002 loss=2.777, ppl=6.85, wps=5900.8, ups=0.09, wpb=64846, bsz=128, num_updates=5291, lr=9.99657e-05, gnorm=3.056, loss_scale=4, train_wall=11, gb_free=2.8, wall=60049 2021-06-19 11:19:46 | INFO | train_inner | epoch 002: 2325 / 3002 loss=2.936, ppl=7.66, wps=5856.5, ups=0.09, wpb=64884, bsz=128, num_updates=5292, lr=9.99657e-05, gnorm=2.304, loss_scale=4, train_wall=11, gb_free=2.8, wall=60060 2021-06-19 11:19:57 | INFO | train_inner | epoch 002: 2326 / 3002 loss=2.768, ppl=6.81, wps=5821.6, ups=0.09, wpb=64825, bsz=128, num_updates=5293, lr=9.99657e-05, gnorm=2.245, loss_scale=4, train_wall=11, gb_free=2.8, wall=60072 2021-06-19 11:20:09 | INFO | train_inner | epoch 002: 2327 / 3002 loss=2.629, ppl=6.18, wps=5725.1, ups=0.09, wpb=64799, bsz=128, num_updates=5294, lr=9.99656e-05, gnorm=2.122, loss_scale=4, train_wall=11, gb_free=2.8, wall=60083 2021-06-19 11:20:20 | INFO | train_inner | epoch 002: 2328 / 3002 loss=2.654, ppl=6.29, wps=5866.5, ups=0.09, wpb=64842, bsz=128, num_updates=5295, lr=9.99656e-05, gnorm=2.251, loss_scale=4, train_wall=11, gb_free=2.8, wall=60094 2021-06-19 11:20:31 | INFO | train_inner | epoch 002: 2329 / 3002 loss=2.69, ppl=6.45, wps=5885.8, ups=0.09, wpb=64841, bsz=128, num_updates=5296, lr=9.99656e-05, gnorm=2.291, loss_scale=4, train_wall=11, gb_free=2.8, wall=60105 2021-06-19 11:20:42 | INFO | train_inner | epoch 002: 2330 / 3002 loss=2.543, ppl=5.83, wps=5794.8, ups=0.09, wpb=64836, bsz=128, num_updates=5297, lr=9.99656e-05, gnorm=2.28, loss_scale=4, train_wall=11, gb_free=2.8, wall=60116 2021-06-19 11:20:53 | INFO | train_inner | epoch 002: 2331 / 3002 loss=2.738, ppl=6.67, wps=5833.6, ups=0.09, wpb=64829, bsz=128, num_updates=5298, lr=9.99656e-05, gnorm=2.234, loss_scale=4, train_wall=11, gb_free=2.8, wall=60127 2021-06-19 11:21:04 | INFO | train_inner | epoch 002: 2332 / 3002 loss=2.63, ppl=6.19, wps=5844.2, ups=0.09, wpb=64706, bsz=128, num_updates=5299, lr=9.99656e-05, gnorm=2.858, loss_scale=4, train_wall=11, gb_free=2.8, wall=60138 2021-06-19 11:21:15 | INFO | train_inner | epoch 002: 2333 / 3002 loss=2.799, ppl=6.96, wps=5765.3, ups=0.09, wpb=64789, bsz=128, num_updates=5300, lr=9.99656e-05, gnorm=2.283, loss_scale=4, train_wall=11, gb_free=2.8, wall=60150 2021-06-19 11:21:26 | INFO | train_inner | epoch 002: 2334 / 3002 loss=2.768, ppl=6.81, wps=5757.6, ups=0.09, wpb=64759, bsz=128, num_updates=5301, lr=9.99656e-05, gnorm=2.18, loss_scale=4, train_wall=11, gb_free=2.8, wall=60161 2021-06-19 11:21:38 | INFO | train_inner | epoch 002: 2335 / 3002 loss=2.669, ppl=6.36, wps=5792.9, ups=0.09, wpb=64880, bsz=128, num_updates=5302, lr=9.99656e-05, gnorm=2.192, loss_scale=4, train_wall=11, gb_free=2.8, wall=60172 2021-06-19 11:21:49 | INFO | train_inner | epoch 002: 2336 / 3002 loss=2.629, ppl=6.19, wps=5845.6, ups=0.09, wpb=64924, bsz=128, num_updates=5303, lr=9.99656e-05, gnorm=2.229, loss_scale=4, train_wall=11, gb_free=2.8, wall=60183 2021-06-19 11:22:00 | INFO | train_inner | epoch 002: 2337 / 3002 loss=2.722, ppl=6.6, wps=5831.3, ups=0.09, wpb=64826, bsz=128, num_updates=5304, lr=9.99656e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=60194 2021-06-19 11:22:11 | INFO | train_inner | epoch 002: 2338 / 3002 loss=2.594, ppl=6.04, wps=5828.6, ups=0.09, wpb=64861, bsz=128, num_updates=5305, lr=9.99656e-05, gnorm=2.226, loss_scale=4, train_wall=11, gb_free=2.8, wall=60205 2021-06-19 11:22:22 | INFO | train_inner | epoch 002: 2339 / 3002 loss=2.713, ppl=6.56, wps=5845.4, ups=0.09, wpb=64849, bsz=128, num_updates=5306, lr=9.99655e-05, gnorm=2.257, loss_scale=4, train_wall=11, gb_free=2.8, wall=60216 2021-06-19 11:22:33 | INFO | train_inner | epoch 002: 2340 / 3002 loss=2.738, ppl=6.67, wps=5806.9, ups=0.09, wpb=64744, bsz=128, num_updates=5307, lr=9.99655e-05, gnorm=2.264, loss_scale=4, train_wall=11, gb_free=2.8, wall=60228 2021-06-19 11:22:45 | INFO | train_inner | epoch 002: 2341 / 3002 loss=2.511, ppl=5.7, wps=5766.9, ups=0.09, wpb=64847, bsz=128, num_updates=5308, lr=9.99655e-05, gnorm=2.366, loss_scale=4, train_wall=11, gb_free=2.8, wall=60239 2021-06-19 11:22:56 | INFO | train_inner | epoch 002: 2342 / 3002 loss=2.671, ppl=6.37, wps=5897.5, ups=0.09, wpb=64875, bsz=128, num_updates=5309, lr=9.99655e-05, gnorm=2.318, loss_scale=4, train_wall=11, gb_free=2.8, wall=60250 2021-06-19 11:23:06 | INFO | train_inner | epoch 002: 2343 / 3002 loss=2.724, ppl=6.61, wps=5929.5, ups=0.09, wpb=64889, bsz=128, num_updates=5310, lr=9.99655e-05, gnorm=2.236, loss_scale=4, train_wall=10, gb_free=2.8, wall=60261 2021-06-19 11:23:18 | INFO | train_inner | epoch 002: 2344 / 3002 loss=2.611, ppl=6.11, wps=5861, ups=0.09, wpb=64797, bsz=128, num_updates=5311, lr=9.99655e-05, gnorm=2.716, loss_scale=4, train_wall=11, gb_free=2.8, wall=60272 2021-06-19 11:23:29 | INFO | train_inner | epoch 002: 2345 / 3002 loss=2.771, ppl=6.83, wps=5884.6, ups=0.09, wpb=64788, bsz=128, num_updates=5312, lr=9.99655e-05, gnorm=2.339, loss_scale=4, train_wall=11, gb_free=2.8, wall=60283 2021-06-19 11:23:40 | INFO | train_inner | epoch 002: 2346 / 3002 loss=2.888, ppl=7.4, wps=5762, ups=0.09, wpb=64596, bsz=128, num_updates=5313, lr=9.99655e-05, gnorm=2.228, loss_scale=4, train_wall=11, gb_free=2.8, wall=60294 2021-06-19 11:23:51 | INFO | train_inner | epoch 002: 2347 / 3002 loss=2.681, ppl=6.41, wps=5821.1, ups=0.09, wpb=64778, bsz=128, num_updates=5314, lr=9.99655e-05, gnorm=2.182, loss_scale=4, train_wall=11, gb_free=2.8, wall=60305 2021-06-19 11:24:02 | INFO | train_inner | epoch 002: 2348 / 3002 loss=2.739, ppl=6.68, wps=5837.9, ups=0.09, wpb=64828, bsz=128, num_updates=5315, lr=9.99655e-05, gnorm=2.253, loss_scale=4, train_wall=11, gb_free=2.8, wall=60316 2021-06-19 11:24:13 | INFO | train_inner | epoch 002: 2349 / 3002 loss=2.84, ppl=7.16, wps=5931.3, ups=0.09, wpb=64861, bsz=128, num_updates=5316, lr=9.99655e-05, gnorm=2.224, loss_scale=4, train_wall=10, gb_free=2.8, wall=60327 2021-06-19 11:24:24 | INFO | train_inner | epoch 002: 2350 / 3002 loss=2.651, ppl=6.28, wps=5818.5, ups=0.09, wpb=64808, bsz=128, num_updates=5317, lr=9.99655e-05, gnorm=2.196, loss_scale=4, train_wall=11, gb_free=2.8, wall=60338 2021-06-19 11:24:35 | INFO | train_inner | epoch 002: 2351 / 3002 loss=2.788, ppl=6.91, wps=5799.1, ups=0.09, wpb=64818, bsz=128, num_updates=5318, lr=9.99655e-05, gnorm=2.291, loss_scale=4, train_wall=11, gb_free=2.8, wall=60350 2021-06-19 11:24:47 | INFO | train_inner | epoch 002: 2352 / 3002 loss=2.684, ppl=6.42, wps=5731, ups=0.09, wpb=64799, bsz=128, num_updates=5319, lr=9.99654e-05, gnorm=2.151, loss_scale=4, train_wall=11, gb_free=2.8, wall=60361 2021-06-19 11:24:58 | INFO | train_inner | epoch 002: 2353 / 3002 loss=2.799, ppl=6.96, wps=5859.4, ups=0.09, wpb=64837, bsz=128, num_updates=5320, lr=9.99654e-05, gnorm=2.193, loss_scale=4, train_wall=11, gb_free=2.8, wall=60372 2021-06-19 11:25:09 | INFO | train_inner | epoch 002: 2354 / 3002 loss=2.765, ppl=6.8, wps=5830.6, ups=0.09, wpb=64829, bsz=128, num_updates=5321, lr=9.99654e-05, gnorm=2.264, loss_scale=4, train_wall=11, gb_free=2.8, wall=60383 2021-06-19 11:25:20 | INFO | train_inner | epoch 002: 2355 / 3002 loss=2.785, ppl=6.89, wps=5892.1, ups=0.09, wpb=64865, bsz=128, num_updates=5322, lr=9.99654e-05, gnorm=2.227, loss_scale=4, train_wall=11, gb_free=2.8, wall=60394 2021-06-19 11:25:31 | INFO | train_inner | epoch 002: 2356 / 3002 loss=2.762, ppl=6.78, wps=5698.5, ups=0.09, wpb=64646, bsz=128, num_updates=5323, lr=9.99654e-05, gnorm=3.012, loss_scale=4, train_wall=11, gb_free=2.8, wall=60405 2021-06-19 11:25:42 | INFO | train_inner | epoch 002: 2357 / 3002 loss=2.865, ppl=7.29, wps=5732.9, ups=0.09, wpb=64837, bsz=128, num_updates=5324, lr=9.99654e-05, gnorm=2.214, loss_scale=4, train_wall=11, gb_free=2.8, wall=60417 2021-06-19 11:25:54 | INFO | train_inner | epoch 002: 2358 / 3002 loss=2.642, ppl=6.24, wps=5777.3, ups=0.09, wpb=64862, bsz=128, num_updates=5325, lr=9.99654e-05, gnorm=2.17, loss_scale=4, train_wall=11, gb_free=2.8, wall=60428 2021-06-19 11:26:05 | INFO | train_inner | epoch 002: 2359 / 3002 loss=2.797, ppl=6.95, wps=5798.3, ups=0.09, wpb=64867, bsz=128, num_updates=5326, lr=9.99654e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=60439 2021-06-19 11:26:16 | INFO | train_inner | epoch 002: 2360 / 3002 loss=2.739, ppl=6.68, wps=5711, ups=0.09, wpb=64841, bsz=128, num_updates=5327, lr=9.99654e-05, gnorm=2.198, loss_scale=4, train_wall=11, gb_free=2.8, wall=60450 2021-06-19 11:26:27 | INFO | train_inner | epoch 002: 2361 / 3002 loss=2.645, ppl=6.26, wps=5847.2, ups=0.09, wpb=64839, bsz=128, num_updates=5328, lr=9.99654e-05, gnorm=2.16, loss_scale=4, train_wall=11, gb_free=2.8, wall=60462 2021-06-19 11:26:38 | INFO | train_inner | epoch 002: 2362 / 3002 loss=2.741, ppl=6.69, wps=5919.9, ups=0.09, wpb=64898, bsz=128, num_updates=5329, lr=9.99654e-05, gnorm=2.257, loss_scale=4, train_wall=11, gb_free=2.8, wall=60473 2021-06-19 11:26:49 | INFO | train_inner | epoch 002: 2363 / 3002 loss=2.684, ppl=6.43, wps=5954.8, ups=0.09, wpb=64906, bsz=128, num_updates=5330, lr=9.99654e-05, gnorm=2.115, loss_scale=4, train_wall=10, gb_free=2.8, wall=60483 2021-06-19 11:27:00 | INFO | train_inner | epoch 002: 2364 / 3002 loss=2.656, ppl=6.3, wps=5764.5, ups=0.09, wpb=64852, bsz=128, num_updates=5331, lr=9.99653e-05, gnorm=4.62, loss_scale=4, train_wall=11, gb_free=2.8, wall=60495 2021-06-19 11:27:11 | INFO | train_inner | epoch 002: 2365 / 3002 loss=2.76, ppl=6.77, wps=5839.9, ups=0.09, wpb=64807, bsz=128, num_updates=5332, lr=9.99653e-05, gnorm=2.232, loss_scale=4, train_wall=11, gb_free=2.8, wall=60506 2021-06-19 11:27:22 | INFO | train_inner | epoch 002: 2366 / 3002 loss=2.684, ppl=6.43, wps=5934.3, ups=0.09, wpb=64823, bsz=128, num_updates=5333, lr=9.99653e-05, gnorm=2.174, loss_scale=4, train_wall=10, gb_free=2.8, wall=60517 2021-06-19 11:27:33 | INFO | train_inner | epoch 002: 2367 / 3002 loss=2.788, ppl=6.9, wps=5937.4, ups=0.09, wpb=64801, bsz=128, num_updates=5334, lr=9.99653e-05, gnorm=2.172, loss_scale=4, train_wall=10, gb_free=2.8, wall=60528 2021-06-19 11:27:44 | INFO | train_inner | epoch 002: 2368 / 3002 loss=2.686, ppl=6.43, wps=5843.3, ups=0.09, wpb=64921, bsz=128, num_updates=5335, lr=9.99653e-05, gnorm=2.103, loss_scale=4, train_wall=11, gb_free=2.8, wall=60539 2021-06-19 11:27:55 | INFO | train_inner | epoch 002: 2369 / 3002 loss=2.716, ppl=6.57, wps=5899.9, ups=0.09, wpb=64847, bsz=128, num_updates=5336, lr=9.99653e-05, gnorm=2.23, loss_scale=4, train_wall=11, gb_free=2.8, wall=60550 2021-06-19 11:28:06 | INFO | train_inner | epoch 002: 2370 / 3002 loss=2.845, ppl=7.19, wps=5877.4, ups=0.09, wpb=64847, bsz=128, num_updates=5337, lr=9.99653e-05, gnorm=2.388, loss_scale=4, train_wall=11, gb_free=2.8, wall=60561 2021-06-19 11:28:17 | INFO | train_inner | epoch 002: 2371 / 3002 loss=2.784, ppl=6.89, wps=5862.1, ups=0.09, wpb=64586, bsz=128, num_updates=5338, lr=9.99653e-05, gnorm=2.277, loss_scale=4, train_wall=11, gb_free=2.8, wall=60572 2021-06-19 11:28:29 | INFO | train_inner | epoch 002: 2372 / 3002 loss=2.905, ppl=7.49, wps=5834.7, ups=0.09, wpb=64833, bsz=128, num_updates=5339, lr=9.99653e-05, gnorm=2.246, loss_scale=4, train_wall=11, gb_free=2.8, wall=60583 2021-06-19 11:28:40 | INFO | train_inner | epoch 002: 2373 / 3002 loss=2.784, ppl=6.89, wps=5881.7, ups=0.09, wpb=64865, bsz=128, num_updates=5340, lr=9.99653e-05, gnorm=2.135, loss_scale=4, train_wall=11, gb_free=2.8, wall=60594 2021-06-19 11:28:51 | INFO | train_inner | epoch 002: 2374 / 3002 loss=2.778, ppl=6.86, wps=5832.6, ups=0.09, wpb=64821, bsz=128, num_updates=5341, lr=9.99653e-05, gnorm=2.142, loss_scale=4, train_wall=11, gb_free=2.8, wall=60605 2021-06-19 11:29:02 | INFO | train_inner | epoch 002: 2375 / 3002 loss=2.755, ppl=6.75, wps=5830, ups=0.09, wpb=64783, bsz=128, num_updates=5342, lr=9.99653e-05, gnorm=2.224, loss_scale=4, train_wall=11, gb_free=2.8, wall=60616 2021-06-19 11:29:13 | INFO | train_inner | epoch 002: 2376 / 3002 loss=2.689, ppl=6.45, wps=5822.8, ups=0.09, wpb=64851, bsz=128, num_updates=5343, lr=9.99653e-05, gnorm=2.314, loss_scale=4, train_wall=11, gb_free=2.8, wall=60627 2021-06-19 11:29:24 | INFO | train_inner | epoch 002: 2377 / 3002 loss=2.85, ppl=7.21, wps=5890.4, ups=0.09, wpb=64883, bsz=128, num_updates=5344, lr=9.99652e-05, gnorm=2.145, loss_scale=4, train_wall=11, gb_free=2.8, wall=60638 2021-06-19 11:29:35 | INFO | train_inner | epoch 002: 2378 / 3002 loss=2.701, ppl=6.5, wps=5817.8, ups=0.09, wpb=64761, bsz=128, num_updates=5345, lr=9.99652e-05, gnorm=2.295, loss_scale=4, train_wall=11, gb_free=2.8, wall=60649 2021-06-19 11:29:46 | INFO | train_inner | epoch 002: 2379 / 3002 loss=2.659, ppl=6.32, wps=5925, ups=0.09, wpb=64877, bsz=128, num_updates=5346, lr=9.99652e-05, gnorm=2.317, loss_scale=4, train_wall=11, gb_free=2.8, wall=60660 2021-06-19 11:29:57 | INFO | train_inner | epoch 002: 2380 / 3002 loss=2.854, ppl=7.23, wps=5953.3, ups=0.09, wpb=64759, bsz=128, num_updates=5347, lr=9.99652e-05, gnorm=2.278, loss_scale=4, train_wall=10, gb_free=2.8, wall=60671 2021-06-19 11:30:08 | INFO | train_inner | epoch 002: 2381 / 3002 loss=2.659, ppl=6.32, wps=5879.1, ups=0.09, wpb=64776, bsz=128, num_updates=5348, lr=9.99652e-05, gnorm=2.193, loss_scale=4, train_wall=11, gb_free=2.8, wall=60682 2021-06-19 11:30:19 | INFO | train_inner | epoch 002: 2382 / 3002 loss=2.676, ppl=6.39, wps=5768.3, ups=0.09, wpb=64794, bsz=128, num_updates=5349, lr=9.99652e-05, gnorm=2.264, loss_scale=4, train_wall=11, gb_free=2.8, wall=60693 2021-06-19 11:30:30 | INFO | train_inner | epoch 002: 2383 / 3002 loss=2.647, ppl=6.26, wps=5798, ups=0.09, wpb=64942, bsz=128, num_updates=5350, lr=9.99652e-05, gnorm=2.268, loss_scale=4, train_wall=11, gb_free=2.8, wall=60705 2021-06-19 11:30:42 | INFO | train_inner | epoch 002: 2384 / 3002 loss=2.536, ppl=5.8, wps=5765.6, ups=0.09, wpb=64870, bsz=128, num_updates=5351, lr=9.99652e-05, gnorm=2.093, loss_scale=4, train_wall=11, gb_free=2.8, wall=60716 2021-06-19 11:30:53 | INFO | train_inner | epoch 002: 2385 / 3002 loss=2.713, ppl=6.56, wps=5836.7, ups=0.09, wpb=64766, bsz=128, num_updates=5352, lr=9.99652e-05, gnorm=2.144, loss_scale=4, train_wall=11, gb_free=2.8, wall=60727 2021-06-19 11:31:04 | INFO | train_inner | epoch 002: 2386 / 3002 loss=2.66, ppl=6.32, wps=5879.1, ups=0.09, wpb=64740, bsz=128, num_updates=5353, lr=9.99652e-05, gnorm=2.231, loss_scale=4, train_wall=11, gb_free=2.8, wall=60738 2021-06-19 11:31:15 | INFO | train_inner | epoch 002: 2387 / 3002 loss=2.699, ppl=6.49, wps=5940.9, ups=0.09, wpb=64851, bsz=128, num_updates=5354, lr=9.99652e-05, gnorm=2.234, loss_scale=4, train_wall=10, gb_free=2.8, wall=60749 2021-06-19 11:31:26 | INFO | train_inner | epoch 002: 2388 / 3002 loss=2.715, ppl=6.56, wps=5735.1, ups=0.09, wpb=64739, bsz=128, num_updates=5355, lr=9.99652e-05, gnorm=2.664, loss_scale=4, train_wall=11, gb_free=2.8, wall=60760 2021-06-19 11:31:37 | INFO | train_inner | epoch 002: 2389 / 3002 loss=2.698, ppl=6.49, wps=5801.1, ups=0.09, wpb=64732, bsz=128, num_updates=5356, lr=9.99651e-05, gnorm=2.223, loss_scale=4, train_wall=11, gb_free=2.8, wall=60771 2021-06-19 11:31:48 | INFO | train_inner | epoch 002: 2390 / 3002 loss=2.733, ppl=6.65, wps=5997.9, ups=0.09, wpb=64774, bsz=128, num_updates=5357, lr=9.99651e-05, gnorm=2.341, loss_scale=4, train_wall=10, gb_free=2.8, wall=60782 2021-06-19 11:31:59 | INFO | train_inner | epoch 002: 2391 / 3002 loss=2.652, ppl=6.28, wps=5882.8, ups=0.09, wpb=64793, bsz=128, num_updates=5358, lr=9.99651e-05, gnorm=2.421, loss_scale=4, train_wall=11, gb_free=2.8, wall=60793 2021-06-19 11:32:10 | INFO | train_inner | epoch 002: 2392 / 3002 loss=2.484, ppl=5.6, wps=5814.1, ups=0.09, wpb=64863, bsz=128, num_updates=5359, lr=9.99651e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=60804 2021-06-19 11:32:21 | INFO | train_inner | epoch 002: 2393 / 3002 loss=2.768, ppl=6.81, wps=5905.5, ups=0.09, wpb=64708, bsz=128, num_updates=5360, lr=9.99651e-05, gnorm=2.185, loss_scale=4, train_wall=10, gb_free=2.8, wall=60815 2021-06-19 11:32:32 | INFO | train_inner | epoch 002: 2394 / 3002 loss=2.802, ppl=6.97, wps=5874, ups=0.09, wpb=64841, bsz=128, num_updates=5361, lr=9.99651e-05, gnorm=2.234, loss_scale=4, train_wall=11, gb_free=2.8, wall=60826 2021-06-19 11:32:43 | INFO | train_inner | epoch 002: 2395 / 3002 loss=2.787, ppl=6.9, wps=5892.2, ups=0.09, wpb=64865, bsz=128, num_updates=5362, lr=9.99651e-05, gnorm=2.216, loss_scale=4, train_wall=11, gb_free=2.8, wall=60837 2021-06-19 11:32:54 | INFO | train_inner | epoch 002: 2396 / 3002 loss=2.768, ppl=6.81, wps=5850, ups=0.09, wpb=64829, bsz=128, num_updates=5363, lr=9.99651e-05, gnorm=2.331, loss_scale=4, train_wall=11, gb_free=2.8, wall=60848 2021-06-19 11:33:05 | INFO | train_inner | epoch 002: 2397 / 3002 loss=2.634, ppl=6.21, wps=5976.8, ups=0.09, wpb=64923, bsz=128, num_updates=5364, lr=9.99651e-05, gnorm=2.1, loss_scale=4, train_wall=10, gb_free=2.8, wall=60859 2021-06-19 11:33:16 | INFO | train_inner | epoch 002: 2398 / 3002 loss=2.788, ppl=6.91, wps=5843, ups=0.09, wpb=64806, bsz=128, num_updates=5365, lr=9.99651e-05, gnorm=2.074, loss_scale=8, train_wall=11, gb_free=2.8, wall=60870 2021-06-19 11:33:27 | INFO | train_inner | epoch 002: 2399 / 3002 loss=2.735, ppl=6.66, wps=5823, ups=0.09, wpb=64851, bsz=128, num_updates=5366, lr=9.99651e-05, gnorm=2.173, loss_scale=8, train_wall=11, gb_free=2.8, wall=60882 2021-06-19 11:33:38 | INFO | train_inner | epoch 002: 2400 / 3002 loss=2.841, ppl=7.16, wps=5876, ups=0.09, wpb=64774, bsz=128, num_updates=5367, lr=9.99651e-05, gnorm=2.166, loss_scale=8, train_wall=11, gb_free=2.8, wall=60893 2021-06-19 11:33:49 | INFO | train_inner | epoch 002: 2401 / 3002 loss=2.805, ppl=6.99, wps=5872.7, ups=0.09, wpb=64908, bsz=128, num_updates=5368, lr=9.99651e-05, gnorm=2.264, loss_scale=8, train_wall=11, gb_free=2.8, wall=60904 2021-06-19 11:34:00 | INFO | train_inner | epoch 002: 2402 / 3002 loss=2.793, ppl=6.93, wps=5916.6, ups=0.09, wpb=64715, bsz=128, num_updates=5369, lr=9.9965e-05, gnorm=2.257, loss_scale=8, train_wall=10, gb_free=2.8, wall=60915 2021-06-19 11:34:11 | INFO | train_inner | epoch 002: 2403 / 3002 loss=2.696, ppl=6.48, wps=5790.6, ups=0.09, wpb=64793, bsz=128, num_updates=5370, lr=9.9965e-05, gnorm=2.319, loss_scale=8, train_wall=11, gb_free=2.8, wall=60926 2021-06-19 11:34:23 | INFO | train_inner | epoch 002: 2404 / 3002 loss=2.544, ppl=5.83, wps=5777.2, ups=0.09, wpb=64756, bsz=128, num_updates=5371, lr=9.9965e-05, gnorm=2.244, loss_scale=8, train_wall=11, gb_free=2.8, wall=60937 2021-06-19 11:34:34 | INFO | train_inner | epoch 002: 2405 / 3002 loss=2.691, ppl=6.46, wps=5884.8, ups=0.09, wpb=64920, bsz=128, num_updates=5372, lr=9.9965e-05, gnorm=2.224, loss_scale=8, train_wall=11, gb_free=2.8, wall=60948 2021-06-19 11:34:45 | INFO | train_inner | epoch 002: 2406 / 3002 loss=2.821, ppl=7.07, wps=5932.1, ups=0.09, wpb=64829, bsz=128, num_updates=5373, lr=9.9965e-05, gnorm=2.21, loss_scale=8, train_wall=11, gb_free=2.8, wall=60959 2021-06-19 11:34:56 | INFO | train_inner | epoch 002: 2407 / 3002 loss=2.717, ppl=6.57, wps=5896.6, ups=0.09, wpb=64873, bsz=128, num_updates=5374, lr=9.9965e-05, gnorm=3.615, loss_scale=8, train_wall=11, gb_free=2.8, wall=60970 2021-06-19 11:35:06 | INFO | train_inner | epoch 002: 2408 / 3002 loss=2.729, ppl=6.63, wps=6021.9, ups=0.09, wpb=64887, bsz=128, num_updates=5375, lr=9.9965e-05, gnorm=2.273, loss_scale=8, train_wall=10, gb_free=2.8, wall=60981 2021-06-19 11:35:18 | INFO | train_inner | epoch 002: 2409 / 3002 loss=2.796, ppl=6.94, wps=5804.2, ups=0.09, wpb=64805, bsz=128, num_updates=5376, lr=9.9965e-05, gnorm=2.257, loss_scale=8, train_wall=11, gb_free=2.8, wall=60992 2021-06-19 11:35:29 | INFO | train_inner | epoch 002: 2410 / 3002 loss=2.557, ppl=5.88, wps=5807.7, ups=0.09, wpb=64818, bsz=128, num_updates=5377, lr=9.9965e-05, gnorm=2.254, loss_scale=8, train_wall=11, gb_free=2.8, wall=61003 2021-06-19 11:35:40 | INFO | train_inner | epoch 002: 2411 / 3002 loss=2.931, ppl=7.62, wps=5976.2, ups=0.09, wpb=64841, bsz=128, num_updates=5378, lr=9.9965e-05, gnorm=2.274, loss_scale=8, train_wall=10, gb_free=2.8, wall=61014 2021-06-19 11:35:51 | INFO | train_inner | epoch 002: 2412 / 3002 loss=2.672, ppl=6.37, wps=5853.6, ups=0.09, wpb=64867, bsz=128, num_updates=5379, lr=9.9965e-05, gnorm=2.29, loss_scale=8, train_wall=11, gb_free=2.8, wall=61025 2021-06-19 11:36:02 | INFO | train_inner | epoch 002: 2413 / 3002 loss=2.68, ppl=6.41, wps=5909.6, ups=0.09, wpb=64853, bsz=128, num_updates=5380, lr=9.9965e-05, gnorm=2.383, loss_scale=8, train_wall=11, gb_free=2.8, wall=61036 2021-06-19 11:36:13 | INFO | train_inner | epoch 002: 2414 / 3002 loss=2.903, ppl=7.48, wps=5767.3, ups=0.09, wpb=64841, bsz=128, num_updates=5381, lr=9.99649e-05, gnorm=2.264, loss_scale=8, train_wall=11, gb_free=2.8, wall=61047 2021-06-19 11:36:24 | INFO | train_inner | epoch 002: 2415 / 3002 loss=2.681, ppl=6.42, wps=5901.5, ups=0.09, wpb=64866, bsz=128, num_updates=5382, lr=9.99649e-05, gnorm=2.46, loss_scale=8, train_wall=10, gb_free=2.8, wall=61058 2021-06-19 11:36:35 | INFO | train_inner | epoch 002: 2416 / 3002 loss=2.832, ppl=7.12, wps=5798, ups=0.09, wpb=64867, bsz=128, num_updates=5383, lr=9.99649e-05, gnorm=2.27, loss_scale=8, train_wall=11, gb_free=2.8, wall=61069 2021-06-19 11:36:46 | INFO | train_inner | epoch 002: 2417 / 3002 loss=2.5, ppl=5.66, wps=5764.2, ups=0.09, wpb=64884, bsz=128, num_updates=5384, lr=9.99649e-05, gnorm=2.563, loss_scale=8, train_wall=11, gb_free=2.8, wall=61081 2021-06-19 11:36:57 | INFO | train_inner | epoch 002: 2418 / 3002 loss=2.708, ppl=6.53, wps=5884.2, ups=0.09, wpb=64849, bsz=128, num_updates=5385, lr=9.99649e-05, gnorm=2.27, loss_scale=8, train_wall=11, gb_free=2.8, wall=61092 2021-06-19 11:37:08 | INFO | train_inner | epoch 002: 2419 / 3002 loss=2.629, ppl=6.19, wps=5877.3, ups=0.09, wpb=64823, bsz=128, num_updates=5386, lr=9.99649e-05, gnorm=2.248, loss_scale=8, train_wall=11, gb_free=2.8, wall=61103 2021-06-19 11:37:19 | INFO | train_inner | epoch 002: 2420 / 3002 loss=2.611, ppl=6.11, wps=5922.1, ups=0.09, wpb=64844, bsz=128, num_updates=5387, lr=9.99649e-05, gnorm=2.292, loss_scale=8, train_wall=11, gb_free=2.8, wall=61114 2021-06-19 11:37:30 | INFO | train_inner | epoch 002: 2421 / 3002 loss=2.671, ppl=6.37, wps=5821, ups=0.09, wpb=64858, bsz=128, num_updates=5388, lr=9.99649e-05, gnorm=2.182, loss_scale=8, train_wall=11, gb_free=2.8, wall=61125 2021-06-19 11:37:41 | INFO | train_inner | epoch 002: 2422 / 3002 loss=2.591, ppl=6.03, wps=5935.4, ups=0.09, wpb=64862, bsz=128, num_updates=5389, lr=9.99649e-05, gnorm=3.056, loss_scale=8, train_wall=10, gb_free=2.8, wall=61136 2021-06-19 11:37:52 | INFO | train_inner | epoch 002: 2423 / 3002 loss=2.708, ppl=6.53, wps=6021.9, ups=0.09, wpb=64856, bsz=128, num_updates=5390, lr=9.99649e-05, gnorm=2.21, loss_scale=8, train_wall=10, gb_free=2.8, wall=61146 2021-06-19 11:38:03 | INFO | train_inner | epoch 002: 2424 / 3002 loss=2.647, ppl=6.27, wps=5874.2, ups=0.09, wpb=64877, bsz=128, num_updates=5391, lr=9.99649e-05, gnorm=2.188, loss_scale=8, train_wall=11, gb_free=2.8, wall=61158 2021-06-19 11:38:14 | INFO | train_inner | epoch 002: 2425 / 3002 loss=2.73, ppl=6.63, wps=5900.5, ups=0.09, wpb=64815, bsz=128, num_updates=5392, lr=9.99649e-05, gnorm=6.259, loss_scale=8, train_wall=11, gb_free=2.8, wall=61169 2021-06-19 11:38:25 | INFO | train_inner | epoch 002: 2426 / 3002 loss=2.663, ppl=6.34, wps=5836.5, ups=0.09, wpb=64870, bsz=128, num_updates=5393, lr=9.99649e-05, gnorm=2.251, loss_scale=8, train_wall=11, gb_free=2.8, wall=61180 2021-06-19 11:38:37 | INFO | train_inner | epoch 002: 2427 / 3002 loss=2.8, ppl=6.97, wps=5689.2, ups=0.09, wpb=64761, bsz=128, num_updates=5394, lr=9.99648e-05, gnorm=2.141, loss_scale=8, train_wall=11, gb_free=2.8, wall=61191 2021-06-19 11:38:48 | INFO | train_inner | epoch 002: 2428 / 3002 loss=2.7, ppl=6.5, wps=5805.1, ups=0.09, wpb=64890, bsz=128, num_updates=5395, lr=9.99648e-05, gnorm=2.505, loss_scale=8, train_wall=11, gb_free=2.8, wall=61202 2021-06-19 11:38:59 | INFO | train_inner | epoch 002: 2429 / 3002 loss=2.634, ppl=6.21, wps=5958.8, ups=0.09, wpb=64826, bsz=128, num_updates=5396, lr=9.99648e-05, gnorm=2.258, loss_scale=8, train_wall=10, gb_free=2.8, wall=61213 2021-06-19 11:39:10 | INFO | train_inner | epoch 002: 2430 / 3002 loss=2.473, ppl=5.55, wps=5809.6, ups=0.09, wpb=64855, bsz=128, num_updates=5397, lr=9.99648e-05, gnorm=2.242, loss_scale=8, train_wall=11, gb_free=2.8, wall=61224 2021-06-19 11:39:21 | INFO | train_inner | epoch 002: 2431 / 3002 loss=2.694, ppl=6.47, wps=5854.9, ups=0.09, wpb=64830, bsz=128, num_updates=5398, lr=9.99648e-05, gnorm=2.301, loss_scale=8, train_wall=11, gb_free=2.8, wall=61235 2021-06-19 11:39:32 | INFO | train_inner | epoch 002: 2432 / 3002 loss=2.647, ppl=6.26, wps=5897.3, ups=0.09, wpb=64770, bsz=128, num_updates=5399, lr=9.99648e-05, gnorm=2.307, loss_scale=8, train_wall=11, gb_free=2.8, wall=61246 2021-06-19 11:39:43 | INFO | train_inner | epoch 002: 2433 / 3002 loss=2.695, ppl=6.48, wps=5849.6, ups=0.09, wpb=64886, bsz=128, num_updates=5400, lr=9.99648e-05, gnorm=3.639, loss_scale=8, train_wall=11, gb_free=2.8, wall=61257 2021-06-19 11:39:54 | INFO | train_inner | epoch 002: 2434 / 3002 loss=2.743, ppl=6.69, wps=5815, ups=0.09, wpb=64851, bsz=128, num_updates=5401, lr=9.99648e-05, gnorm=2.326, loss_scale=8, train_wall=11, gb_free=2.8, wall=61269 2021-06-19 11:40:05 | INFO | train_inner | epoch 002: 2435 / 3002 loss=2.659, ppl=6.32, wps=5854.7, ups=0.09, wpb=64816, bsz=128, num_updates=5402, lr=9.99648e-05, gnorm=2.196, loss_scale=8, train_wall=11, gb_free=2.8, wall=61280 2021-06-19 11:40:16 | INFO | train_inner | epoch 002: 2436 / 3002 loss=2.583, ppl=5.99, wps=5804.6, ups=0.09, wpb=64793, bsz=128, num_updates=5403, lr=9.99648e-05, gnorm=2.15, loss_scale=8, train_wall=11, gb_free=2.8, wall=61291 2021-06-19 11:40:28 | INFO | train_inner | epoch 002: 2437 / 3002 loss=2.545, ppl=5.84, wps=5829.3, ups=0.09, wpb=64792, bsz=128, num_updates=5404, lr=9.99648e-05, gnorm=2.19, loss_scale=8, train_wall=11, gb_free=2.8, wall=61302 2021-06-19 11:40:39 | INFO | train_inner | epoch 002: 2438 / 3002 loss=2.868, ppl=7.3, wps=5804.5, ups=0.09, wpb=64860, bsz=128, num_updates=5405, lr=9.99648e-05, gnorm=2.268, loss_scale=8, train_wall=11, gb_free=2.8, wall=61313 2021-06-19 11:40:50 | INFO | train_inner | epoch 002: 2439 / 3002 loss=2.694, ppl=6.47, wps=5988.4, ups=0.09, wpb=64854, bsz=128, num_updates=5406, lr=9.99647e-05, gnorm=2.264, loss_scale=8, train_wall=10, gb_free=2.8, wall=61324 2021-06-19 11:41:01 | INFO | train_inner | epoch 002: 2440 / 3002 loss=2.753, ppl=6.74, wps=5873.3, ups=0.09, wpb=64819, bsz=128, num_updates=5407, lr=9.99647e-05, gnorm=2.171, loss_scale=8, train_wall=11, gb_free=2.8, wall=61335 2021-06-19 11:41:12 | INFO | train_inner | epoch 002: 2441 / 3002 loss=2.662, ppl=6.33, wps=5856.6, ups=0.09, wpb=64792, bsz=128, num_updates=5408, lr=9.99647e-05, gnorm=2.247, loss_scale=8, train_wall=11, gb_free=2.8, wall=61346 2021-06-19 11:41:23 | INFO | train_inner | epoch 002: 2442 / 3002 loss=2.769, ppl=6.82, wps=5766.8, ups=0.09, wpb=64887, bsz=128, num_updates=5409, lr=9.99647e-05, gnorm=2.281, loss_scale=8, train_wall=11, gb_free=2.8, wall=61357 2021-06-19 11:41:34 | INFO | train_inner | epoch 002: 2443 / 3002 loss=2.706, ppl=6.52, wps=5864, ups=0.09, wpb=64867, bsz=128, num_updates=5410, lr=9.99647e-05, gnorm=2.304, loss_scale=8, train_wall=11, gb_free=2.8, wall=61368 2021-06-19 11:41:45 | INFO | train_inner | epoch 002: 2444 / 3002 loss=2.78, ppl=6.87, wps=5864.8, ups=0.09, wpb=64821, bsz=128, num_updates=5411, lr=9.99647e-05, gnorm=2.166, loss_scale=8, train_wall=11, gb_free=2.8, wall=61379 2021-06-19 11:41:56 | INFO | train_inner | epoch 002: 2445 / 3002 loss=2.811, ppl=7.02, wps=5746.1, ups=0.09, wpb=64703, bsz=128, num_updates=5412, lr=9.99647e-05, gnorm=2.34, loss_scale=8, train_wall=11, gb_free=2.8, wall=61391 2021-06-19 11:42:07 | INFO | train_inner | epoch 002: 2446 / 3002 loss=2.83, ppl=7.11, wps=5789.3, ups=0.09, wpb=64864, bsz=128, num_updates=5413, lr=9.99647e-05, gnorm=2.244, loss_scale=8, train_wall=11, gb_free=2.8, wall=61402 2021-06-19 11:42:19 | INFO | train_inner | epoch 002: 2447 / 3002 loss=2.595, ppl=6.04, wps=5829.6, ups=0.09, wpb=64844, bsz=128, num_updates=5414, lr=9.99647e-05, gnorm=2.165, loss_scale=8, train_wall=11, gb_free=2.8, wall=61413 2021-06-19 11:42:30 | INFO | train_inner | epoch 002: 2448 / 3002 loss=2.811, ppl=7.02, wps=5872.3, ups=0.09, wpb=64817, bsz=128, num_updates=5415, lr=9.99647e-05, gnorm=2.196, loss_scale=8, train_wall=11, gb_free=2.8, wall=61424 2021-06-19 11:42:41 | INFO | train_inner | epoch 002: 2449 / 3002 loss=2.912, ppl=7.53, wps=5830.8, ups=0.09, wpb=64762, bsz=128, num_updates=5416, lr=9.99647e-05, gnorm=2.313, loss_scale=8, train_wall=11, gb_free=2.8, wall=61435 2021-06-19 11:42:52 | INFO | train_inner | epoch 002: 2450 / 3002 loss=2.653, ppl=6.29, wps=5733.5, ups=0.09, wpb=64763, bsz=128, num_updates=5417, lr=9.99647e-05, gnorm=2.212, loss_scale=8, train_wall=11, gb_free=2.8, wall=61446 2021-06-19 11:43:03 | INFO | train_inner | epoch 002: 2451 / 3002 loss=2.595, ppl=6.04, wps=5828.7, ups=0.09, wpb=64837, bsz=128, num_updates=5418, lr=9.99647e-05, gnorm=2.164, loss_scale=8, train_wall=11, gb_free=2.8, wall=61458 2021-06-19 11:43:14 | INFO | train_inner | epoch 002: 2452 / 3002 loss=2.827, ppl=7.1, wps=5920.2, ups=0.09, wpb=64857, bsz=128, num_updates=5419, lr=9.99646e-05, gnorm=2.223, loss_scale=8, train_wall=11, gb_free=2.8, wall=61468 2021-06-19 11:43:25 | INFO | train_inner | epoch 002: 2453 / 3002 loss=2.741, ppl=6.68, wps=6037, ups=0.09, wpb=64868, bsz=128, num_updates=5420, lr=9.99646e-05, gnorm=2.237, loss_scale=8, train_wall=10, gb_free=2.8, wall=61479 2021-06-19 11:43:36 | INFO | train_inner | epoch 002: 2454 / 3002 loss=2.651, ppl=6.28, wps=5791.6, ups=0.09, wpb=64859, bsz=128, num_updates=5421, lr=9.99646e-05, gnorm=2.12, loss_scale=8, train_wall=11, gb_free=2.8, wall=61490 2021-06-19 11:43:47 | INFO | train_inner | epoch 002: 2455 / 3002 loss=2.66, ppl=6.32, wps=5881.6, ups=0.09, wpb=64813, bsz=128, num_updates=5422, lr=9.99646e-05, gnorm=2.359, loss_scale=8, train_wall=11, gb_free=2.8, wall=61501 2021-06-19 11:43:58 | INFO | train_inner | epoch 002: 2456 / 3002 loss=2.754, ppl=6.74, wps=5874.3, ups=0.09, wpb=64838, bsz=128, num_updates=5423, lr=9.99646e-05, gnorm=2.244, loss_scale=8, train_wall=11, gb_free=2.8, wall=61512 2021-06-19 11:44:09 | INFO | train_inner | epoch 002: 2457 / 3002 loss=2.647, ppl=6.26, wps=5876.2, ups=0.09, wpb=64843, bsz=128, num_updates=5424, lr=9.99646e-05, gnorm=2.245, loss_scale=8, train_wall=11, gb_free=2.8, wall=61523 2021-06-19 11:44:20 | INFO | train_inner | epoch 002: 2458 / 3002 loss=2.801, ppl=6.97, wps=5749.2, ups=0.09, wpb=64827, bsz=128, num_updates=5425, lr=9.99646e-05, gnorm=2.149, loss_scale=8, train_wall=11, gb_free=2.8, wall=61535 2021-06-19 11:44:32 | INFO | train_inner | epoch 002: 2459 / 3002 loss=2.836, ppl=7.14, wps=5855.7, ups=0.09, wpb=64908, bsz=128, num_updates=5426, lr=9.99646e-05, gnorm=2.227, loss_scale=8, train_wall=11, gb_free=2.8, wall=61546 2021-06-19 11:44:43 | INFO | train_inner | epoch 002: 2460 / 3002 loss=2.743, ppl=6.69, wps=5792.2, ups=0.09, wpb=64828, bsz=128, num_updates=5427, lr=9.99646e-05, gnorm=2.289, loss_scale=8, train_wall=11, gb_free=2.8, wall=61557 2021-06-19 11:44:54 | INFO | train_inner | epoch 002: 2461 / 3002 loss=2.819, ppl=7.05, wps=5916.8, ups=0.09, wpb=64849, bsz=128, num_updates=5428, lr=9.99646e-05, gnorm=2.084, loss_scale=8, train_wall=11, gb_free=2.8, wall=61568 2021-06-19 11:45:05 | INFO | train_inner | epoch 002: 2462 / 3002 loss=2.705, ppl=6.52, wps=5824.6, ups=0.09, wpb=64743, bsz=128, num_updates=5429, lr=9.99646e-05, gnorm=2.406, loss_scale=8, train_wall=11, gb_free=2.8, wall=61579 2021-06-19 11:45:16 | INFO | train_inner | epoch 002: 2463 / 3002 loss=2.702, ppl=6.51, wps=5818.6, ups=0.09, wpb=64851, bsz=128, num_updates=5430, lr=9.99646e-05, gnorm=2.503, loss_scale=8, train_wall=11, gb_free=2.8, wall=61590 2021-06-19 11:45:27 | INFO | train_inner | epoch 002: 2464 / 3002 loss=2.776, ppl=6.85, wps=5794.5, ups=0.09, wpb=64896, bsz=128, num_updates=5431, lr=9.99645e-05, gnorm=2.286, loss_scale=8, train_wall=11, gb_free=2.8, wall=61601 2021-06-19 11:45:38 | INFO | train_inner | epoch 002: 2465 / 3002 loss=2.623, ppl=6.16, wps=5901.7, ups=0.09, wpb=64914, bsz=128, num_updates=5432, lr=9.99645e-05, gnorm=2.33, loss_scale=8, train_wall=11, gb_free=2.8, wall=61612 2021-06-19 11:45:49 | INFO | train_inner | epoch 002: 2466 / 3002 loss=2.687, ppl=6.44, wps=5956.3, ups=0.09, wpb=64855, bsz=128, num_updates=5433, lr=9.99645e-05, gnorm=2.237, loss_scale=8, train_wall=10, gb_free=2.8, wall=61623 2021-06-19 11:46:00 | INFO | train_inner | epoch 002: 2467 / 3002 loss=2.697, ppl=6.49, wps=5814.5, ups=0.09, wpb=64781, bsz=128, num_updates=5434, lr=9.99645e-05, gnorm=2.258, loss_scale=8, train_wall=11, gb_free=2.8, wall=61635 2021-06-19 11:46:11 | INFO | train_inner | epoch 002: 2468 / 3002 loss=2.714, ppl=6.56, wps=5798.6, ups=0.09, wpb=64796, bsz=128, num_updates=5435, lr=9.99645e-05, gnorm=2.205, loss_scale=8, train_wall=11, gb_free=2.8, wall=61646 2021-06-19 11:46:22 | INFO | train_inner | epoch 002: 2469 / 3002 loss=2.668, ppl=6.35, wps=5817.2, ups=0.09, wpb=64817, bsz=128, num_updates=5436, lr=9.99645e-05, gnorm=2.15, loss_scale=8, train_wall=11, gb_free=2.8, wall=61657 2021-06-19 11:46:34 | INFO | train_inner | epoch 002: 2470 / 3002 loss=2.648, ppl=6.27, wps=5825, ups=0.09, wpb=64819, bsz=128, num_updates=5437, lr=9.99645e-05, gnorm=2.147, loss_scale=8, train_wall=11, gb_free=2.8, wall=61668 2021-06-19 11:46:45 | INFO | train_inner | epoch 002: 2471 / 3002 loss=2.706, ppl=6.52, wps=5661.6, ups=0.09, wpb=64785, bsz=128, num_updates=5438, lr=9.99645e-05, gnorm=2.132, loss_scale=8, train_wall=11, gb_free=2.8, wall=61679 2021-06-19 11:46:56 | INFO | train_inner | epoch 002: 2472 / 3002 loss=2.808, ppl=7, wps=5870.4, ups=0.09, wpb=64896, bsz=128, num_updates=5439, lr=9.99645e-05, gnorm=2.538, loss_scale=8, train_wall=11, gb_free=2.8, wall=61690 2021-06-19 11:47:07 | INFO | train_inner | epoch 002: 2473 / 3002 loss=2.749, ppl=6.72, wps=5870.3, ups=0.09, wpb=64854, bsz=128, num_updates=5440, lr=9.99645e-05, gnorm=2.26, loss_scale=8, train_wall=11, gb_free=2.8, wall=61701 2021-06-19 11:47:18 | INFO | train_inner | epoch 002: 2474 / 3002 loss=2.805, ppl=6.99, wps=5935, ups=0.09, wpb=64885, bsz=128, num_updates=5441, lr=9.99645e-05, gnorm=2.253, loss_scale=8, train_wall=10, gb_free=2.8, wall=61712 2021-06-19 11:47:29 | INFO | train_inner | epoch 002: 2475 / 3002 loss=2.734, ppl=6.65, wps=5959.7, ups=0.09, wpb=64733, bsz=128, num_updates=5442, lr=9.99645e-05, gnorm=2.687, loss_scale=8, train_wall=10, gb_free=2.8, wall=61723 2021-06-19 11:47:40 | INFO | train_inner | epoch 002: 2476 / 3002 loss=2.665, ppl=6.34, wps=5990.9, ups=0.09, wpb=64834, bsz=128, num_updates=5443, lr=9.99645e-05, gnorm=2.29, loss_scale=8, train_wall=10, gb_free=2.8, wall=61734 2021-06-19 11:47:51 | INFO | train_inner | epoch 002: 2477 / 3002 loss=2.735, ppl=6.66, wps=5883.2, ups=0.09, wpb=64834, bsz=128, num_updates=5444, lr=9.99644e-05, gnorm=3.089, loss_scale=8, train_wall=11, gb_free=2.8, wall=61745 2021-06-19 11:48:02 | INFO | train_inner | epoch 002: 2478 / 3002 loss=2.736, ppl=6.66, wps=5838.4, ups=0.09, wpb=64853, bsz=128, num_updates=5445, lr=9.99644e-05, gnorm=2.117, loss_scale=8, train_wall=11, gb_free=2.8, wall=61756 2021-06-19 11:48:13 | INFO | train_inner | epoch 002: 2479 / 3002 loss=2.815, ppl=7.04, wps=5939, ups=0.09, wpb=64822, bsz=128, num_updates=5446, lr=9.99644e-05, gnorm=2.327, loss_scale=8, train_wall=10, gb_free=2.8, wall=61767 2021-06-19 11:48:24 | INFO | train_inner | epoch 002: 2480 / 3002 loss=2.725, ppl=6.61, wps=5932.8, ups=0.09, wpb=64810, bsz=128, num_updates=5447, lr=9.99644e-05, gnorm=2.325, loss_scale=8, train_wall=10, gb_free=2.8, wall=61778 2021-06-19 11:48:35 | INFO | train_inner | epoch 002: 2481 / 3002 loss=2.631, ppl=6.19, wps=5938.2, ups=0.09, wpb=64882, bsz=128, num_updates=5448, lr=9.99644e-05, gnorm=2.067, loss_scale=8, train_wall=10, gb_free=2.8, wall=61789 2021-06-19 11:48:46 | INFO | train_inner | epoch 002: 2482 / 3002 loss=2.755, ppl=6.75, wps=5873.7, ups=0.09, wpb=64809, bsz=128, num_updates=5449, lr=9.99644e-05, gnorm=2.352, loss_scale=8, train_wall=11, gb_free=2.8, wall=61800 2021-06-19 11:48:57 | INFO | train_inner | epoch 002: 2483 / 3002 loss=2.535, ppl=5.79, wps=5821.1, ups=0.09, wpb=64886, bsz=128, num_updates=5450, lr=9.99644e-05, gnorm=2.294, loss_scale=8, train_wall=11, gb_free=2.8, wall=61811 2021-06-19 11:49:08 | INFO | train_inner | epoch 002: 2484 / 3002 loss=2.871, ppl=7.32, wps=5822.7, ups=0.09, wpb=64725, bsz=128, num_updates=5451, lr=9.99644e-05, gnorm=2.355, loss_scale=8, train_wall=11, gb_free=2.8, wall=61822 2021-06-19 11:49:19 | INFO | train_inner | epoch 002: 2485 / 3002 loss=2.787, ppl=6.9, wps=5801.1, ups=0.09, wpb=64890, bsz=128, num_updates=5452, lr=9.99644e-05, gnorm=2.414, loss_scale=8, train_wall=11, gb_free=2.8, wall=61833 2021-06-19 11:49:30 | INFO | train_inner | epoch 002: 2486 / 3002 loss=2.895, ppl=7.44, wps=5926.3, ups=0.09, wpb=64923, bsz=128, num_updates=5453, lr=9.99644e-05, gnorm=2.296, loss_scale=8, train_wall=11, gb_free=2.8, wall=61844 2021-06-19 11:49:41 | INFO | train_inner | epoch 002: 2487 / 3002 loss=2.741, ppl=6.68, wps=5914.3, ups=0.09, wpb=64862, bsz=128, num_updates=5454, lr=9.99644e-05, gnorm=2.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=61855 2021-06-19 11:49:52 | INFO | train_inner | epoch 002: 2488 / 3002 loss=2.802, ppl=6.98, wps=5919.7, ups=0.09, wpb=64880, bsz=128, num_updates=5455, lr=9.99644e-05, gnorm=2.258, loss_scale=8, train_wall=11, gb_free=2.8, wall=61866 2021-06-19 11:50:03 | INFO | train_inner | epoch 002: 2489 / 3002 loss=2.767, ppl=6.81, wps=5850.5, ups=0.09, wpb=64860, bsz=128, num_updates=5456, lr=9.99643e-05, gnorm=2.246, loss_scale=8, train_wall=11, gb_free=2.8, wall=61877 2021-06-19 11:50:14 | INFO | train_inner | epoch 002: 2490 / 3002 loss=2.686, ppl=6.44, wps=5796.7, ups=0.09, wpb=64764, bsz=128, num_updates=5457, lr=9.99643e-05, gnorm=2.271, loss_scale=8, train_wall=11, gb_free=2.8, wall=61889 2021-06-19 11:50:25 | INFO | train_inner | epoch 002: 2491 / 3002 loss=2.652, ppl=6.29, wps=5916.9, ups=0.09, wpb=64745, bsz=128, num_updates=5458, lr=9.99643e-05, gnorm=2.349, loss_scale=8, train_wall=10, gb_free=2.8, wall=61900 2021-06-19 11:50:36 | INFO | train_inner | epoch 002: 2492 / 3002 loss=2.583, ppl=5.99, wps=5764.3, ups=0.09, wpb=64863, bsz=128, num_updates=5459, lr=9.99643e-05, gnorm=2.434, loss_scale=8, train_wall=11, gb_free=2.8, wall=61911 2021-06-19 11:50:48 | INFO | train_inner | epoch 002: 2493 / 3002 loss=2.701, ppl=6.5, wps=5843.6, ups=0.09, wpb=64863, bsz=128, num_updates=5460, lr=9.99643e-05, gnorm=2.372, loss_scale=8, train_wall=11, gb_free=2.8, wall=61922 2021-06-19 11:50:59 | INFO | train_inner | epoch 002: 2494 / 3002 loss=2.653, ppl=6.29, wps=5875.8, ups=0.09, wpb=64856, bsz=128, num_updates=5461, lr=9.99643e-05, gnorm=2.809, loss_scale=8, train_wall=11, gb_free=2.8, wall=61933 2021-06-19 11:51:10 | INFO | train_inner | epoch 002: 2495 / 3002 loss=2.809, ppl=7.01, wps=5845, ups=0.09, wpb=64771, bsz=128, num_updates=5462, lr=9.99643e-05, gnorm=2.287, loss_scale=8, train_wall=11, gb_free=2.8, wall=61944 2021-06-19 11:51:21 | INFO | train_inner | epoch 002: 2496 / 3002 loss=2.703, ppl=6.51, wps=5751.2, ups=0.09, wpb=64827, bsz=128, num_updates=5463, lr=9.99643e-05, gnorm=2.259, loss_scale=8, train_wall=11, gb_free=2.8, wall=61955 2021-06-19 11:51:32 | INFO | train_inner | epoch 002: 2497 / 3002 loss=2.867, ppl=7.29, wps=6069.3, ups=0.09, wpb=64863, bsz=128, num_updates=5464, lr=9.99643e-05, gnorm=2.343, loss_scale=8, train_wall=10, gb_free=2.8, wall=61966 2021-06-19 11:51:43 | INFO | train_inner | epoch 002: 2498 / 3002 loss=2.687, ppl=6.44, wps=5718.5, ups=0.09, wpb=64838, bsz=128, num_updates=5465, lr=9.99643e-05, gnorm=2.404, loss_scale=8, train_wall=11, gb_free=2.8, wall=61977 2021-06-19 11:51:54 | INFO | train_inner | epoch 002: 2499 / 3002 loss=2.644, ppl=6.25, wps=5720.2, ups=0.09, wpb=64886, bsz=128, num_updates=5466, lr=9.99643e-05, gnorm=2.276, loss_scale=8, train_wall=11, gb_free=2.8, wall=61989 2021-06-19 11:52:05 | INFO | train_inner | epoch 002: 2500 / 3002 loss=2.61, ppl=6.11, wps=5832.3, ups=0.09, wpb=64823, bsz=128, num_updates=5467, lr=9.99643e-05, gnorm=2.444, loss_scale=8, train_wall=11, gb_free=2.8, wall=62000 2021-06-19 11:52:16 | INFO | train_inner | epoch 002: 2501 / 3002 loss=2.654, ppl=6.29, wps=5908.4, ups=0.09, wpb=64923, bsz=128, num_updates=5468, lr=9.99643e-05, gnorm=3.021, loss_scale=8, train_wall=11, gb_free=2.8, wall=62011 2021-06-19 11:52:28 | INFO | train_inner | epoch 002: 2502 / 3002 loss=2.589, ppl=6.02, wps=5810.7, ups=0.09, wpb=64834, bsz=128, num_updates=5469, lr=9.99642e-05, gnorm=2.199, loss_scale=8, train_wall=11, gb_free=2.8, wall=62022 2021-06-19 11:52:39 | INFO | train_inner | epoch 002: 2503 / 3002 loss=2.767, ppl=6.81, wps=5949.8, ups=0.09, wpb=64890, bsz=128, num_updates=5470, lr=9.99642e-05, gnorm=2.44, loss_scale=8, train_wall=10, gb_free=2.8, wall=62033 2021-06-19 11:52:50 | INFO | train_inner | epoch 002: 2504 / 3002 loss=2.673, ppl=6.38, wps=5860.6, ups=0.09, wpb=64881, bsz=128, num_updates=5471, lr=9.99642e-05, gnorm=2.295, loss_scale=8, train_wall=11, gb_free=2.8, wall=62044 2021-06-19 11:53:01 | INFO | train_inner | epoch 002: 2505 / 3002 loss=2.732, ppl=6.64, wps=5808.5, ups=0.09, wpb=64787, bsz=128, num_updates=5472, lr=9.99642e-05, gnorm=2.607, loss_scale=8, train_wall=11, gb_free=2.8, wall=62055 2021-06-19 11:53:12 | INFO | train_inner | epoch 002: 2506 / 3002 loss=2.675, ppl=6.39, wps=5915.2, ups=0.09, wpb=64867, bsz=128, num_updates=5473, lr=9.99642e-05, gnorm=2.24, loss_scale=8, train_wall=10, gb_free=2.8, wall=62066 2021-06-19 11:53:23 | INFO | train_inner | epoch 002: 2507 / 3002 loss=2.819, ppl=7.06, wps=5865.4, ups=0.09, wpb=64897, bsz=128, num_updates=5474, lr=9.99642e-05, gnorm=2.259, loss_scale=8, train_wall=11, gb_free=2.8, wall=62077 2021-06-19 11:53:34 | INFO | train_inner | epoch 002: 2508 / 3002 loss=2.737, ppl=6.67, wps=5798.5, ups=0.09, wpb=64903, bsz=128, num_updates=5475, lr=9.99642e-05, gnorm=2.216, loss_scale=8, train_wall=11, gb_free=2.8, wall=62088 2021-06-19 11:53:45 | INFO | train_inner | epoch 002: 2509 / 3002 loss=2.76, ppl=6.77, wps=5893.6, ups=0.09, wpb=64781, bsz=128, num_updates=5476, lr=9.99642e-05, gnorm=2.358, loss_scale=8, train_wall=11, gb_free=2.8, wall=62099 2021-06-19 11:53:56 | INFO | train_inner | epoch 002: 2510 / 3002 loss=2.776, ppl=6.85, wps=5825, ups=0.09, wpb=64843, bsz=128, num_updates=5477, lr=9.99642e-05, gnorm=2.431, loss_scale=8, train_wall=11, gb_free=2.8, wall=62110 2021-06-19 11:54:07 | INFO | train_inner | epoch 002: 2511 / 3002 loss=2.724, ppl=6.61, wps=5869, ups=0.09, wpb=64851, bsz=128, num_updates=5478, lr=9.99642e-05, gnorm=2.291, loss_scale=8, train_wall=11, gb_free=2.8, wall=62121 2021-06-19 11:54:18 | INFO | train_inner | epoch 002: 2512 / 3002 loss=2.816, ppl=7.04, wps=5811, ups=0.09, wpb=64877, bsz=128, num_updates=5479, lr=9.99642e-05, gnorm=2.479, loss_scale=8, train_wall=11, gb_free=2.8, wall=62133 2021-06-19 11:54:29 | INFO | train_inner | epoch 002: 2513 / 3002 loss=2.682, ppl=6.42, wps=5976.8, ups=0.09, wpb=64776, bsz=128, num_updates=5480, lr=9.99642e-05, gnorm=2.261, loss_scale=8, train_wall=10, gb_free=2.8, wall=62143 2021-06-19 11:54:40 | INFO | train_inner | epoch 002: 2514 / 3002 loss=2.618, ppl=6.14, wps=5862.4, ups=0.09, wpb=64920, bsz=128, num_updates=5481, lr=9.99641e-05, gnorm=2.11, loss_scale=8, train_wall=11, gb_free=2.8, wall=62155 2021-06-19 11:54:51 | INFO | train_inner | epoch 002: 2515 / 3002 loss=2.77, ppl=6.82, wps=5876.8, ups=0.09, wpb=64842, bsz=128, num_updates=5482, lr=9.99641e-05, gnorm=2.204, loss_scale=8, train_wall=11, gb_free=2.8, wall=62166 2021-06-19 11:55:02 | INFO | train_inner | epoch 002: 2516 / 3002 loss=2.667, ppl=6.35, wps=5853.2, ups=0.09, wpb=64852, bsz=128, num_updates=5483, lr=9.99641e-05, gnorm=2.19, loss_scale=8, train_wall=11, gb_free=2.8, wall=62177 2021-06-19 11:55:13 | INFO | train_inner | epoch 002: 2517 / 3002 loss=2.752, ppl=6.74, wps=5856.9, ups=0.09, wpb=64828, bsz=128, num_updates=5484, lr=9.99641e-05, gnorm=2.252, loss_scale=8, train_wall=11, gb_free=2.8, wall=62188 2021-06-19 11:55:24 | INFO | train_inner | epoch 002: 2518 / 3002 loss=2.541, ppl=5.82, wps=5890.9, ups=0.09, wpb=64815, bsz=128, num_updates=5485, lr=9.99641e-05, gnorm=2.323, loss_scale=8, train_wall=11, gb_free=2.8, wall=62199 2021-06-19 11:55:36 | INFO | train_inner | epoch 002: 2519 / 3002 loss=2.75, ppl=6.73, wps=5776, ups=0.09, wpb=64886, bsz=128, num_updates=5486, lr=9.99641e-05, gnorm=2.374, loss_scale=8, train_wall=11, gb_free=2.8, wall=62210 2021-06-19 11:55:46 | INFO | train_inner | epoch 002: 2520 / 3002 loss=2.749, ppl=6.72, wps=5989.3, ups=0.09, wpb=64800, bsz=128, num_updates=5487, lr=9.99641e-05, gnorm=2.204, loss_scale=8, train_wall=10, gb_free=2.8, wall=62221 2021-06-19 11:55:58 | INFO | train_inner | epoch 002: 2521 / 3002 loss=2.738, ppl=6.67, wps=5832.6, ups=0.09, wpb=64778, bsz=128, num_updates=5488, lr=9.99641e-05, gnorm=2.216, loss_scale=8, train_wall=11, gb_free=2.8, wall=62232 2021-06-19 11:56:09 | INFO | train_inner | epoch 002: 2522 / 3002 loss=2.702, ppl=6.51, wps=5838.4, ups=0.09, wpb=64724, bsz=128, num_updates=5489, lr=9.99641e-05, gnorm=3.194, loss_scale=8, train_wall=11, gb_free=2.8, wall=62243 2021-06-19 11:56:20 | INFO | train_inner | epoch 002: 2523 / 3002 loss=2.774, ppl=6.84, wps=5767.2, ups=0.09, wpb=64837, bsz=128, num_updates=5490, lr=9.99641e-05, gnorm=2.291, loss_scale=8, train_wall=11, gb_free=2.8, wall=62254 2021-06-19 11:56:31 | INFO | train_inner | epoch 002: 2524 / 3002 loss=2.69, ppl=6.45, wps=5908, ups=0.09, wpb=64772, bsz=128, num_updates=5491, lr=9.99641e-05, gnorm=2.312, loss_scale=8, train_wall=10, gb_free=2.8, wall=62265 2021-06-19 11:56:42 | INFO | train_inner | epoch 002: 2525 / 3002 loss=2.833, ppl=7.13, wps=5812.6, ups=0.09, wpb=64879, bsz=128, num_updates=5492, lr=9.99641e-05, gnorm=2.25, loss_scale=8, train_wall=11, gb_free=2.8, wall=62276 2021-06-19 11:56:53 | INFO | train_inner | epoch 002: 2526 / 3002 loss=2.865, ppl=7.29, wps=5742.5, ups=0.09, wpb=64805, bsz=128, num_updates=5493, lr=9.99641e-05, gnorm=2.221, loss_scale=16, train_wall=11, gb_free=2.8, wall=62288 2021-06-19 11:57:05 | INFO | train_inner | epoch 002: 2527 / 3002 loss=2.735, ppl=6.66, wps=5795.9, ups=0.09, wpb=64898, bsz=128, num_updates=5494, lr=9.9964e-05, gnorm=2.175, loss_scale=16, train_wall=11, gb_free=2.8, wall=62299 2021-06-19 11:57:16 | INFO | train_inner | epoch 002: 2528 / 3002 loss=2.794, ppl=6.94, wps=5888.9, ups=0.09, wpb=64822, bsz=128, num_updates=5495, lr=9.9964e-05, gnorm=2.206, loss_scale=16, train_wall=11, gb_free=2.8, wall=62310 2021-06-19 11:57:27 | INFO | train_inner | epoch 002: 2529 / 3002 loss=2.704, ppl=6.51, wps=5719.4, ups=0.09, wpb=64797, bsz=128, num_updates=5496, lr=9.9964e-05, gnorm=2.224, loss_scale=16, train_wall=11, gb_free=2.8, wall=62321 2021-06-19 11:57:38 | INFO | train_inner | epoch 002: 2530 / 3002 loss=2.596, ppl=6.05, wps=5912.5, ups=0.09, wpb=64821, bsz=128, num_updates=5497, lr=9.9964e-05, gnorm=2.215, loss_scale=16, train_wall=11, gb_free=2.8, wall=62332 2021-06-19 11:57:49 | INFO | train_inner | epoch 002: 2531 / 3002 loss=2.615, ppl=6.13, wps=5786.7, ups=0.09, wpb=64829, bsz=128, num_updates=5498, lr=9.9964e-05, gnorm=2.865, loss_scale=16, train_wall=11, gb_free=2.8, wall=62343 2021-06-19 11:58:00 | INFO | train_inner | epoch 002: 2532 / 3002 loss=2.712, ppl=6.55, wps=5845.7, ups=0.09, wpb=64776, bsz=128, num_updates=5499, lr=9.9964e-05, gnorm=2.339, loss_scale=16, train_wall=11, gb_free=2.8, wall=62354 2021-06-19 11:58:11 | INFO | train_inner | epoch 002: 2533 / 3002 loss=2.699, ppl=6.49, wps=5800.5, ups=0.09, wpb=64871, bsz=128, num_updates=5500, lr=9.9964e-05, gnorm=2.8, loss_scale=16, train_wall=11, gb_free=2.8, wall=62366 2021-06-19 11:58:22 | INFO | train_inner | epoch 002: 2534 / 3002 loss=2.74, ppl=6.68, wps=5907.7, ups=0.09, wpb=64903, bsz=128, num_updates=5501, lr=9.9964e-05, gnorm=2.25, loss_scale=16, train_wall=11, gb_free=2.8, wall=62377 2021-06-19 11:58:33 | INFO | train_inner | epoch 002: 2535 / 3002 loss=2.701, ppl=6.5, wps=5789.6, ups=0.09, wpb=64861, bsz=128, num_updates=5502, lr=9.9964e-05, gnorm=2.834, loss_scale=16, train_wall=11, gb_free=2.8, wall=62388 2021-06-19 11:58:45 | INFO | train_inner | epoch 002: 2536 / 3002 loss=2.575, ppl=5.96, wps=5830.6, ups=0.09, wpb=64798, bsz=128, num_updates=5503, lr=9.9964e-05, gnorm=2.254, loss_scale=16, train_wall=11, gb_free=2.8, wall=62399 2021-06-19 11:58:55 | INFO | train_inner | epoch 002: 2537 / 3002 loss=2.676, ppl=6.39, wps=5939.5, ups=0.09, wpb=64841, bsz=128, num_updates=5504, lr=9.9964e-05, gnorm=2.306, loss_scale=16, train_wall=10, gb_free=2.8, wall=62410 2021-06-19 11:59:07 | INFO | train_inner | epoch 002: 2538 / 3002 loss=2.66, ppl=6.32, wps=5788.2, ups=0.09, wpb=64773, bsz=128, num_updates=5505, lr=9.9964e-05, gnorm=2.28, loss_scale=16, train_wall=11, gb_free=2.8, wall=62421 2021-06-19 11:59:18 | INFO | train_inner | epoch 002: 2539 / 3002 loss=2.685, ppl=6.43, wps=5783.8, ups=0.09, wpb=64793, bsz=128, num_updates=5506, lr=9.99639e-05, gnorm=2.212, loss_scale=16, train_wall=11, gb_free=2.8, wall=62432 2021-06-19 11:59:29 | INFO | train_inner | epoch 002: 2540 / 3002 loss=2.877, ppl=7.34, wps=5815.9, ups=0.09, wpb=64776, bsz=128, num_updates=5507, lr=9.99639e-05, gnorm=2.258, loss_scale=16, train_wall=11, gb_free=2.8, wall=62443 2021-06-19 11:59:40 | INFO | train_inner | epoch 002: 2541 / 3002 loss=2.677, ppl=6.39, wps=5919.6, ups=0.09, wpb=64953, bsz=128, num_updates=5508, lr=9.99639e-05, gnorm=2.897, loss_scale=16, train_wall=11, gb_free=2.8, wall=62454 2021-06-19 11:59:51 | INFO | train_inner | epoch 002: 2542 / 3002 loss=2.769, ppl=6.82, wps=5838.4, ups=0.09, wpb=64816, bsz=128, num_updates=5509, lr=9.99639e-05, gnorm=2.216, loss_scale=16, train_wall=11, gb_free=2.8, wall=62465 2021-06-19 12:00:02 | INFO | train_inner | epoch 002: 2543 / 3002 loss=2.738, ppl=6.67, wps=5910.8, ups=0.09, wpb=64843, bsz=128, num_updates=5510, lr=9.99639e-05, gnorm=2.32, loss_scale=16, train_wall=11, gb_free=2.8, wall=62476 2021-06-19 12:00:13 | INFO | train_inner | epoch 002: 2544 / 3002 loss=2.697, ppl=6.49, wps=6018.5, ups=0.09, wpb=64794, bsz=128, num_updates=5511, lr=9.99639e-05, gnorm=2.211, loss_scale=16, train_wall=10, gb_free=2.8, wall=62487 2021-06-19 12:00:24 | INFO | train_inner | epoch 002: 2545 / 3002 loss=2.651, ppl=6.28, wps=5808.9, ups=0.09, wpb=64861, bsz=128, num_updates=5512, lr=9.99639e-05, gnorm=2.319, loss_scale=16, train_wall=11, gb_free=2.8, wall=62498 2021-06-19 12:00:35 | INFO | train_inner | epoch 002: 2546 / 3002 loss=2.742, ppl=6.69, wps=5950.6, ups=0.09, wpb=64842, bsz=128, num_updates=5513, lr=9.99639e-05, gnorm=2.222, loss_scale=16, train_wall=10, gb_free=2.8, wall=62509 2021-06-19 12:00:46 | INFO | train_inner | epoch 002: 2547 / 3002 loss=2.758, ppl=6.77, wps=5757, ups=0.09, wpb=64749, bsz=128, num_updates=5514, lr=9.99639e-05, gnorm=2.231, loss_scale=16, train_wall=11, gb_free=2.8, wall=62520 2021-06-19 12:00:57 | INFO | train_inner | epoch 002: 2548 / 3002 loss=2.764, ppl=6.79, wps=5920.8, ups=0.09, wpb=64900, bsz=128, num_updates=5515, lr=9.99639e-05, gnorm=2.16, loss_scale=16, train_wall=10, gb_free=2.8, wall=62531 2021-06-19 12:01:08 | INFO | train_inner | epoch 002: 2549 / 3002 loss=2.829, ppl=7.1, wps=5752.7, ups=0.09, wpb=64841, bsz=128, num_updates=5516, lr=9.99639e-05, gnorm=2.151, loss_scale=16, train_wall=11, gb_free=2.8, wall=62543 2021-06-19 12:01:19 | INFO | train_inner | epoch 002: 2550 / 3002 loss=2.787, ppl=6.9, wps=5835.3, ups=0.09, wpb=64805, bsz=128, num_updates=5517, lr=9.99639e-05, gnorm=2.204, loss_scale=16, train_wall=11, gb_free=2.8, wall=62554 2021-06-19 12:01:30 | INFO | train_inner | epoch 002: 2551 / 3002 loss=2.672, ppl=6.37, wps=5921.9, ups=0.09, wpb=64871, bsz=128, num_updates=5518, lr=9.99639e-05, gnorm=2.079, loss_scale=16, train_wall=10, gb_free=2.8, wall=62565 2021-06-19 12:01:42 | INFO | train_inner | epoch 002: 2552 / 3002 loss=2.553, ppl=5.87, wps=5708.1, ups=0.09, wpb=64804, bsz=128, num_updates=5519, lr=9.99638e-05, gnorm=2.119, loss_scale=16, train_wall=11, gb_free=2.8, wall=62576 2021-06-19 12:01:53 | INFO | train_inner | epoch 002: 2553 / 3002 loss=2.83, ppl=7.11, wps=5761.3, ups=0.09, wpb=64807, bsz=128, num_updates=5520, lr=9.99638e-05, gnorm=2.165, loss_scale=16, train_wall=11, gb_free=2.8, wall=62587 2021-06-19 12:02:04 | INFO | train_inner | epoch 002: 2554 / 3002 loss=2.836, ppl=7.14, wps=5872.6, ups=0.09, wpb=64838, bsz=128, num_updates=5521, lr=9.99638e-05, gnorm=2.256, loss_scale=16, train_wall=11, gb_free=2.8, wall=62598 2021-06-19 12:02:15 | INFO | train_inner | epoch 002: 2555 / 3002 loss=2.547, ppl=5.84, wps=5903.8, ups=0.09, wpb=64743, bsz=128, num_updates=5522, lr=9.99638e-05, gnorm=2.247, loss_scale=16, train_wall=10, gb_free=2.8, wall=62609 2021-06-19 12:02:26 | INFO | train_inner | epoch 002: 2556 / 3002 loss=2.732, ppl=6.64, wps=5789.6, ups=0.09, wpb=64861, bsz=128, num_updates=5523, lr=9.99638e-05, gnorm=2.217, loss_scale=16, train_wall=11, gb_free=2.8, wall=62621 2021-06-19 12:02:37 | INFO | train_inner | epoch 002: 2557 / 3002 loss=2.737, ppl=6.67, wps=5839.2, ups=0.09, wpb=64839, bsz=128, num_updates=5524, lr=9.99638e-05, gnorm=2.291, loss_scale=16, train_wall=11, gb_free=2.8, wall=62632 2021-06-19 12:02:49 | INFO | train_inner | epoch 002: 2558 / 3002 loss=2.851, ppl=7.21, wps=5795.2, ups=0.09, wpb=64838, bsz=128, num_updates=5525, lr=9.99638e-05, gnorm=2.18, loss_scale=16, train_wall=11, gb_free=2.8, wall=62643 2021-06-19 12:03:00 | INFO | train_inner | epoch 002: 2559 / 3002 loss=2.778, ppl=6.86, wps=5779.7, ups=0.09, wpb=64796, bsz=128, num_updates=5526, lr=9.99638e-05, gnorm=2.219, loss_scale=16, train_wall=11, gb_free=2.8, wall=62654 2021-06-19 12:03:11 | INFO | train_inner | epoch 002: 2560 / 3002 loss=2.803, ppl=6.98, wps=5821.4, ups=0.09, wpb=64728, bsz=128, num_updates=5527, lr=9.99638e-05, gnorm=2.133, loss_scale=16, train_wall=11, gb_free=2.8, wall=62665 2021-06-19 12:03:22 | INFO | train_inner | epoch 002: 2561 / 3002 loss=2.588, ppl=6.01, wps=5795.4, ups=0.09, wpb=64817, bsz=128, num_updates=5528, lr=9.99638e-05, gnorm=2.081, loss_scale=16, train_wall=11, gb_free=2.8, wall=62676 2021-06-19 12:03:33 | INFO | train_inner | epoch 002: 2562 / 3002 loss=2.605, ppl=6.08, wps=5778.4, ups=0.09, wpb=64850, bsz=128, num_updates=5529, lr=9.99638e-05, gnorm=2.144, loss_scale=16, train_wall=11, gb_free=2.8, wall=62688 2021-06-19 12:03:45 | INFO | train_inner | epoch 002: 2563 / 3002 loss=2.821, ppl=7.07, wps=5739.5, ups=0.09, wpb=64856, bsz=128, num_updates=5530, lr=9.99638e-05, gnorm=2.159, loss_scale=16, train_wall=11, gb_free=2.8, wall=62699 2021-06-19 12:03:56 | INFO | train_inner | epoch 002: 2564 / 3002 loss=2.664, ppl=6.34, wps=5916.2, ups=0.09, wpb=64896, bsz=128, num_updates=5531, lr=9.99637e-05, gnorm=2.189, loss_scale=16, train_wall=10, gb_free=2.8, wall=62710 2021-06-19 12:04:07 | INFO | train_inner | epoch 002: 2565 / 3002 loss=2.646, ppl=6.26, wps=5900.1, ups=0.09, wpb=64856, bsz=128, num_updates=5532, lr=9.99637e-05, gnorm=2.154, loss_scale=16, train_wall=11, gb_free=2.8, wall=62721 2021-06-19 12:04:18 | INFO | train_inner | epoch 002: 2566 / 3002 loss=2.644, ppl=6.25, wps=5833.6, ups=0.09, wpb=64818, bsz=128, num_updates=5533, lr=9.99637e-05, gnorm=2.243, loss_scale=16, train_wall=11, gb_free=2.8, wall=62732 2021-06-19 12:04:29 | INFO | train_inner | epoch 002: 2567 / 3002 loss=2.751, ppl=6.73, wps=5799.2, ups=0.09, wpb=64860, bsz=128, num_updates=5534, lr=9.99637e-05, gnorm=2.259, loss_scale=16, train_wall=11, gb_free=2.8, wall=62743 2021-06-19 12:04:40 | INFO | train_inner | epoch 002: 2568 / 3002 loss=2.66, ppl=6.32, wps=5862.9, ups=0.09, wpb=64733, bsz=128, num_updates=5535, lr=9.99637e-05, gnorm=2.225, loss_scale=16, train_wall=11, gb_free=2.8, wall=62754 2021-06-19 12:04:51 | INFO | train_inner | epoch 002: 2569 / 3002 loss=2.864, ppl=7.28, wps=5806.5, ups=0.09, wpb=64896, bsz=128, num_updates=5536, lr=9.99637e-05, gnorm=2.252, loss_scale=16, train_wall=11, gb_free=2.8, wall=62765 2021-06-19 12:05:02 | INFO | train_inner | epoch 002: 2570 / 3002 loss=2.687, ppl=6.44, wps=5885.2, ups=0.09, wpb=64900, bsz=128, num_updates=5537, lr=9.99637e-05, gnorm=2.201, loss_scale=16, train_wall=11, gb_free=2.8, wall=62776 2021-06-19 12:05:13 | INFO | train_inner | epoch 002: 2571 / 3002 loss=2.667, ppl=6.35, wps=5729.4, ups=0.09, wpb=64797, bsz=128, num_updates=5538, lr=9.99637e-05, gnorm=2.175, loss_scale=16, train_wall=11, gb_free=2.8, wall=62788 2021-06-19 12:05:25 | INFO | train_inner | epoch 002: 2572 / 3002 loss=2.708, ppl=6.53, wps=5818.2, ups=0.09, wpb=64852, bsz=128, num_updates=5539, lr=9.99637e-05, gnorm=2.193, loss_scale=16, train_wall=11, gb_free=2.8, wall=62799 2021-06-19 12:05:36 | INFO | train_inner | epoch 002: 2573 / 3002 loss=2.744, ppl=6.7, wps=5846.7, ups=0.09, wpb=64793, bsz=128, num_updates=5540, lr=9.99637e-05, gnorm=2.17, loss_scale=16, train_wall=11, gb_free=2.8, wall=62810 2021-06-19 12:05:47 | INFO | train_inner | epoch 002: 2574 / 3002 loss=2.682, ppl=6.42, wps=5873.7, ups=0.09, wpb=64832, bsz=128, num_updates=5541, lr=9.99637e-05, gnorm=2.191, loss_scale=16, train_wall=11, gb_free=2.8, wall=62821 2021-06-19 12:05:58 | INFO | train_inner | epoch 002: 2575 / 3002 loss=2.509, ppl=5.69, wps=5778.2, ups=0.09, wpb=64900, bsz=128, num_updates=5542, lr=9.99637e-05, gnorm=2.136, loss_scale=16, train_wall=11, gb_free=2.8, wall=62832 2021-06-19 12:06:09 | INFO | train_inner | epoch 002: 2576 / 3002 loss=2.693, ppl=6.47, wps=5788.3, ups=0.09, wpb=64791, bsz=128, num_updates=5543, lr=9.99637e-05, gnorm=2.181, loss_scale=16, train_wall=11, gb_free=2.8, wall=62843 2021-06-19 12:06:20 | INFO | train_inner | epoch 002: 2577 / 3002 loss=2.658, ppl=6.31, wps=5918.8, ups=0.09, wpb=64883, bsz=128, num_updates=5544, lr=9.99636e-05, gnorm=2.284, loss_scale=16, train_wall=11, gb_free=2.8, wall=62854 2021-06-19 12:06:31 | INFO | train_inner | epoch 002: 2578 / 3002 loss=2.592, ppl=6.03, wps=5878.1, ups=0.09, wpb=64730, bsz=128, num_updates=5545, lr=9.99636e-05, gnorm=2.088, loss_scale=16, train_wall=11, gb_free=2.8, wall=62865 2021-06-19 12:06:42 | INFO | train_inner | epoch 002: 2579 / 3002 loss=2.577, ppl=5.97, wps=5894.8, ups=0.09, wpb=64822, bsz=128, num_updates=5546, lr=9.99636e-05, gnorm=2.136, loss_scale=16, train_wall=11, gb_free=2.8, wall=62876 2021-06-19 12:06:53 | INFO | train_inner | epoch 002: 2580 / 3002 loss=2.839, ppl=7.15, wps=5867.3, ups=0.09, wpb=64806, bsz=128, num_updates=5547, lr=9.99636e-05, gnorm=2.554, loss_scale=16, train_wall=11, gb_free=2.8, wall=62887 2021-06-19 12:07:04 | INFO | train_inner | epoch 002: 2581 / 3002 loss=2.542, ppl=5.83, wps=5974.3, ups=0.09, wpb=64912, bsz=128, num_updates=5548, lr=9.99636e-05, gnorm=2.223, loss_scale=16, train_wall=10, gb_free=2.8, wall=62898 2021-06-19 12:07:15 | INFO | train_inner | epoch 002: 2582 / 3002 loss=2.676, ppl=6.39, wps=5904.7, ups=0.09, wpb=64771, bsz=128, num_updates=5549, lr=9.99636e-05, gnorm=2.161, loss_scale=16, train_wall=11, gb_free=2.8, wall=62909 2021-06-19 12:07:26 | INFO | train_inner | epoch 002: 2583 / 3002 loss=2.702, ppl=6.51, wps=5842.1, ups=0.09, wpb=64767, bsz=128, num_updates=5550, lr=9.99636e-05, gnorm=2.15, loss_scale=16, train_wall=11, gb_free=2.8, wall=62920 2021-06-19 12:07:37 | INFO | train_inner | epoch 002: 2584 / 3002 loss=2.779, ppl=6.86, wps=5746.9, ups=0.09, wpb=64864, bsz=128, num_updates=5551, lr=9.99636e-05, gnorm=2.063, loss_scale=16, train_wall=11, gb_free=2.8, wall=62932 2021-06-19 12:07:48 | INFO | train_inner | epoch 002: 2585 / 3002 loss=2.813, ppl=7.03, wps=5974.5, ups=0.09, wpb=64735, bsz=128, num_updates=5552, lr=9.99636e-05, gnorm=2.189, loss_scale=16, train_wall=10, gb_free=2.8, wall=62942 2021-06-19 12:07:59 | INFO | train_inner | epoch 002: 2586 / 3002 loss=2.713, ppl=6.56, wps=5811, ups=0.09, wpb=64917, bsz=128, num_updates=5553, lr=9.99636e-05, gnorm=2.144, loss_scale=16, train_wall=11, gb_free=2.8, wall=62954 2021-06-19 12:08:10 | INFO | train_inner | epoch 002: 2587 / 3002 loss=2.801, ppl=6.97, wps=5810, ups=0.09, wpb=64806, bsz=128, num_updates=5554, lr=9.99636e-05, gnorm=2.196, loss_scale=16, train_wall=11, gb_free=2.8, wall=62965 2021-06-19 12:08:22 | INFO | train_inner | epoch 002: 2588 / 3002 loss=2.646, ppl=6.26, wps=5842.8, ups=0.09, wpb=64806, bsz=128, num_updates=5555, lr=9.99636e-05, gnorm=2.213, loss_scale=16, train_wall=11, gb_free=2.8, wall=62976 2021-06-19 12:08:33 | INFO | train_inner | epoch 002: 2589 / 3002 loss=2.764, ppl=6.8, wps=5872.8, ups=0.09, wpb=64776, bsz=128, num_updates=5556, lr=9.99635e-05, gnorm=2.268, loss_scale=16, train_wall=11, gb_free=2.8, wall=62987 2021-06-19 12:08:44 | INFO | train_inner | epoch 002: 2590 / 3002 loss=2.628, ppl=6.18, wps=5733.4, ups=0.09, wpb=64767, bsz=128, num_updates=5557, lr=9.99635e-05, gnorm=2.41, loss_scale=16, train_wall=11, gb_free=2.8, wall=62998 2021-06-19 12:08:55 | INFO | train_inner | epoch 002: 2591 / 3002 loss=2.736, ppl=6.66, wps=5921, ups=0.09, wpb=64894, bsz=128, num_updates=5558, lr=9.99635e-05, gnorm=2.2, loss_scale=16, train_wall=11, gb_free=2.8, wall=63009 2021-06-19 12:09:06 | INFO | train_inner | epoch 002: 2592 / 3002 loss=2.705, ppl=6.52, wps=5866.4, ups=0.09, wpb=64846, bsz=128, num_updates=5559, lr=9.99635e-05, gnorm=2.198, loss_scale=16, train_wall=11, gb_free=2.8, wall=63020 2021-06-19 12:09:17 | INFO | train_inner | epoch 002: 2593 / 3002 loss=2.66, ppl=6.32, wps=5830.8, ups=0.09, wpb=64858, bsz=128, num_updates=5560, lr=9.99635e-05, gnorm=4.013, loss_scale=16, train_wall=11, gb_free=2.8, wall=63031 2021-06-19 12:09:28 | INFO | train_inner | epoch 002: 2594 / 3002 loss=2.659, ppl=6.31, wps=5831.6, ups=0.09, wpb=64800, bsz=128, num_updates=5561, lr=9.99635e-05, gnorm=3.656, loss_scale=16, train_wall=11, gb_free=2.8, wall=63042 2021-06-19 12:09:39 | INFO | train_inner | epoch 002: 2595 / 3002 loss=2.743, ppl=6.69, wps=5877.8, ups=0.09, wpb=64788, bsz=128, num_updates=5562, lr=9.99635e-05, gnorm=2.131, loss_scale=16, train_wall=11, gb_free=2.8, wall=63054 2021-06-19 12:09:50 | INFO | train_inner | epoch 002: 2596 / 3002 loss=2.771, ppl=6.83, wps=5837.3, ups=0.09, wpb=64842, bsz=128, num_updates=5563, lr=9.99635e-05, gnorm=2.271, loss_scale=16, train_wall=11, gb_free=2.8, wall=63065 2021-06-19 12:10:01 | INFO | train_inner | epoch 002: 2597 / 3002 loss=2.836, ppl=7.14, wps=5794.1, ups=0.09, wpb=64854, bsz=128, num_updates=5564, lr=9.99635e-05, gnorm=2.226, loss_scale=16, train_wall=11, gb_free=2.8, wall=63076 2021-06-19 12:10:13 | INFO | train_inner | epoch 002: 2598 / 3002 loss=2.634, ppl=6.21, wps=5805.5, ups=0.09, wpb=64880, bsz=128, num_updates=5565, lr=9.99635e-05, gnorm=2.264, loss_scale=16, train_wall=11, gb_free=2.8, wall=63087 2021-06-19 12:10:24 | INFO | train_inner | epoch 002: 2599 / 3002 loss=2.829, ppl=7.11, wps=5903.2, ups=0.09, wpb=64745, bsz=128, num_updates=5566, lr=9.99635e-05, gnorm=2.24, loss_scale=16, train_wall=11, gb_free=2.8, wall=63098 2021-06-19 12:10:35 | INFO | train_inner | epoch 002: 2600 / 3002 loss=2.759, ppl=6.77, wps=5846.5, ups=0.09, wpb=64800, bsz=128, num_updates=5567, lr=9.99635e-05, gnorm=2.308, loss_scale=16, train_wall=11, gb_free=2.8, wall=63109 2021-06-19 12:10:46 | INFO | train_inner | epoch 002: 2601 / 3002 loss=2.905, ppl=7.49, wps=5734.8, ups=0.09, wpb=64799, bsz=128, num_updates=5568, lr=9.99635e-05, gnorm=2.377, loss_scale=16, train_wall=11, gb_free=2.8, wall=63120 2021-06-19 12:10:57 | INFO | train_inner | epoch 002: 2602 / 3002 loss=2.788, ppl=6.91, wps=5951.1, ups=0.09, wpb=64787, bsz=128, num_updates=5569, lr=9.99634e-05, gnorm=2.496, loss_scale=16, train_wall=10, gb_free=2.8, wall=63131 2021-06-19 12:11:08 | INFO | train_inner | epoch 002: 2603 / 3002 loss=2.783, ppl=6.88, wps=5884.3, ups=0.09, wpb=64819, bsz=128, num_updates=5570, lr=9.99634e-05, gnorm=2.156, loss_scale=16, train_wall=11, gb_free=2.8, wall=63142 2021-06-19 12:11:19 | INFO | train_inner | epoch 002: 2604 / 3002 loss=2.698, ppl=6.49, wps=5909.9, ups=0.09, wpb=64863, bsz=128, num_updates=5571, lr=9.99634e-05, gnorm=2.336, loss_scale=16, train_wall=11, gb_free=2.8, wall=63153 2021-06-19 12:11:30 | INFO | train_inner | epoch 002: 2605 / 3002 loss=2.75, ppl=6.73, wps=5931.6, ups=0.09, wpb=64847, bsz=128, num_updates=5572, lr=9.99634e-05, gnorm=2.217, loss_scale=16, train_wall=10, gb_free=2.8, wall=63164 2021-06-19 12:11:41 | INFO | train_inner | epoch 002: 2606 / 3002 loss=2.755, ppl=6.75, wps=5869.5, ups=0.09, wpb=64819, bsz=128, num_updates=5573, lr=9.99634e-05, gnorm=2.268, loss_scale=16, train_wall=11, gb_free=2.8, wall=63175 2021-06-19 12:11:52 | INFO | train_inner | epoch 002: 2607 / 3002 loss=2.768, ppl=6.81, wps=5957.1, ups=0.09, wpb=64836, bsz=128, num_updates=5574, lr=9.99634e-05, gnorm=2.177, loss_scale=16, train_wall=10, gb_free=2.8, wall=63186 2021-06-19 12:12:03 | INFO | train_inner | epoch 002: 2608 / 3002 loss=2.771, ppl=6.83, wps=6002.8, ups=0.09, wpb=64892, bsz=128, num_updates=5575, lr=9.99634e-05, gnorm=2.258, loss_scale=16, train_wall=10, gb_free=2.8, wall=63197 2021-06-19 12:12:14 | INFO | train_inner | epoch 002: 2609 / 3002 loss=2.691, ppl=6.46, wps=5878.3, ups=0.09, wpb=64866, bsz=128, num_updates=5576, lr=9.99634e-05, gnorm=2.364, loss_scale=16, train_wall=11, gb_free=2.8, wall=63208 2021-06-19 12:12:25 | INFO | train_inner | epoch 002: 2610 / 3002 loss=2.602, ppl=6.07, wps=5862, ups=0.09, wpb=64811, bsz=128, num_updates=5577, lr=9.99634e-05, gnorm=2.201, loss_scale=16, train_wall=11, gb_free=2.8, wall=63219 2021-06-19 12:12:36 | INFO | train_inner | epoch 002: 2611 / 3002 loss=2.626, ppl=6.17, wps=5871.3, ups=0.09, wpb=64813, bsz=128, num_updates=5578, lr=9.99634e-05, gnorm=2.219, loss_scale=16, train_wall=11, gb_free=2.8, wall=63230 2021-06-19 12:12:47 | INFO | train_inner | epoch 002: 2612 / 3002 loss=2.596, ppl=6.05, wps=5813.9, ups=0.09, wpb=64835, bsz=128, num_updates=5579, lr=9.99634e-05, gnorm=2.261, loss_scale=16, train_wall=11, gb_free=2.8, wall=63241 2021-06-19 12:12:58 | INFO | train_inner | epoch 002: 2613 / 3002 loss=2.749, ppl=6.72, wps=5876.6, ups=0.09, wpb=64731, bsz=128, num_updates=5580, lr=9.99634e-05, gnorm=2.25, loss_scale=16, train_wall=11, gb_free=2.8, wall=63252 2021-06-19 12:13:09 | INFO | train_inner | epoch 002: 2614 / 3002 loss=2.738, ppl=6.67, wps=5892.8, ups=0.09, wpb=64810, bsz=128, num_updates=5581, lr=9.99633e-05, gnorm=2.155, loss_scale=16, train_wall=11, gb_free=2.8, wall=63263 2021-06-19 12:13:20 | INFO | train_inner | epoch 002: 2615 / 3002 loss=2.695, ppl=6.47, wps=5830.1, ups=0.09, wpb=64799, bsz=128, num_updates=5582, lr=9.99633e-05, gnorm=2.196, loss_scale=16, train_wall=11, gb_free=2.8, wall=63274 2021-06-19 12:13:31 | INFO | train_inner | epoch 002: 2616 / 3002 loss=2.872, ppl=7.32, wps=5940.7, ups=0.09, wpb=64767, bsz=128, num_updates=5583, lr=9.99633e-05, gnorm=2.306, loss_scale=16, train_wall=10, gb_free=2.8, wall=63285 2021-06-19 12:13:42 | INFO | train_inner | epoch 002: 2617 / 3002 loss=2.592, ppl=6.03, wps=5867.3, ups=0.09, wpb=64905, bsz=128, num_updates=5584, lr=9.99633e-05, gnorm=2.163, loss_scale=16, train_wall=11, gb_free=2.8, wall=63296 2021-06-19 12:13:53 | INFO | train_inner | epoch 002: 2618 / 3002 loss=2.631, ppl=6.19, wps=5812.6, ups=0.09, wpb=64819, bsz=128, num_updates=5585, lr=9.99633e-05, gnorm=2.15, loss_scale=16, train_wall=11, gb_free=2.8, wall=63307 2021-06-19 12:14:04 | INFO | train_inner | epoch 002: 2619 / 3002 loss=2.746, ppl=6.71, wps=5759.7, ups=0.09, wpb=64808, bsz=128, num_updates=5586, lr=9.99633e-05, gnorm=2.234, loss_scale=16, train_wall=11, gb_free=2.8, wall=63319 2021-06-19 12:14:15 | INFO | train_inner | epoch 002: 2620 / 3002 loss=2.75, ppl=6.73, wps=5897.5, ups=0.09, wpb=64834, bsz=128, num_updates=5587, lr=9.99633e-05, gnorm=2.403, loss_scale=16, train_wall=11, gb_free=2.8, wall=63330 2021-06-19 12:14:26 | INFO | train_inner | epoch 002: 2621 / 3002 loss=2.626, ppl=6.17, wps=5861.1, ups=0.09, wpb=64768, bsz=128, num_updates=5588, lr=9.99633e-05, gnorm=2.144, loss_scale=16, train_wall=11, gb_free=2.8, wall=63341 2021-06-19 12:14:37 | INFO | train_inner | epoch 002: 2622 / 3002 loss=2.623, ppl=6.16, wps=5942.8, ups=0.09, wpb=64864, bsz=128, num_updates=5589, lr=9.99633e-05, gnorm=2.194, loss_scale=16, train_wall=10, gb_free=2.8, wall=63352 2021-06-19 12:14:48 | INFO | train_inner | epoch 002: 2623 / 3002 loss=2.817, ppl=7.05, wps=5983.5, ups=0.09, wpb=64735, bsz=128, num_updates=5590, lr=9.99633e-05, gnorm=2.255, loss_scale=16, train_wall=10, gb_free=2.8, wall=63362 2021-06-19 12:14:59 | INFO | train_inner | epoch 002: 2624 / 3002 loss=2.548, ppl=5.85, wps=5756.1, ups=0.09, wpb=64788, bsz=128, num_updates=5591, lr=9.99633e-05, gnorm=7.313, loss_scale=16, train_wall=11, gb_free=2.8, wall=63374 2021-06-19 12:15:10 | INFO | train_inner | epoch 002: 2625 / 3002 loss=2.668, ppl=6.35, wps=5884.1, ups=0.09, wpb=64900, bsz=128, num_updates=5592, lr=9.99633e-05, gnorm=2.077, loss_scale=16, train_wall=11, gb_free=2.8, wall=63385 2021-06-19 12:15:22 | INFO | train_inner | epoch 002: 2626 / 3002 loss=2.749, ppl=6.72, wps=5790.1, ups=0.09, wpb=64737, bsz=128, num_updates=5593, lr=9.99633e-05, gnorm=3.189, loss_scale=16, train_wall=11, gb_free=2.8, wall=63396 2021-06-19 12:15:32 | INFO | train_inner | epoch 002: 2627 / 3002 loss=2.738, ppl=6.67, wps=5963.5, ups=0.09, wpb=64854, bsz=128, num_updates=5594, lr=9.99632e-05, gnorm=2.199, loss_scale=16, train_wall=10, gb_free=2.8, wall=63407 2021-06-19 12:15:44 | INFO | train_inner | epoch 002: 2628 / 3002 loss=2.637, ppl=6.22, wps=5738.2, ups=0.09, wpb=64862, bsz=128, num_updates=5595, lr=9.99632e-05, gnorm=2.158, loss_scale=16, train_wall=11, gb_free=2.8, wall=63418 2021-06-19 12:15:55 | INFO | train_inner | epoch 002: 2629 / 3002 loss=2.628, ppl=6.18, wps=5720, ups=0.09, wpb=64855, bsz=128, num_updates=5596, lr=9.99632e-05, gnorm=2.096, loss_scale=16, train_wall=11, gb_free=2.8, wall=63429 2021-06-19 12:16:06 | INFO | train_inner | epoch 002: 2630 / 3002 loss=2.784, ppl=6.89, wps=5939.5, ups=0.09, wpb=64843, bsz=128, num_updates=5597, lr=9.99632e-05, gnorm=2.202, loss_scale=16, train_wall=10, gb_free=2.8, wall=63440 2021-06-19 12:16:17 | INFO | train_inner | epoch 002: 2631 / 3002 loss=2.612, ppl=6.11, wps=6004.3, ups=0.09, wpb=64825, bsz=128, num_updates=5598, lr=9.99632e-05, gnorm=2.172, loss_scale=16, train_wall=10, gb_free=2.8, wall=63451 2021-06-19 12:16:28 | INFO | train_inner | epoch 002: 2632 / 3002 loss=2.8, ppl=6.97, wps=5951.3, ups=0.09, wpb=64794, bsz=128, num_updates=5599, lr=9.99632e-05, gnorm=2.189, loss_scale=16, train_wall=10, gb_free=2.8, wall=63462 2021-06-19 12:16:39 | INFO | train_inner | epoch 002: 2633 / 3002 loss=2.676, ppl=6.39, wps=5938.4, ups=0.09, wpb=64826, bsz=128, num_updates=5600, lr=9.99632e-05, gnorm=2.225, loss_scale=16, train_wall=10, gb_free=2.8, wall=63473 2021-06-19 12:16:50 | INFO | train_inner | epoch 002: 2634 / 3002 loss=2.591, ppl=6.02, wps=5846.1, ups=0.09, wpb=64798, bsz=128, num_updates=5601, lr=9.99632e-05, gnorm=2.139, loss_scale=16, train_wall=11, gb_free=2.8, wall=63484 2021-06-19 12:17:01 | INFO | train_inner | epoch 002: 2635 / 3002 loss=2.717, ppl=6.57, wps=5772.6, ups=0.09, wpb=64812, bsz=128, num_updates=5602, lr=9.99632e-05, gnorm=2.353, loss_scale=16, train_wall=11, gb_free=2.8, wall=63495 2021-06-19 12:17:12 | INFO | train_inner | epoch 002: 2636 / 3002 loss=2.574, ppl=5.95, wps=5815.1, ups=0.09, wpb=64812, bsz=128, num_updates=5603, lr=9.99632e-05, gnorm=2.607, loss_scale=16, train_wall=11, gb_free=2.8, wall=63506 2021-06-19 12:17:23 | INFO | train_inner | epoch 002: 2637 / 3002 loss=2.778, ppl=6.86, wps=5822.1, ups=0.09, wpb=64891, bsz=128, num_updates=5604, lr=9.99632e-05, gnorm=2.367, loss_scale=16, train_wall=11, gb_free=2.8, wall=63518 2021-06-19 12:17:34 | INFO | train_inner | epoch 002: 2638 / 3002 loss=2.61, ppl=6.11, wps=5793.1, ups=0.09, wpb=64874, bsz=128, num_updates=5605, lr=9.99632e-05, gnorm=2.199, loss_scale=16, train_wall=11, gb_free=2.8, wall=63529 2021-06-19 12:17:45 | INFO | train_inner | epoch 002: 2639 / 3002 loss=2.689, ppl=6.45, wps=5847.8, ups=0.09, wpb=64797, bsz=128, num_updates=5606, lr=9.99631e-05, gnorm=2.542, loss_scale=16, train_wall=11, gb_free=2.8, wall=63540 2021-06-19 12:17:57 | INFO | train_inner | epoch 002: 2640 / 3002 loss=2.824, ppl=7.08, wps=5748, ups=0.09, wpb=64805, bsz=128, num_updates=5607, lr=9.99631e-05, gnorm=2.438, loss_scale=16, train_wall=11, gb_free=2.8, wall=63551 2021-06-19 12:18:08 | INFO | train_inner | epoch 002: 2641 / 3002 loss=2.676, ppl=6.39, wps=5793.5, ups=0.09, wpb=64853, bsz=128, num_updates=5608, lr=9.99631e-05, gnorm=2.392, loss_scale=16, train_wall=11, gb_free=2.8, wall=63562 2021-06-19 12:18:19 | INFO | train_inner | epoch 002: 2642 / 3002 loss=2.88, ppl=7.36, wps=5918, ups=0.09, wpb=64821, bsz=128, num_updates=5609, lr=9.99631e-05, gnorm=2.187, loss_scale=16, train_wall=10, gb_free=2.8, wall=63573 2021-06-19 12:18:30 | INFO | train_inner | epoch 002: 2643 / 3002 loss=2.618, ppl=6.14, wps=5969.7, ups=0.09, wpb=64843, bsz=128, num_updates=5610, lr=9.99631e-05, gnorm=2.095, loss_scale=16, train_wall=10, gb_free=2.8, wall=63584 2021-06-19 12:18:41 | INFO | train_inner | epoch 002: 2644 / 3002 loss=2.687, ppl=6.44, wps=5798.2, ups=0.09, wpb=64795, bsz=128, num_updates=5611, lr=9.99631e-05, gnorm=2.149, loss_scale=16, train_wall=11, gb_free=2.8, wall=63595 2021-06-19 12:18:52 | INFO | train_inner | epoch 002: 2645 / 3002 loss=2.72, ppl=6.59, wps=5843.6, ups=0.09, wpb=64840, bsz=128, num_updates=5612, lr=9.99631e-05, gnorm=2.132, loss_scale=16, train_wall=11, gb_free=2.8, wall=63606 2021-06-19 12:19:03 | INFO | train_inner | epoch 002: 2646 / 3002 loss=2.791, ppl=6.92, wps=5912.9, ups=0.09, wpb=64875, bsz=128, num_updates=5613, lr=9.99631e-05, gnorm=2.208, loss_scale=16, train_wall=10, gb_free=2.8, wall=63617 2021-06-19 12:19:14 | INFO | train_inner | epoch 002: 2647 / 3002 loss=2.951, ppl=7.73, wps=5889.8, ups=0.09, wpb=64835, bsz=128, num_updates=5614, lr=9.99631e-05, gnorm=2.288, loss_scale=16, train_wall=11, gb_free=2.8, wall=63628 2021-06-19 12:19:25 | INFO | train_inner | epoch 002: 2648 / 3002 loss=2.744, ppl=6.7, wps=5855.3, ups=0.09, wpb=64791, bsz=128, num_updates=5615, lr=9.99631e-05, gnorm=2.152, loss_scale=16, train_wall=11, gb_free=2.8, wall=63639 2021-06-19 12:19:36 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-19 12:19:47 | INFO | train_inner | epoch 002: 2650 / 3002 loss=2.725, ppl=6.61, wps=2924.1, ups=0.05, wpb=64746, bsz=128, num_updates=5616, lr=9.99631e-05, gnorm=2.512, loss_scale=8, train_wall=21, gb_free=2.8, wall=63662 2021-06-19 12:19:58 | INFO | train_inner | epoch 002: 2651 / 3002 loss=2.91, ppl=7.52, wps=5847.3, ups=0.09, wpb=64871, bsz=128, num_updates=5617, lr=9.99631e-05, gnorm=2.18, loss_scale=8, train_wall=11, gb_free=2.8, wall=63673 2021-06-19 12:20:09 | INFO | train_inner | epoch 002: 2652 / 3002 loss=2.943, ppl=7.69, wps=5993.9, ups=0.09, wpb=64861, bsz=128, num_updates=5618, lr=9.99631e-05, gnorm=2.182, loss_scale=8, train_wall=10, gb_free=2.8, wall=63683 2021-06-19 12:20:20 | INFO | train_inner | epoch 002: 2653 / 3002 loss=2.778, ppl=6.86, wps=5823.1, ups=0.09, wpb=64801, bsz=128, num_updates=5619, lr=9.9963e-05, gnorm=2.109, loss_scale=8, train_wall=11, gb_free=2.8, wall=63695 2021-06-19 12:20:31 | INFO | train_inner | epoch 002: 2654 / 3002 loss=2.75, ppl=6.73, wps=5852.7, ups=0.09, wpb=64758, bsz=128, num_updates=5620, lr=9.9963e-05, gnorm=4.024, loss_scale=8, train_wall=11, gb_free=2.8, wall=63706 2021-06-19 12:20:42 | INFO | train_inner | epoch 002: 2655 / 3002 loss=2.738, ppl=6.67, wps=5827.2, ups=0.09, wpb=64877, bsz=128, num_updates=5621, lr=9.9963e-05, gnorm=2.116, loss_scale=8, train_wall=11, gb_free=2.8, wall=63717 2021-06-19 12:20:54 | INFO | train_inner | epoch 002: 2656 / 3002 loss=2.622, ppl=6.15, wps=5770.9, ups=0.09, wpb=64799, bsz=128, num_updates=5622, lr=9.9963e-05, gnorm=2.16, loss_scale=8, train_wall=11, gb_free=2.8, wall=63728 2021-06-19 12:21:05 | INFO | train_inner | epoch 002: 2657 / 3002 loss=2.667, ppl=6.35, wps=5923.8, ups=0.09, wpb=64773, bsz=128, num_updates=5623, lr=9.9963e-05, gnorm=2.24, loss_scale=8, train_wall=10, gb_free=2.8, wall=63739 2021-06-19 12:21:16 | INFO | train_inner | epoch 002: 2658 / 3002 loss=2.535, ppl=5.79, wps=5788.5, ups=0.09, wpb=64825, bsz=128, num_updates=5624, lr=9.9963e-05, gnorm=2.203, loss_scale=8, train_wall=11, gb_free=2.8, wall=63750 2021-06-19 12:21:27 | INFO | train_inner | epoch 002: 2659 / 3002 loss=2.645, ppl=6.26, wps=5919.8, ups=0.09, wpb=64811, bsz=128, num_updates=5625, lr=9.9963e-05, gnorm=2.144, loss_scale=8, train_wall=11, gb_free=2.8, wall=63761 2021-06-19 12:21:38 | INFO | train_inner | epoch 002: 2660 / 3002 loss=2.814, ppl=7.03, wps=5842.8, ups=0.09, wpb=64814, bsz=128, num_updates=5626, lr=9.9963e-05, gnorm=2.367, loss_scale=8, train_wall=11, gb_free=2.8, wall=63772 2021-06-19 12:21:49 | INFO | train_inner | epoch 002: 2661 / 3002 loss=2.746, ppl=6.71, wps=5936.1, ups=0.09, wpb=64864, bsz=128, num_updates=5627, lr=9.9963e-05, gnorm=2.479, loss_scale=8, train_wall=10, gb_free=2.8, wall=63783 2021-06-19 12:22:00 | INFO | train_inner | epoch 002: 2662 / 3002 loss=2.649, ppl=6.27, wps=5894, ups=0.09, wpb=64860, bsz=128, num_updates=5628, lr=9.9963e-05, gnorm=2.206, loss_scale=8, train_wall=11, gb_free=2.8, wall=63794 2021-06-19 12:22:11 | INFO | train_inner | epoch 002: 2663 / 3002 loss=2.773, ppl=6.84, wps=5901.3, ups=0.09, wpb=64796, bsz=128, num_updates=5629, lr=9.9963e-05, gnorm=2.233, loss_scale=8, train_wall=11, gb_free=2.8, wall=63805 2021-06-19 12:22:22 | INFO | train_inner | epoch 002: 2664 / 3002 loss=2.827, ppl=7.1, wps=5933.2, ups=0.09, wpb=64832, bsz=128, num_updates=5630, lr=9.9963e-05, gnorm=2.25, loss_scale=8, train_wall=10, gb_free=2.8, wall=63816 2021-06-19 12:22:33 | INFO | train_inner | epoch 002: 2665 / 3002 loss=2.67, ppl=6.37, wps=5870.5, ups=0.09, wpb=64637, bsz=128, num_updates=5631, lr=9.99629e-05, gnorm=2.131, loss_scale=8, train_wall=11, gb_free=2.8, wall=63827 2021-06-19 12:22:44 | INFO | train_inner | epoch 002: 2666 / 3002 loss=2.655, ppl=6.3, wps=5992.6, ups=0.09, wpb=64809, bsz=128, num_updates=5632, lr=9.99629e-05, gnorm=2.216, loss_scale=8, train_wall=10, gb_free=2.8, wall=63838 2021-06-19 12:22:55 | INFO | train_inner | epoch 002: 2667 / 3002 loss=2.756, ppl=6.76, wps=5830.7, ups=0.09, wpb=64831, bsz=128, num_updates=5633, lr=9.99629e-05, gnorm=2.103, loss_scale=8, train_wall=11, gb_free=2.8, wall=63849 2021-06-19 12:23:06 | INFO | train_inner | epoch 002: 2668 / 3002 loss=2.754, ppl=6.75, wps=5834.6, ups=0.09, wpb=64857, bsz=128, num_updates=5634, lr=9.99629e-05, gnorm=2.106, loss_scale=8, train_wall=11, gb_free=2.8, wall=63860 2021-06-19 12:23:17 | INFO | train_inner | epoch 002: 2669 / 3002 loss=2.719, ppl=6.58, wps=5834.8, ups=0.09, wpb=64795, bsz=128, num_updates=5635, lr=9.99629e-05, gnorm=2.082, loss_scale=8, train_wall=11, gb_free=2.8, wall=63871 2021-06-19 12:23:28 | INFO | train_inner | epoch 002: 2670 / 3002 loss=2.64, ppl=6.23, wps=5763.1, ups=0.09, wpb=64764, bsz=128, num_updates=5636, lr=9.99629e-05, gnorm=2.193, loss_scale=8, train_wall=11, gb_free=2.8, wall=63882 2021-06-19 12:23:39 | INFO | train_inner | epoch 002: 2671 / 3002 loss=2.754, ppl=6.74, wps=5831.7, ups=0.09, wpb=64800, bsz=128, num_updates=5637, lr=9.99629e-05, gnorm=2.232, loss_scale=8, train_wall=11, gb_free=2.8, wall=63894 2021-06-19 12:23:50 | INFO | train_inner | epoch 002: 2672 / 3002 loss=2.791, ppl=6.92, wps=5836.5, ups=0.09, wpb=64815, bsz=128, num_updates=5638, lr=9.99629e-05, gnorm=2.365, loss_scale=8, train_wall=11, gb_free=2.8, wall=63905 2021-06-19 12:24:02 | INFO | train_inner | epoch 002: 2673 / 3002 loss=2.755, ppl=6.75, wps=5759.3, ups=0.09, wpb=64807, bsz=128, num_updates=5639, lr=9.99629e-05, gnorm=2.169, loss_scale=8, train_wall=11, gb_free=2.8, wall=63916 2021-06-19 12:24:13 | INFO | train_inner | epoch 002: 2674 / 3002 loss=2.659, ppl=6.32, wps=5816.3, ups=0.09, wpb=64866, bsz=128, num_updates=5640, lr=9.99629e-05, gnorm=2.175, loss_scale=8, train_wall=11, gb_free=2.8, wall=63927 2021-06-19 12:24:24 | INFO | train_inner | epoch 002: 2675 / 3002 loss=2.782, ppl=6.88, wps=5829.6, ups=0.09, wpb=64807, bsz=128, num_updates=5641, lr=9.99629e-05, gnorm=2.273, loss_scale=8, train_wall=11, gb_free=2.8, wall=63938 2021-06-19 12:24:35 | INFO | train_inner | epoch 002: 2676 / 3002 loss=2.617, ppl=6.13, wps=5830.4, ups=0.09, wpb=64838, bsz=128, num_updates=5642, lr=9.99629e-05, gnorm=2.253, loss_scale=8, train_wall=11, gb_free=2.8, wall=63949 2021-06-19 12:24:46 | INFO | train_inner | epoch 002: 2677 / 3002 loss=2.716, ppl=6.57, wps=5777.4, ups=0.09, wpb=64769, bsz=128, num_updates=5643, lr=9.99629e-05, gnorm=2.189, loss_scale=8, train_wall=11, gb_free=2.8, wall=63961 2021-06-19 12:24:57 | INFO | train_inner | epoch 002: 2678 / 3002 loss=2.71, ppl=6.54, wps=5847.8, ups=0.09, wpb=64821, bsz=128, num_updates=5644, lr=9.99628e-05, gnorm=2.212, loss_scale=8, train_wall=11, gb_free=2.8, wall=63972 2021-06-19 12:25:08 | INFO | train_inner | epoch 002: 2679 / 3002 loss=2.697, ppl=6.48, wps=5958.5, ups=0.09, wpb=64876, bsz=128, num_updates=5645, lr=9.99628e-05, gnorm=2.122, loss_scale=8, train_wall=10, gb_free=2.8, wall=63983 2021-06-19 12:25:19 | INFO | train_inner | epoch 002: 2680 / 3002 loss=2.717, ppl=6.57, wps=5866.4, ups=0.09, wpb=64831, bsz=128, num_updates=5646, lr=9.99628e-05, gnorm=2.21, loss_scale=8, train_wall=11, gb_free=2.8, wall=63994 2021-06-19 12:25:30 | INFO | train_inner | epoch 002: 2681 / 3002 loss=2.78, ppl=6.87, wps=5854.1, ups=0.09, wpb=64796, bsz=128, num_updates=5647, lr=9.99628e-05, gnorm=2.246, loss_scale=8, train_wall=11, gb_free=2.8, wall=64005 2021-06-19 12:25:42 | INFO | train_inner | epoch 002: 2682 / 3002 loss=2.827, ppl=7.1, wps=5776.8, ups=0.09, wpb=64858, bsz=128, num_updates=5648, lr=9.99628e-05, gnorm=2.099, loss_scale=8, train_wall=11, gb_free=2.8, wall=64016 2021-06-19 12:25:53 | INFO | train_inner | epoch 002: 2683 / 3002 loss=2.883, ppl=7.38, wps=5860.1, ups=0.09, wpb=64843, bsz=128, num_updates=5649, lr=9.99628e-05, gnorm=2.238, loss_scale=8, train_wall=11, gb_free=2.8, wall=64027 2021-06-19 12:26:04 | INFO | train_inner | epoch 002: 2684 / 3002 loss=2.632, ppl=6.2, wps=5781.6, ups=0.09, wpb=64877, bsz=128, num_updates=5650, lr=9.99628e-05, gnorm=2.155, loss_scale=8, train_wall=11, gb_free=2.8, wall=64038 2021-06-19 12:26:15 | INFO | train_inner | epoch 002: 2685 / 3002 loss=2.593, ppl=6.03, wps=5815.8, ups=0.09, wpb=64914, bsz=128, num_updates=5651, lr=9.99628e-05, gnorm=2.18, loss_scale=8, train_wall=11, gb_free=2.8, wall=64049 2021-06-19 12:26:26 | INFO | train_inner | epoch 002: 2686 / 3002 loss=2.793, ppl=6.93, wps=5808.2, ups=0.09, wpb=64740, bsz=128, num_updates=5652, lr=9.99628e-05, gnorm=2.278, loss_scale=8, train_wall=11, gb_free=2.8, wall=64060 2021-06-19 12:26:37 | INFO | train_inner | epoch 002: 2687 / 3002 loss=2.554, ppl=5.87, wps=5915.4, ups=0.09, wpb=64806, bsz=128, num_updates=5653, lr=9.99628e-05, gnorm=2.089, loss_scale=8, train_wall=10, gb_free=2.8, wall=64071 2021-06-19 12:26:48 | INFO | train_inner | epoch 002: 2688 / 3002 loss=2.634, ppl=6.21, wps=5881.2, ups=0.09, wpb=64792, bsz=128, num_updates=5654, lr=9.99628e-05, gnorm=2.113, loss_scale=8, train_wall=11, gb_free=2.8, wall=64082 2021-06-19 12:26:59 | INFO | train_inner | epoch 002: 2689 / 3002 loss=2.75, ppl=6.73, wps=5881.9, ups=0.09, wpb=64911, bsz=128, num_updates=5655, lr=9.99628e-05, gnorm=2.211, loss_scale=8, train_wall=11, gb_free=2.8, wall=64093 2021-06-19 12:27:10 | INFO | train_inner | epoch 002: 2690 / 3002 loss=2.723, ppl=6.6, wps=5905.8, ups=0.09, wpb=64841, bsz=128, num_updates=5656, lr=9.99627e-05, gnorm=2.202, loss_scale=8, train_wall=11, gb_free=2.8, wall=64104 2021-06-19 12:27:21 | INFO | train_inner | epoch 002: 2691 / 3002 loss=2.627, ppl=6.18, wps=5815.6, ups=0.09, wpb=64801, bsz=128, num_updates=5657, lr=9.99627e-05, gnorm=2.175, loss_scale=8, train_wall=11, gb_free=2.8, wall=64116 2021-06-19 12:27:32 | INFO | train_inner | epoch 002: 2692 / 3002 loss=2.75, ppl=6.73, wps=5929, ups=0.09, wpb=64837, bsz=128, num_updates=5658, lr=9.99627e-05, gnorm=2.395, loss_scale=8, train_wall=10, gb_free=2.8, wall=64127 2021-06-19 12:27:43 | INFO | train_inner | epoch 002: 2693 / 3002 loss=2.761, ppl=6.78, wps=5855.2, ups=0.09, wpb=64805, bsz=128, num_updates=5659, lr=9.99627e-05, gnorm=2.141, loss_scale=8, train_wall=11, gb_free=2.8, wall=64138 2021-06-19 12:27:54 | INFO | train_inner | epoch 002: 2694 / 3002 loss=2.698, ppl=6.49, wps=5910.2, ups=0.09, wpb=64902, bsz=128, num_updates=5660, lr=9.99627e-05, gnorm=2.225, loss_scale=8, train_wall=11, gb_free=2.8, wall=64149 2021-06-19 12:28:05 | INFO | train_inner | epoch 002: 2695 / 3002 loss=2.744, ppl=6.7, wps=5846.5, ups=0.09, wpb=64826, bsz=128, num_updates=5661, lr=9.99627e-05, gnorm=2.147, loss_scale=8, train_wall=11, gb_free=2.8, wall=64160 2021-06-19 12:28:17 | INFO | train_inner | epoch 002: 2696 / 3002 loss=2.81, ppl=7.01, wps=5737.7, ups=0.09, wpb=64792, bsz=128, num_updates=5662, lr=9.99627e-05, gnorm=5.174, loss_scale=8, train_wall=11, gb_free=2.8, wall=64171 2021-06-19 12:28:28 | INFO | train_inner | epoch 002: 2697 / 3002 loss=2.721, ppl=6.59, wps=5717.6, ups=0.09, wpb=64803, bsz=128, num_updates=5663, lr=9.99627e-05, gnorm=2.134, loss_scale=8, train_wall=11, gb_free=2.8, wall=64182 2021-06-19 12:28:39 | INFO | train_inner | epoch 002: 2698 / 3002 loss=2.609, ppl=6.1, wps=5845.5, ups=0.09, wpb=64824, bsz=128, num_updates=5664, lr=9.99627e-05, gnorm=2.271, loss_scale=8, train_wall=11, gb_free=2.8, wall=64193 2021-06-19 12:28:50 | INFO | train_inner | epoch 002: 2699 / 3002 loss=2.521, ppl=5.74, wps=5875.7, ups=0.09, wpb=64824, bsz=128, num_updates=5665, lr=9.99627e-05, gnorm=2.198, loss_scale=8, train_wall=11, gb_free=2.8, wall=64204 2021-06-19 12:29:01 | INFO | train_inner | epoch 002: 2700 / 3002 loss=2.768, ppl=6.81, wps=5734.3, ups=0.09, wpb=64751, bsz=128, num_updates=5666, lr=9.99627e-05, gnorm=2.294, loss_scale=8, train_wall=11, gb_free=2.8, wall=64216 2021-06-19 12:29:13 | INFO | train_inner | epoch 002: 2701 / 3002 loss=2.809, ppl=7.01, wps=5755.9, ups=0.09, wpb=64841, bsz=128, num_updates=5667, lr=9.99627e-05, gnorm=2.427, loss_scale=8, train_wall=11, gb_free=2.8, wall=64227 2021-06-19 12:29:24 | INFO | train_inner | epoch 002: 2702 / 3002 loss=2.701, ppl=6.5, wps=5887.8, ups=0.09, wpb=64827, bsz=128, num_updates=5668, lr=9.99627e-05, gnorm=2.18, loss_scale=8, train_wall=11, gb_free=2.8, wall=64238 2021-06-19 12:29:35 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 2021-06-19 12:29:46 | INFO | train_inner | epoch 002: 2704 / 3002 loss=2.697, ppl=6.48, wps=2938.3, ups=0.05, wpb=64804, bsz=128, num_updates=5669, lr=9.99626e-05, gnorm=2.41, loss_scale=4, train_wall=21, gb_free=2.8, wall=64260 2021-06-19 12:29:57 | INFO | train_inner | epoch 002: 2705 / 3002 loss=2.617, ppl=6.13, wps=5875, ups=0.09, wpb=64934, bsz=128, num_updates=5670, lr=9.99626e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=64271 2021-06-19 12:30:08 | INFO | train_inner | epoch 002: 2706 / 3002 loss=2.706, ppl=6.52, wps=5795.7, ups=0.09, wpb=64838, bsz=128, num_updates=5671, lr=9.99626e-05, gnorm=2.151, loss_scale=4, train_wall=11, gb_free=2.8, wall=64282 2021-06-19 12:30:19 | INFO | train_inner | epoch 002: 2707 / 3002 loss=2.77, ppl=6.82, wps=5927.1, ups=0.09, wpb=64835, bsz=128, num_updates=5672, lr=9.99626e-05, gnorm=2.122, loss_scale=4, train_wall=11, gb_free=2.8, wall=64293 2021-06-19 12:30:30 | INFO | train_inner | epoch 002: 2708 / 3002 loss=2.644, ppl=6.25, wps=5797, ups=0.09, wpb=64772, bsz=128, num_updates=5673, lr=9.99626e-05, gnorm=2.238, loss_scale=4, train_wall=11, gb_free=2.8, wall=64304 2021-06-19 12:30:41 | INFO | train_inner | epoch 002: 2709 / 3002 loss=2.723, ppl=6.6, wps=5924.4, ups=0.09, wpb=64844, bsz=128, num_updates=5674, lr=9.99626e-05, gnorm=2.244, loss_scale=4, train_wall=10, gb_free=2.8, wall=64315 2021-06-19 12:30:52 | INFO | train_inner | epoch 002: 2710 / 3002 loss=2.669, ppl=6.36, wps=5859.1, ups=0.09, wpb=64878, bsz=128, num_updates=5675, lr=9.99626e-05, gnorm=2.162, loss_scale=4, train_wall=11, gb_free=2.8, wall=64326 2021-06-19 12:31:03 | INFO | train_inner | epoch 002: 2711 / 3002 loss=2.717, ppl=6.58, wps=5897.7, ups=0.09, wpb=64854, bsz=128, num_updates=5676, lr=9.99626e-05, gnorm=2.22, loss_scale=4, train_wall=11, gb_free=2.8, wall=64337 2021-06-19 12:31:14 | INFO | train_inner | epoch 002: 2712 / 3002 loss=2.749, ppl=6.72, wps=5859.3, ups=0.09, wpb=64711, bsz=128, num_updates=5677, lr=9.99626e-05, gnorm=2.243, loss_scale=4, train_wall=11, gb_free=2.8, wall=64348 2021-06-19 12:31:25 | INFO | train_inner | epoch 002: 2713 / 3002 loss=2.75, ppl=6.73, wps=5932.2, ups=0.09, wpb=64803, bsz=128, num_updates=5678, lr=9.99626e-05, gnorm=2.195, loss_scale=4, train_wall=10, gb_free=2.8, wall=64359 2021-06-19 12:31:36 | INFO | train_inner | epoch 002: 2714 / 3002 loss=2.651, ppl=6.28, wps=5870.2, ups=0.09, wpb=64774, bsz=128, num_updates=5679, lr=9.99626e-05, gnorm=2.194, loss_scale=4, train_wall=11, gb_free=2.8, wall=64370 2021-06-19 12:31:47 | INFO | train_inner | epoch 002: 2715 / 3002 loss=2.704, ppl=6.52, wps=5811.4, ups=0.09, wpb=64876, bsz=128, num_updates=5680, lr=9.99626e-05, gnorm=2.208, loss_scale=4, train_wall=11, gb_free=2.8, wall=64382 2021-06-19 12:31:59 | INFO | train_inner | epoch 002: 2716 / 3002 loss=2.762, ppl=6.78, wps=5755.3, ups=0.09, wpb=64838, bsz=128, num_updates=5681, lr=9.99625e-05, gnorm=2.139, loss_scale=4, train_wall=11, gb_free=2.8, wall=64393 2021-06-19 12:32:10 | INFO | train_inner | epoch 002: 2717 / 3002 loss=2.722, ppl=6.6, wps=5708.1, ups=0.09, wpb=64828, bsz=128, num_updates=5682, lr=9.99625e-05, gnorm=2.367, loss_scale=4, train_wall=11, gb_free=2.8, wall=64404 2021-06-19 12:32:21 | INFO | train_inner | epoch 002: 2718 / 3002 loss=2.591, ppl=6.02, wps=5771, ups=0.09, wpb=64853, bsz=128, num_updates=5683, lr=9.99625e-05, gnorm=2.207, loss_scale=4, train_wall=11, gb_free=2.8, wall=64415 2021-06-19 12:32:32 | INFO | train_inner | epoch 002: 2719 / 3002 loss=2.641, ppl=6.24, wps=5866, ups=0.09, wpb=64797, bsz=128, num_updates=5684, lr=9.99625e-05, gnorm=2.848, loss_scale=4, train_wall=11, gb_free=2.8, wall=64426 2021-06-19 12:32:43 | INFO | train_inner | epoch 002: 2720 / 3002 loss=2.689, ppl=6.45, wps=5839.7, ups=0.09, wpb=64803, bsz=128, num_updates=5685, lr=9.99625e-05, gnorm=4.468, loss_scale=4, train_wall=11, gb_free=2.8, wall=64438 2021-06-19 12:32:54 | INFO | train_inner | epoch 002: 2721 / 3002 loss=2.654, ppl=6.29, wps=5791.7, ups=0.09, wpb=64865, bsz=128, num_updates=5686, lr=9.99625e-05, gnorm=2.161, loss_scale=4, train_wall=11, gb_free=2.8, wall=64449 2021-06-19 12:33:06 | INFO | train_inner | epoch 002: 2722 / 3002 loss=2.878, ppl=7.35, wps=5727.3, ups=0.09, wpb=64917, bsz=128, num_updates=5687, lr=9.99625e-05, gnorm=2.189, loss_scale=4, train_wall=11, gb_free=2.8, wall=64460 2021-06-19 12:33:17 | INFO | train_inner | epoch 002: 2723 / 3002 loss=2.842, ppl=7.17, wps=5732.3, ups=0.09, wpb=64884, bsz=128, num_updates=5688, lr=9.99625e-05, gnorm=2.176, loss_scale=4, train_wall=11, gb_free=2.8, wall=64471 2021-06-19 12:33:28 | INFO | train_inner | epoch 002: 2724 / 3002 loss=2.755, ppl=6.75, wps=5922.2, ups=0.09, wpb=64876, bsz=128, num_updates=5689, lr=9.99625e-05, gnorm=2.205, loss_scale=4, train_wall=11, gb_free=2.8, wall=64482 2021-06-19 12:33:39 | INFO | train_inner | epoch 002: 2725 / 3002 loss=2.718, ppl=6.58, wps=5800.8, ups=0.09, wpb=64866, bsz=128, num_updates=5690, lr=9.99625e-05, gnorm=2.302, loss_scale=4, train_wall=11, gb_free=2.8, wall=64494 2021-06-19 12:33:51 | INFO | train_inner | epoch 002: 2726 / 3002 loss=2.654, ppl=6.3, wps=5749.1, ups=0.09, wpb=64773, bsz=128, num_updates=5691, lr=9.99625e-05, gnorm=2.096, loss_scale=4, train_wall=11, gb_free=2.8, wall=64505 2021-06-19 12:34:02 | INFO | train_inner | epoch 002: 2727 / 3002 loss=2.728, ppl=6.63, wps=5819, ups=0.09, wpb=64790, bsz=128, num_updates=5692, lr=9.99625e-05, gnorm=2.278, loss_scale=4, train_wall=11, gb_free=2.8, wall=64516 2021-06-19 12:34:13 | INFO | train_inner | epoch 002: 2728 / 3002 loss=2.84, ppl=7.16, wps=5847.8, ups=0.09, wpb=64770, bsz=128, num_updates=5693, lr=9.99625e-05, gnorm=2.296, loss_scale=4, train_wall=11, gb_free=2.8, wall=64527 2021-06-19 12:34:24 | INFO | train_inner | epoch 002: 2729 / 3002 loss=2.713, ppl=6.56, wps=5913.6, ups=0.09, wpb=64924, bsz=128, num_updates=5694, lr=9.99624e-05, gnorm=2.186, loss_scale=4, train_wall=10, gb_free=2.8, wall=64538 2021-06-19 12:34:35 | INFO | train_inner | epoch 002: 2730 / 3002 loss=2.791, ppl=6.92, wps=5852.1, ups=0.09, wpb=64924, bsz=128, num_updates=5695, lr=9.99624e-05, gnorm=2.312, loss_scale=4, train_wall=11, gb_free=2.8, wall=64549 2021-06-19 12:34:46 | INFO | train_inner | epoch 002: 2731 / 3002 loss=2.707, ppl=6.53, wps=5954.5, ups=0.09, wpb=64776, bsz=128, num_updates=5696, lr=9.99624e-05, gnorm=2.158, loss_scale=4, train_wall=10, gb_free=2.8, wall=64560 2021-06-19 12:34:57 | INFO | train_inner | epoch 002: 2732 / 3002 loss=2.746, ppl=6.71, wps=5846.7, ups=0.09, wpb=64814, bsz=128, num_updates=5697, lr=9.99624e-05, gnorm=2.162, loss_scale=4, train_wall=11, gb_free=2.8, wall=64571 2021-06-19 12:35:08 | INFO | train_inner | epoch 002: 2733 / 3002 loss=2.753, ppl=6.74, wps=5782, ups=0.09, wpb=64797, bsz=128, num_updates=5698, lr=9.99624e-05, gnorm=2.208, loss_scale=4, train_wall=11, gb_free=2.8, wall=64582 2021-06-19 12:35:19 | INFO | train_inner | epoch 002: 2734 / 3002 loss=2.708, ppl=6.53, wps=5776, ups=0.09, wpb=64842, bsz=128, num_updates=5699, lr=9.99624e-05, gnorm=2.208, loss_scale=4, train_wall=11, gb_free=2.8, wall=64594 2021-06-19 12:35:30 | INFO | train_inner | epoch 002: 2735 / 3002 loss=2.703, ppl=6.51, wps=5880.1, ups=0.09, wpb=64863, bsz=128, num_updates=5700, lr=9.99624e-05, gnorm=2.281, loss_scale=4, train_wall=11, gb_free=2.8, wall=64605 2021-06-19 12:35:42 | INFO | train_inner | epoch 002: 2736 / 3002 loss=2.642, ppl=6.24, wps=5727.9, ups=0.09, wpb=64784, bsz=128, num_updates=5701, lr=9.99624e-05, gnorm=2.299, loss_scale=4, train_wall=11, gb_free=2.8, wall=64616 2021-06-19 12:35:53 | INFO | train_inner | epoch 002: 2737 / 3002 loss=2.653, ppl=6.29, wps=5768.1, ups=0.09, wpb=64840, bsz=128, num_updates=5702, lr=9.99624e-05, gnorm=2.149, loss_scale=4, train_wall=11, gb_free=2.8, wall=64627 2021-06-19 12:36:04 | INFO | train_inner | epoch 002: 2738 / 3002 loss=2.759, ppl=6.77, wps=5844.6, ups=0.09, wpb=64807, bsz=128, num_updates=5703, lr=9.99624e-05, gnorm=2.252, loss_scale=4, train_wall=11, gb_free=2.8, wall=64638 2021-06-19 12:36:15 | INFO | train_inner | epoch 002: 2739 / 3002 loss=2.508, ppl=5.69, wps=5962.2, ups=0.09, wpb=64809, bsz=128, num_updates=5704, lr=9.99624e-05, gnorm=2.392, loss_scale=4, train_wall=10, gb_free=2.8, wall=64649 2021-06-19 12:36:26 | INFO | train_inner | epoch 002: 2740 / 3002 loss=2.54, ppl=5.81, wps=5816, ups=0.09, wpb=64874, bsz=128, num_updates=5705, lr=9.99624e-05, gnorm=2.116, loss_scale=4, train_wall=11, gb_free=2.8, wall=64660 2021-06-19 12:36:37 | INFO | train_inner | epoch 002: 2741 / 3002 loss=2.746, ppl=6.71, wps=5816.7, ups=0.09, wpb=64770, bsz=128, num_updates=5706, lr=9.99623e-05, gnorm=2.123, loss_scale=4, train_wall=11, gb_free=2.8, wall=64671 2021-06-19 12:36:48 | INFO | train_inner | epoch 002: 2742 / 3002 loss=2.794, ppl=6.94, wps=5886.8, ups=0.09, wpb=64800, bsz=128, num_updates=5707, lr=9.99623e-05, gnorm=2.175, loss_scale=4, train_wall=11, gb_free=2.8, wall=64682 2021-06-19 12:36:59 | INFO | train_inner | epoch 002: 2743 / 3002 loss=2.586, ppl=6, wps=5761.6, ups=0.09, wpb=64821, bsz=128, num_updates=5708, lr=9.99623e-05, gnorm=2.23, loss_scale=4, train_wall=11, gb_free=2.8, wall=64694 2021-06-19 12:37:10 | INFO | train_inner | epoch 002: 2744 / 3002 loss=2.727, ppl=6.62, wps=5881.9, ups=0.09, wpb=64808, bsz=128, num_updates=5709, lr=9.99623e-05, gnorm=2.182, loss_scale=4, train_wall=11, gb_free=2.8, wall=64705 2021-06-19 12:37:21 | INFO | train_inner | epoch 002: 2745 / 3002 loss=2.884, ppl=7.38, wps=5851, ups=0.09, wpb=64806, bsz=128, num_updates=5710, lr=9.99623e-05, gnorm=2.155, loss_scale=4, train_wall=11, gb_free=2.8, wall=64716 2021-06-19 12:37:32 | INFO | train_inner | epoch 002: 2746 / 3002 loss=2.742, ppl=6.69, wps=5986.4, ups=0.09, wpb=64781, bsz=128, num_updates=5711, lr=9.99623e-05, gnorm=3.145, loss_scale=4, train_wall=10, gb_free=2.8, wall=64727 2021-06-19 12:37:43 | INFO | train_inner | epoch 002: 2747 / 3002 loss=2.562, ppl=5.9, wps=5878, ups=0.09, wpb=64840, bsz=128, num_updates=5712, lr=9.99623e-05, gnorm=2.207, loss_scale=4, train_wall=11, gb_free=2.8, wall=64738 2021-06-19 12:37:54 | INFO | train_inner | epoch 002: 2748 / 3002 loss=2.605, ppl=6.08, wps=5867.9, ups=0.09, wpb=64880, bsz=128, num_updates=5713, lr=9.99623e-05, gnorm=2.348, loss_scale=4, train_wall=11, gb_free=2.8, wall=64749 2021-06-19 12:38:05 | INFO | train_inner | epoch 002: 2749 / 3002 loss=2.792, ppl=6.93, wps=5786.2, ups=0.09, wpb=64821, bsz=128, num_updates=5714, lr=9.99623e-05, gnorm=2.333, loss_scale=4, train_wall=11, gb_free=2.8, wall=64760 2021-06-19 12:38:17 | INFO | train_inner | epoch 002: 2750 / 3002 loss=2.771, ppl=6.82, wps=5787.2, ups=0.09, wpb=64825, bsz=128, num_updates=5715, lr=9.99623e-05, gnorm=2.196, loss_scale=4, train_wall=11, gb_free=2.8, wall=64771 2021-06-19 12:38:28 | INFO | train_inner | epoch 002: 2751 / 3002 loss=2.681, ppl=6.41, wps=5766.5, ups=0.09, wpb=64773, bsz=128, num_updates=5716, lr=9.99623e-05, gnorm=2.222, loss_scale=4, train_wall=11, gb_free=2.8, wall=64782 2021-06-19 12:38:39 | INFO | train_inner | epoch 002: 2752 / 3002 loss=2.735, ppl=6.66, wps=5929.5, ups=0.09, wpb=64848, bsz=128, num_updates=5717, lr=9.99623e-05, gnorm=2.197, loss_scale=4, train_wall=10, gb_free=2.8, wall=64793 2021-06-19 12:38:50 | INFO | train_inner | epoch 002: 2753 / 3002 loss=2.787, ppl=6.9, wps=5820.1, ups=0.09, wpb=64868, bsz=128, num_updates=5718, lr=9.99623e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=64804 2021-06-19 12:39:01 | INFO | train_inner | epoch 002: 2754 / 3002 loss=2.804, ppl=6.98, wps=5789.6, ups=0.09, wpb=64783, bsz=128, num_updates=5719, lr=9.99622e-05, gnorm=2.073, loss_scale=4, train_wall=11, gb_free=2.8, wall=64816 2021-06-19 12:39:12 | INFO | train_inner | epoch 002: 2755 / 3002 loss=2.708, ppl=6.53, wps=5917.7, ups=0.09, wpb=64877, bsz=128, num_updates=5720, lr=9.99622e-05, gnorm=3.864, loss_scale=4, train_wall=11, gb_free=2.8, wall=64826 2021-06-19 12:39:23 | INFO | train_inner | epoch 002: 2756 / 3002 loss=2.86, ppl=7.26, wps=5903, ups=0.09, wpb=64857, bsz=128, num_updates=5721, lr=9.99622e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=64837 2021-06-19 12:39:34 | INFO | train_inner | epoch 002: 2757 / 3002 loss=2.712, ppl=6.55, wps=5792.8, ups=0.09, wpb=64934, bsz=128, num_updates=5722, lr=9.99622e-05, gnorm=2.142, loss_scale=4, train_wall=11, gb_free=2.8, wall=64849 2021-06-19 12:39:46 | INFO | train_inner | epoch 002: 2758 / 3002 loss=2.579, ppl=5.97, wps=5773.1, ups=0.09, wpb=64831, bsz=128, num_updates=5723, lr=9.99622e-05, gnorm=2.225, loss_scale=4, train_wall=11, gb_free=2.8, wall=64860 2021-06-19 12:39:56 | INFO | train_inner | epoch 002: 2759 / 3002 loss=2.799, ppl=6.96, wps=5964.2, ups=0.09, wpb=64863, bsz=128, num_updates=5724, lr=9.99622e-05, gnorm=2.303, loss_scale=4, train_wall=10, gb_free=2.8, wall=64871 2021-06-19 12:40:08 | INFO | train_inner | epoch 002: 2760 / 3002 loss=2.803, ppl=6.98, wps=5818.3, ups=0.09, wpb=64734, bsz=128, num_updates=5725, lr=9.99622e-05, gnorm=2.193, loss_scale=4, train_wall=11, gb_free=2.8, wall=64882 2021-06-19 12:40:19 | INFO | train_inner | epoch 002: 2761 / 3002 loss=2.792, ppl=6.93, wps=5785, ups=0.09, wpb=64752, bsz=128, num_updates=5726, lr=9.99622e-05, gnorm=2.796, loss_scale=4, train_wall=11, gb_free=2.8, wall=64893 2021-06-19 12:40:30 | INFO | train_inner | epoch 002: 2762 / 3002 loss=2.717, ppl=6.57, wps=5835.7, ups=0.09, wpb=64864, bsz=128, num_updates=5727, lr=9.99622e-05, gnorm=2.17, loss_scale=4, train_wall=11, gb_free=2.8, wall=64904 2021-06-19 12:40:41 | INFO | train_inner | epoch 002: 2763 / 3002 loss=2.758, ppl=6.76, wps=5915, ups=0.09, wpb=64847, bsz=128, num_updates=5728, lr=9.99622e-05, gnorm=2.23, loss_scale=4, train_wall=10, gb_free=2.8, wall=64915 2021-06-19 12:40:52 | INFO | train_inner | epoch 002: 2764 / 3002 loss=2.789, ppl=6.91, wps=5932.1, ups=0.09, wpb=64790, bsz=128, num_updates=5729, lr=9.99622e-05, gnorm=2.204, loss_scale=4, train_wall=10, gb_free=2.8, wall=64926 2021-06-19 12:41:03 | INFO | train_inner | epoch 002: 2765 / 3002 loss=2.768, ppl=6.81, wps=5847.2, ups=0.09, wpb=64861, bsz=128, num_updates=5730, lr=9.99622e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=64937 2021-06-19 12:41:14 | INFO | train_inner | epoch 002: 2766 / 3002 loss=2.852, ppl=7.22, wps=5913.7, ups=0.09, wpb=64782, bsz=128, num_updates=5731, lr=9.99621e-05, gnorm=2.21, loss_scale=4, train_wall=10, gb_free=2.8, wall=64948 2021-06-19 12:41:25 | INFO | train_inner | epoch 002: 2767 / 3002 loss=2.734, ppl=6.65, wps=5859, ups=0.09, wpb=64815, bsz=128, num_updates=5732, lr=9.99621e-05, gnorm=2.101, loss_scale=4, train_wall=11, gb_free=2.8, wall=64959 2021-06-19 12:41:36 | INFO | train_inner | epoch 002: 2768 / 3002 loss=2.698, ppl=6.49, wps=5942.5, ups=0.09, wpb=64845, bsz=128, num_updates=5733, lr=9.99621e-05, gnorm=2.251, loss_scale=4, train_wall=10, gb_free=2.8, wall=64970 2021-06-19 12:41:47 | INFO | train_inner | epoch 002: 2769 / 3002 loss=2.724, ppl=6.61, wps=5750.9, ups=0.09, wpb=64825, bsz=128, num_updates=5734, lr=9.99621e-05, gnorm=2.208, loss_scale=4, train_wall=11, gb_free=2.8, wall=64981 2021-06-19 12:41:58 | INFO | train_inner | epoch 002: 2770 / 3002 loss=2.524, ppl=5.75, wps=5880.4, ups=0.09, wpb=64767, bsz=128, num_updates=5735, lr=9.99621e-05, gnorm=2.112, loss_scale=4, train_wall=11, gb_free=2.8, wall=64992 2021-06-19 12:42:09 | INFO | train_inner | epoch 002: 2771 / 3002 loss=2.58, ppl=5.98, wps=5889.5, ups=0.09, wpb=64859, bsz=128, num_updates=5736, lr=9.99621e-05, gnorm=2.181, loss_scale=4, train_wall=11, gb_free=2.8, wall=65003 2021-06-19 12:42:20 | INFO | train_inner | epoch 002: 2772 / 3002 loss=2.723, ppl=6.6, wps=5841.5, ups=0.09, wpb=64813, bsz=128, num_updates=5737, lr=9.99621e-05, gnorm=2.782, loss_scale=4, train_wall=11, gb_free=2.8, wall=65015 2021-06-19 12:42:31 | INFO | train_inner | epoch 002: 2773 / 3002 loss=2.546, ppl=5.84, wps=5753.8, ups=0.09, wpb=64773, bsz=128, num_updates=5738, lr=9.99621e-05, gnorm=2.422, loss_scale=4, train_wall=11, gb_free=2.8, wall=65026 2021-06-19 12:42:43 | INFO | train_inner | epoch 002: 2774 / 3002 loss=2.771, ppl=6.83, wps=5746, ups=0.09, wpb=64854, bsz=128, num_updates=5739, lr=9.99621e-05, gnorm=2.102, loss_scale=4, train_wall=11, gb_free=2.8, wall=65037 2021-06-19 12:42:54 | INFO | train_inner | epoch 002: 2775 / 3002 loss=2.749, ppl=6.72, wps=5844.6, ups=0.09, wpb=64741, bsz=128, num_updates=5740, lr=9.99621e-05, gnorm=2.209, loss_scale=4, train_wall=11, gb_free=2.8, wall=65048 2021-06-19 12:43:05 | INFO | train_inner | epoch 002: 2776 / 3002 loss=2.67, ppl=6.37, wps=5805, ups=0.09, wpb=64809, bsz=128, num_updates=5741, lr=9.99621e-05, gnorm=2.302, loss_scale=4, train_wall=11, gb_free=2.8, wall=65059 2021-06-19 12:43:16 | INFO | train_inner | epoch 002: 2777 / 3002 loss=2.646, ppl=6.26, wps=5761.4, ups=0.09, wpb=64872, bsz=128, num_updates=5742, lr=9.99621e-05, gnorm=2.222, loss_scale=4, train_wall=11, gb_free=2.8, wall=65071 2021-06-19 12:43:27 | INFO | train_inner | epoch 002: 2778 / 3002 loss=2.761, ppl=6.78, wps=5827.6, ups=0.09, wpb=64784, bsz=128, num_updates=5743, lr=9.99621e-05, gnorm=2.221, loss_scale=4, train_wall=11, gb_free=2.8, wall=65082 2021-06-19 12:43:39 | INFO | train_inner | epoch 002: 2779 / 3002 loss=2.652, ppl=6.29, wps=5738.8, ups=0.09, wpb=64808, bsz=128, num_updates=5744, lr=9.9962e-05, gnorm=2.125, loss_scale=4, train_wall=11, gb_free=2.8, wall=65093 2021-06-19 12:43:50 | INFO | train_inner | epoch 002: 2780 / 3002 loss=2.693, ppl=6.46, wps=5855.3, ups=0.09, wpb=64860, bsz=128, num_updates=5745, lr=9.9962e-05, gnorm=2.119, loss_scale=4, train_wall=11, gb_free=2.8, wall=65104 2021-06-19 12:44:01 | INFO | train_inner | epoch 002: 2781 / 3002 loss=2.638, ppl=6.22, wps=5908.1, ups=0.09, wpb=64839, bsz=128, num_updates=5746, lr=9.9962e-05, gnorm=2.365, loss_scale=4, train_wall=11, gb_free=2.8, wall=65115 2021-06-19 12:44:12 | INFO | train_inner | epoch 002: 2782 / 3002 loss=2.628, ppl=6.18, wps=5726.6, ups=0.09, wpb=64874, bsz=128, num_updates=5747, lr=9.9962e-05, gnorm=2.674, loss_scale=4, train_wall=11, gb_free=2.8, wall=65126 2021-06-19 12:44:23 | INFO | train_inner | epoch 002: 2783 / 3002 loss=2.565, ppl=5.92, wps=5997.2, ups=0.09, wpb=64820, bsz=128, num_updates=5748, lr=9.9962e-05, gnorm=2.26, loss_scale=4, train_wall=10, gb_free=2.8, wall=65137 2021-06-19 12:44:34 | INFO | train_inner | epoch 002: 2784 / 3002 loss=2.791, ppl=6.92, wps=5960.5, ups=0.09, wpb=64811, bsz=128, num_updates=5749, lr=9.9962e-05, gnorm=2.355, loss_scale=4, train_wall=10, gb_free=2.8, wall=65148 2021-06-19 12:44:45 | INFO | train_inner | epoch 002: 2785 / 3002 loss=2.679, ppl=6.41, wps=5784.5, ups=0.09, wpb=64789, bsz=128, num_updates=5750, lr=9.9962e-05, gnorm=2.265, loss_scale=4, train_wall=11, gb_free=2.8, wall=65159 2021-06-19 12:44:56 | INFO | train_inner | epoch 002: 2786 / 3002 loss=2.715, ppl=6.57, wps=5849.2, ups=0.09, wpb=64864, bsz=128, num_updates=5751, lr=9.9962e-05, gnorm=2.241, loss_scale=4, train_wall=11, gb_free=2.8, wall=65170 2021-06-19 12:45:07 | INFO | train_inner | epoch 002: 2787 / 3002 loss=2.784, ppl=6.89, wps=5817.3, ups=0.09, wpb=64891, bsz=128, num_updates=5752, lr=9.9962e-05, gnorm=2.203, loss_scale=4, train_wall=11, gb_free=2.8, wall=65181 2021-06-19 12:45:18 | INFO | train_inner | epoch 002: 2788 / 3002 loss=2.737, ppl=6.67, wps=5872.5, ups=0.09, wpb=64867, bsz=128, num_updates=5753, lr=9.9962e-05, gnorm=2.15, loss_scale=4, train_wall=11, gb_free=2.8, wall=65193 2021-06-19 12:45:29 | INFO | train_inner | epoch 002: 2789 / 3002 loss=2.732, ppl=6.64, wps=5836.5, ups=0.09, wpb=64818, bsz=128, num_updates=5754, lr=9.9962e-05, gnorm=10.28, loss_scale=4, train_wall=11, gb_free=2.8, wall=65204 2021-06-19 12:45:41 | INFO | train_inner | epoch 002: 2790 / 3002 loss=2.818, ppl=7.05, wps=5796.8, ups=0.09, wpb=64842, bsz=128, num_updates=5755, lr=9.9962e-05, gnorm=2.15, loss_scale=4, train_wall=11, gb_free=2.8, wall=65215 2021-06-19 12:45:51 | INFO | train_inner | epoch 002: 2791 / 3002 loss=2.657, ppl=6.31, wps=5912.6, ups=0.09, wpb=64820, bsz=128, num_updates=5756, lr=9.99619e-05, gnorm=4.831, loss_scale=4, train_wall=11, gb_free=2.8, wall=65226 2021-06-19 12:46:03 | INFO | train_inner | epoch 002: 2792 / 3002 loss=2.592, ppl=6.03, wps=5854.1, ups=0.09, wpb=64828, bsz=128, num_updates=5757, lr=9.99619e-05, gnorm=2.136, loss_scale=4, train_wall=11, gb_free=2.8, wall=65237 2021-06-19 12:46:14 | INFO | train_inner | epoch 002: 2793 / 3002 loss=2.827, ppl=7.09, wps=5908.6, ups=0.09, wpb=64798, bsz=128, num_updates=5758, lr=9.99619e-05, gnorm=2.149, loss_scale=4, train_wall=11, gb_free=2.8, wall=65248 2021-06-19 12:46:25 | INFO | train_inner | epoch 002: 2794 / 3002 loss=2.616, ppl=6.13, wps=5662.2, ups=0.09, wpb=64769, bsz=128, num_updates=5759, lr=9.99619e-05, gnorm=2.215, loss_scale=4, train_wall=11, gb_free=2.8, wall=65259 2021-06-19 12:46:36 | INFO | train_inner | epoch 002: 2795 / 3002 loss=2.889, ppl=7.41, wps=5809.3, ups=0.09, wpb=64773, bsz=128, num_updates=5760, lr=9.99619e-05, gnorm=2.432, loss_scale=4, train_wall=11, gb_free=2.8, wall=65270 2021-06-19 12:46:47 | INFO | train_inner | epoch 002: 2796 / 3002 loss=2.721, ppl=6.59, wps=5928.5, ups=0.09, wpb=64938, bsz=128, num_updates=5761, lr=9.99619e-05, gnorm=2.106, loss_scale=4, train_wall=10, gb_free=2.8, wall=65281 2021-06-19 12:46:58 | INFO | train_inner | epoch 002: 2797 / 3002 loss=2.838, ppl=7.15, wps=5832.9, ups=0.09, wpb=64804, bsz=128, num_updates=5762, lr=9.99619e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=65292 2021-06-19 12:47:09 | INFO | train_inner | epoch 002: 2798 / 3002 loss=2.689, ppl=6.45, wps=5858.3, ups=0.09, wpb=64820, bsz=128, num_updates=5763, lr=9.99619e-05, gnorm=3.523, loss_scale=4, train_wall=11, gb_free=2.8, wall=65304 2021-06-19 12:47:20 | INFO | train_inner | epoch 002: 2799 / 3002 loss=2.606, ppl=6.09, wps=5820, ups=0.09, wpb=64819, bsz=128, num_updates=5764, lr=9.99619e-05, gnorm=2.098, loss_scale=4, train_wall=11, gb_free=2.8, wall=65315 2021-06-19 12:47:31 | INFO | train_inner | epoch 002: 2800 / 3002 loss=2.74, ppl=6.68, wps=6033.3, ups=0.09, wpb=64877, bsz=128, num_updates=5765, lr=9.99619e-05, gnorm=2.231, loss_scale=4, train_wall=10, gb_free=2.8, wall=65325 2021-06-19 12:47:42 | INFO | train_inner | epoch 002: 2801 / 3002 loss=2.687, ppl=6.44, wps=6013.5, ups=0.09, wpb=64932, bsz=128, num_updates=5766, lr=9.99619e-05, gnorm=2.267, loss_scale=4, train_wall=10, gb_free=2.8, wall=65336 2021-06-19 12:47:53 | INFO | train_inner | epoch 002: 2802 / 3002 loss=2.66, ppl=6.32, wps=5742, ups=0.09, wpb=64713, bsz=128, num_updates=5767, lr=9.99619e-05, gnorm=2.274, loss_scale=4, train_wall=11, gb_free=2.8, wall=65348 2021-06-19 12:48:04 | INFO | train_inner | epoch 002: 2803 / 3002 loss=2.738, ppl=6.67, wps=5965.1, ups=0.09, wpb=64778, bsz=128, num_updates=5768, lr=9.99619e-05, gnorm=2.258, loss_scale=4, train_wall=10, gb_free=2.8, wall=65358 2021-06-19 12:48:15 | INFO | train_inner | epoch 002: 2804 / 3002 loss=2.729, ppl=6.63, wps=5929.6, ups=0.09, wpb=64761, bsz=128, num_updates=5769, lr=9.99618e-05, gnorm=2.174, loss_scale=4, train_wall=10, gb_free=2.8, wall=65369 2021-06-19 12:48:26 | INFO | train_inner | epoch 002: 2805 / 3002 loss=2.709, ppl=6.54, wps=5971.8, ups=0.09, wpb=64823, bsz=128, num_updates=5770, lr=9.99618e-05, gnorm=2.283, loss_scale=4, train_wall=10, gb_free=2.8, wall=65380 2021-06-19 12:48:37 | INFO | train_inner | epoch 002: 2806 / 3002 loss=2.735, ppl=6.66, wps=5934.3, ups=0.09, wpb=64927, bsz=128, num_updates=5771, lr=9.99618e-05, gnorm=2.511, loss_scale=4, train_wall=10, gb_free=2.8, wall=65391 2021-06-19 12:48:48 | INFO | train_inner | epoch 002: 2807 / 3002 loss=2.695, ppl=6.48, wps=5834.1, ups=0.09, wpb=64752, bsz=128, num_updates=5772, lr=9.99618e-05, gnorm=2.421, loss_scale=4, train_wall=11, gb_free=2.8, wall=65402 2021-06-19 12:48:59 | INFO | train_inner | epoch 002: 2808 / 3002 loss=2.75, ppl=6.73, wps=5756.2, ups=0.09, wpb=64818, bsz=128, num_updates=5773, lr=9.99618e-05, gnorm=2.109, loss_scale=4, train_wall=11, gb_free=2.8, wall=65413 2021-06-19 12:49:10 | INFO | train_inner | epoch 002: 2809 / 3002 loss=2.573, ppl=5.95, wps=5868.6, ups=0.09, wpb=64802, bsz=128, num_updates=5774, lr=9.99618e-05, gnorm=2.117, loss_scale=4, train_wall=11, gb_free=2.8, wall=65424 2021-06-19 12:49:21 | INFO | train_inner | epoch 002: 2810 / 3002 loss=2.797, ppl=6.95, wps=5891, ups=0.09, wpb=64794, bsz=128, num_updates=5775, lr=9.99618e-05, gnorm=2.334, loss_scale=4, train_wall=11, gb_free=2.8, wall=65435 2021-06-19 12:49:32 | INFO | train_inner | epoch 002: 2811 / 3002 loss=2.676, ppl=6.39, wps=5873, ups=0.09, wpb=64869, bsz=128, num_updates=5776, lr=9.99618e-05, gnorm=2.265, loss_scale=4, train_wall=11, gb_free=2.8, wall=65447 2021-06-19 12:49:43 | INFO | train_inner | epoch 002: 2812 / 3002 loss=2.686, ppl=6.44, wps=5819.6, ups=0.09, wpb=64781, bsz=128, num_updates=5777, lr=9.99618e-05, gnorm=2.163, loss_scale=4, train_wall=11, gb_free=2.8, wall=65458 2021-06-19 12:49:55 | INFO | train_inner | epoch 002: 2813 / 3002 loss=2.818, ppl=7.05, wps=5751.7, ups=0.09, wpb=64780, bsz=128, num_updates=5778, lr=9.99618e-05, gnorm=2.184, loss_scale=4, train_wall=11, gb_free=2.8, wall=65469 2021-06-19 12:50:06 | INFO | train_inner | epoch 002: 2814 / 3002 loss=2.863, ppl=7.27, wps=5884.1, ups=0.09, wpb=64912, bsz=128, num_updates=5779, lr=9.99618e-05, gnorm=2.299, loss_scale=4, train_wall=11, gb_free=2.8, wall=65480 2021-06-19 12:50:17 | INFO | train_inner | epoch 002: 2815 / 3002 loss=2.676, ppl=6.39, wps=5774.2, ups=0.09, wpb=64885, bsz=128, num_updates=5780, lr=9.99618e-05, gnorm=2.203, loss_scale=4, train_wall=11, gb_free=2.8, wall=65491 2021-06-19 12:50:28 | INFO | train_inner | epoch 002: 2816 / 3002 loss=2.628, ppl=6.18, wps=5831.5, ups=0.09, wpb=64899, bsz=128, num_updates=5781, lr=9.99617e-05, gnorm=2.263, loss_scale=4, train_wall=11, gb_free=2.8, wall=65502 2021-06-19 12:50:39 | INFO | train_inner | epoch 002: 2817 / 3002 loss=2.635, ppl=6.21, wps=5766.5, ups=0.09, wpb=64811, bsz=128, num_updates=5782, lr=9.99617e-05, gnorm=2.135, loss_scale=4, train_wall=11, gb_free=2.8, wall=65514 2021-06-19 12:50:50 | INFO | train_inner | epoch 002: 2818 / 3002 loss=2.746, ppl=6.71, wps=5884.6, ups=0.09, wpb=64849, bsz=128, num_updates=5783, lr=9.99617e-05, gnorm=2.764, loss_scale=4, train_wall=11, gb_free=2.8, wall=65525 2021-06-19 12:51:01 | INFO | train_inner | epoch 002: 2819 / 3002 loss=2.789, ppl=6.91, wps=5889.5, ups=0.09, wpb=64837, bsz=128, num_updates=5784, lr=9.99617e-05, gnorm=2.311, loss_scale=4, train_wall=11, gb_free=2.8, wall=65536 2021-06-19 12:51:12 | INFO | train_inner | epoch 002: 2820 / 3002 loss=2.883, ppl=7.38, wps=5863.1, ups=0.09, wpb=64792, bsz=128, num_updates=5785, lr=9.99617e-05, gnorm=2.484, loss_scale=4, train_wall=11, gb_free=2.8, wall=65547 2021-06-19 12:51:23 | INFO | train_inner | epoch 002: 2821 / 3002 loss=2.713, ppl=6.56, wps=5845.4, ups=0.09, wpb=64833, bsz=128, num_updates=5786, lr=9.99617e-05, gnorm=2.507, loss_scale=4, train_wall=11, gb_free=2.8, wall=65558 2021-06-19 12:51:34 | INFO | train_inner | epoch 002: 2822 / 3002 loss=2.673, ppl=6.38, wps=5881, ups=0.09, wpb=64830, bsz=128, num_updates=5787, lr=9.99617e-05, gnorm=4.293, loss_scale=4, train_wall=11, gb_free=2.8, wall=65569 2021-06-19 12:51:46 | INFO | train_inner | epoch 002: 2823 / 3002 loss=2.658, ppl=6.31, wps=5839, ups=0.09, wpb=64872, bsz=128, num_updates=5788, lr=9.99617e-05, gnorm=2.289, loss_scale=4, train_wall=11, gb_free=2.8, wall=65580 2021-06-19 12:51:57 | INFO | train_inner | epoch 002: 2824 / 3002 loss=2.677, ppl=6.39, wps=5875.4, ups=0.09, wpb=64848, bsz=128, num_updates=5789, lr=9.99617e-05, gnorm=7.933, loss_scale=4, train_wall=11, gb_free=2.8, wall=65591 2021-06-19 12:52:08 | INFO | train_inner | epoch 002: 2825 / 3002 loss=2.792, ppl=6.92, wps=5804.8, ups=0.09, wpb=64682, bsz=128, num_updates=5790, lr=9.99617e-05, gnorm=2.52, loss_scale=4, train_wall=11, gb_free=2.8, wall=65602 2021-06-19 12:52:19 | INFO | train_inner | epoch 002: 2826 / 3002 loss=2.582, ppl=5.99, wps=5762.8, ups=0.09, wpb=64830, bsz=128, num_updates=5791, lr=9.99617e-05, gnorm=2.179, loss_scale=4, train_wall=11, gb_free=2.8, wall=65613 2021-06-19 12:52:30 | INFO | train_inner | epoch 002: 2827 / 3002 loss=2.835, ppl=7.14, wps=5875.6, ups=0.09, wpb=64833, bsz=128, num_updates=5792, lr=9.99617e-05, gnorm=2.151, loss_scale=4, train_wall=11, gb_free=2.8, wall=65624 2021-06-19 12:52:41 | INFO | train_inner | epoch 002: 2828 / 3002 loss=2.56, ppl=5.9, wps=5816, ups=0.09, wpb=64869, bsz=128, num_updates=5793, lr=9.99617e-05, gnorm=2.156, loss_scale=4, train_wall=11, gb_free=2.8, wall=65636 2021-06-19 12:52:52 | INFO | train_inner | epoch 002: 2829 / 3002 loss=2.69, ppl=6.45, wps=5884.9, ups=0.09, wpb=64804, bsz=128, num_updates=5794, lr=9.99616e-05, gnorm=2.295, loss_scale=4, train_wall=11, gb_free=2.8, wall=65647 2021-06-19 12:53:03 | INFO | train_inner | epoch 002: 2830 / 3002 loss=2.546, ppl=5.84, wps=5826.4, ups=0.09, wpb=64828, bsz=128, num_updates=5795, lr=9.99616e-05, gnorm=2.172, loss_scale=4, train_wall=11, gb_free=2.8, wall=65658 2021-06-19 12:53:14 | INFO | train_inner | epoch 002: 2831 / 3002 loss=2.63, ppl=6.19, wps=5922.2, ups=0.09, wpb=64793, bsz=128, num_updates=5796, lr=9.99616e-05, gnorm=2.11, loss_scale=8, train_wall=10, gb_free=2.8, wall=65669 2021-06-19 12:53:25 | INFO | train_inner | epoch 002: 2832 / 3002 loss=2.733, ppl=6.65, wps=5776.4, ups=0.09, wpb=64773, bsz=128, num_updates=5797, lr=9.99616e-05, gnorm=4.661, loss_scale=8, train_wall=11, gb_free=2.8, wall=65680 2021-06-19 12:53:37 | INFO | train_inner | epoch 002: 2833 / 3002 loss=2.716, ppl=6.57, wps=5802.9, ups=0.09, wpb=64827, bsz=128, num_updates=5798, lr=9.99616e-05, gnorm=2.206, loss_scale=8, train_wall=11, gb_free=2.8, wall=65691 2021-06-19 12:53:48 | INFO | train_inner | epoch 002: 2834 / 3002 loss=2.632, ppl=6.2, wps=5923.9, ups=0.09, wpb=64827, bsz=128, num_updates=5799, lr=9.99616e-05, gnorm=2.172, loss_scale=8, train_wall=11, gb_free=2.8, wall=65702 2021-06-19 12:53:59 | INFO | train_inner | epoch 002: 2835 / 3002 loss=2.825, ppl=7.09, wps=5834.5, ups=0.09, wpb=64758, bsz=128, num_updates=5800, lr=9.99616e-05, gnorm=2.202, loss_scale=8, train_wall=11, gb_free=2.8, wall=65713 2021-06-19 12:54:10 | INFO | train_inner | epoch 002: 2836 / 3002 loss=2.705, ppl=6.52, wps=5833.8, ups=0.09, wpb=64771, bsz=128, num_updates=5801, lr=9.99616e-05, gnorm=2.213, loss_scale=8, train_wall=11, gb_free=2.8, wall=65724 2021-06-19 12:54:21 | INFO | train_inner | epoch 002: 2837 / 3002 loss=2.669, ppl=6.36, wps=5874.5, ups=0.09, wpb=64771, bsz=128, num_updates=5802, lr=9.99616e-05, gnorm=2.304, loss_scale=8, train_wall=11, gb_free=2.8, wall=65735 2021-06-19 12:54:32 | INFO | train_inner | epoch 002: 2838 / 3002 loss=2.584, ppl=5.99, wps=5798.2, ups=0.09, wpb=64858, bsz=128, num_updates=5803, lr=9.99616e-05, gnorm=2.24, loss_scale=8, train_wall=11, gb_free=2.8, wall=65746 2021-06-19 12:54:43 | INFO | train_inner | epoch 002: 2839 / 3002 loss=2.702, ppl=6.51, wps=5803.6, ups=0.09, wpb=64779, bsz=128, num_updates=5804, lr=9.99616e-05, gnorm=2.368, loss_scale=8, train_wall=11, gb_free=2.8, wall=65757 2021-06-19 12:54:54 | INFO | train_inner | epoch 002: 2840 / 3002 loss=2.764, ppl=6.79, wps=5784.7, ups=0.09, wpb=64792, bsz=128, num_updates=5805, lr=9.99616e-05, gnorm=3.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=65769 2021-06-19 12:55:05 | INFO | train_inner | epoch 002: 2841 / 3002 loss=2.69, ppl=6.45, wps=5925.7, ups=0.09, wpb=64833, bsz=128, num_updates=5806, lr=9.99615e-05, gnorm=2.249, loss_scale=8, train_wall=10, gb_free=2.8, wall=65780 2021-06-19 12:55:16 | INFO | train_inner | epoch 002: 2842 / 3002 loss=2.638, ppl=6.22, wps=5899.4, ups=0.09, wpb=64929, bsz=128, num_updates=5807, lr=9.99615e-05, gnorm=2.397, loss_scale=8, train_wall=11, gb_free=2.8, wall=65791 2021-06-19 12:55:27 | INFO | train_inner | epoch 002: 2843 / 3002 loss=2.712, ppl=6.55, wps=5841.6, ups=0.09, wpb=64805, bsz=128, num_updates=5808, lr=9.99615e-05, gnorm=2.184, loss_scale=8, train_wall=11, gb_free=2.8, wall=65802 2021-06-19 12:55:38 | INFO | train_inner | epoch 002: 2844 / 3002 loss=2.626, ppl=6.17, wps=5883, ups=0.09, wpb=64917, bsz=128, num_updates=5809, lr=9.99615e-05, gnorm=2.173, loss_scale=8, train_wall=11, gb_free=2.8, wall=65813 2021-06-19 12:55:50 | INFO | train_inner | epoch 002: 2845 / 3002 loss=2.929, ppl=7.62, wps=5805.5, ups=0.09, wpb=64721, bsz=128, num_updates=5810, lr=9.99615e-05, gnorm=2.238, loss_scale=8, train_wall=11, gb_free=2.8, wall=65824 2021-06-19 12:56:01 | INFO | train_inner | epoch 002: 2846 / 3002 loss=2.61, ppl=6.11, wps=5753.9, ups=0.09, wpb=64827, bsz=128, num_updates=5811, lr=9.99615e-05, gnorm=2.221, loss_scale=8, train_wall=11, gb_free=2.8, wall=65835 2021-06-19 12:56:12 | INFO | train_inner | epoch 002: 2847 / 3002 loss=2.824, ppl=7.08, wps=5874.4, ups=0.09, wpb=64831, bsz=128, num_updates=5812, lr=9.99615e-05, gnorm=2.229, loss_scale=8, train_wall=11, gb_free=2.8, wall=65846 2021-06-19 12:56:23 | INFO | train_inner | epoch 002: 2848 / 3002 loss=2.762, ppl=6.78, wps=5823.3, ups=0.09, wpb=64812, bsz=128, num_updates=5813, lr=9.99615e-05, gnorm=2.214, loss_scale=8, train_wall=11, gb_free=2.8, wall=65857 2021-06-19 12:56:34 | INFO | train_inner | epoch 002: 2849 / 3002 loss=2.577, ppl=5.96, wps=5805.9, ups=0.09, wpb=64885, bsz=128, num_updates=5814, lr=9.99615e-05, gnorm=2.336, loss_scale=8, train_wall=11, gb_free=2.8, wall=65869 2021-06-19 12:56:45 | INFO | train_inner | epoch 002: 2850 / 3002 loss=2.764, ppl=6.79, wps=5904.9, ups=0.09, wpb=64858, bsz=128, num_updates=5815, lr=9.99615e-05, gnorm=2.226, loss_scale=8, train_wall=11, gb_free=2.8, wall=65880 2021-06-19 12:56:56 | INFO | train_inner | epoch 002: 2851 / 3002 loss=2.846, ppl=7.19, wps=5764.9, ups=0.09, wpb=64729, bsz=128, num_updates=5816, lr=9.99615e-05, gnorm=2.559, loss_scale=8, train_wall=11, gb_free=2.8, wall=65891 2021-06-19 12:57:08 | INFO | train_inner | epoch 002: 2852 / 3002 loss=2.89, ppl=7.41, wps=5843.7, ups=0.09, wpb=64948, bsz=128, num_updates=5817, lr=9.99615e-05, gnorm=2.177, loss_scale=8, train_wall=11, gb_free=2.8, wall=65902 2021-06-19 12:57:18 | INFO | train_inner | epoch 002: 2853 / 3002 loss=2.882, ppl=7.37, wps=5932.4, ups=0.09, wpb=64834, bsz=128, num_updates=5818, lr=9.99615e-05, gnorm=2.126, loss_scale=8, train_wall=10, gb_free=2.8, wall=65913 2021-06-19 12:57:30 | INFO | train_inner | epoch 002: 2854 / 3002 loss=2.783, ppl=6.88, wps=5824.8, ups=0.09, wpb=64890, bsz=128, num_updates=5819, lr=9.99614e-05, gnorm=2.134, loss_scale=8, train_wall=11, gb_free=2.8, wall=65924 2021-06-19 12:57:41 | INFO | train_inner | epoch 002: 2855 / 3002 loss=2.723, ppl=6.6, wps=5907.5, ups=0.09, wpb=64803, bsz=128, num_updates=5820, lr=9.99614e-05, gnorm=2.187, loss_scale=8, train_wall=10, gb_free=2.8, wall=65935 2021-06-19 12:57:51 | INFO | train_inner | epoch 002: 2856 / 3002 loss=2.727, ppl=6.62, wps=5966.1, ups=0.09, wpb=64831, bsz=128, num_updates=5821, lr=9.99614e-05, gnorm=2.134, loss_scale=8, train_wall=10, gb_free=2.8, wall=65946 2021-06-19 12:58:02 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 2021-06-19 12:58:14 | INFO | train_inner | epoch 002: 2858 / 3002 loss=2.746, ppl=6.71, wps=2929.3, ups=0.05, wpb=64786, bsz=128, num_updates=5822, lr=9.99614e-05, gnorm=2.631, loss_scale=4, train_wall=21, gb_free=2.8, wall=65968 2021-06-19 12:58:25 | INFO | train_inner | epoch 002: 2859 / 3002 loss=2.578, ppl=5.97, wps=5889.7, ups=0.09, wpb=64878, bsz=128, num_updates=5823, lr=9.99614e-05, gnorm=2.17, loss_scale=4, train_wall=11, gb_free=2.8, wall=65979 2021-06-19 12:58:35 | INFO | train_inner | epoch 002: 2860 / 3002 loss=2.517, ppl=5.72, wps=5933.1, ups=0.09, wpb=64833, bsz=128, num_updates=5824, lr=9.99614e-05, gnorm=2.169, loss_scale=4, train_wall=10, gb_free=2.8, wall=65990 2021-06-19 12:58:47 | INFO | train_inner | epoch 002: 2861 / 3002 loss=2.646, ppl=6.26, wps=5750.5, ups=0.09, wpb=64696, bsz=128, num_updates=5825, lr=9.99614e-05, gnorm=2.465, loss_scale=4, train_wall=11, gb_free=2.8, wall=66001 2021-06-19 12:58:58 | INFO | train_inner | epoch 002: 2862 / 3002 loss=2.815, ppl=7.04, wps=5810, ups=0.09, wpb=64871, bsz=128, num_updates=5826, lr=9.99614e-05, gnorm=2.787, loss_scale=4, train_wall=11, gb_free=2.8, wall=66012 2021-06-19 12:59:09 | INFO | train_inner | epoch 002: 2863 / 3002 loss=2.626, ppl=6.17, wps=5765.5, ups=0.09, wpb=64727, bsz=128, num_updates=5827, lr=9.99614e-05, gnorm=2.11, loss_scale=4, train_wall=11, gb_free=2.8, wall=66023 2021-06-19 12:59:20 | INFO | train_inner | epoch 002: 2864 / 3002 loss=2.619, ppl=6.14, wps=5865.3, ups=0.09, wpb=64866, bsz=128, num_updates=5828, lr=9.99614e-05, gnorm=2.111, loss_scale=4, train_wall=11, gb_free=2.8, wall=66035 2021-06-19 12:59:31 | INFO | train_inner | epoch 002: 2865 / 3002 loss=2.755, ppl=6.75, wps=5792.4, ups=0.09, wpb=64815, bsz=128, num_updates=5829, lr=9.99614e-05, gnorm=3.017, loss_scale=4, train_wall=11, gb_free=2.8, wall=66046 2021-06-19 12:59:43 | INFO | train_inner | epoch 002: 2866 / 3002 loss=2.527, ppl=5.77, wps=5748.8, ups=0.09, wpb=64877, bsz=128, num_updates=5830, lr=9.99614e-05, gnorm=2.261, loss_scale=4, train_wall=11, gb_free=2.8, wall=66057 2021-06-19 12:59:54 | INFO | train_inner | epoch 002: 2867 / 3002 loss=2.729, ppl=6.63, wps=5890.2, ups=0.09, wpb=64874, bsz=128, num_updates=5831, lr=9.99613e-05, gnorm=2.503, loss_scale=4, train_wall=11, gb_free=2.8, wall=66068 2021-06-19 13:00:05 | INFO | train_inner | epoch 002: 2868 / 3002 loss=2.723, ppl=6.6, wps=5764.8, ups=0.09, wpb=64817, bsz=128, num_updates=5832, lr=9.99613e-05, gnorm=2.178, loss_scale=4, train_wall=11, gb_free=2.8, wall=66079 2021-06-19 13:00:16 | INFO | train_inner | epoch 002: 2869 / 3002 loss=2.807, ppl=7, wps=5863.3, ups=0.09, wpb=64824, bsz=128, num_updates=5833, lr=9.99613e-05, gnorm=2.149, loss_scale=4, train_wall=11, gb_free=2.8, wall=66090 2021-06-19 13:00:27 | INFO | train_inner | epoch 002: 2870 / 3002 loss=2.675, ppl=6.38, wps=5827.7, ups=0.09, wpb=64873, bsz=128, num_updates=5834, lr=9.99613e-05, gnorm=2.293, loss_scale=4, train_wall=11, gb_free=2.8, wall=66101 2021-06-19 13:00:38 | INFO | train_inner | epoch 002: 2871 / 3002 loss=2.721, ppl=6.59, wps=5815.9, ups=0.09, wpb=64813, bsz=128, num_updates=5835, lr=9.99613e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=66113 2021-06-19 13:00:49 | INFO | train_inner | epoch 002: 2872 / 3002 loss=2.993, ppl=7.96, wps=5830.3, ups=0.09, wpb=64734, bsz=128, num_updates=5836, lr=9.99613e-05, gnorm=2.401, loss_scale=4, train_wall=11, gb_free=2.8, wall=66124 2021-06-19 13:01:00 | INFO | train_inner | epoch 002: 2873 / 3002 loss=2.678, ppl=6.4, wps=5872.6, ups=0.09, wpb=64844, bsz=128, num_updates=5837, lr=9.99613e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=66135 2021-06-19 13:01:12 | INFO | train_inner | epoch 002: 2874 / 3002 loss=2.769, ppl=6.82, wps=5833.4, ups=0.09, wpb=64819, bsz=128, num_updates=5838, lr=9.99613e-05, gnorm=2.146, loss_scale=4, train_wall=11, gb_free=2.8, wall=66146 2021-06-19 13:01:23 | INFO | train_inner | epoch 002: 2875 / 3002 loss=2.684, ppl=6.43, wps=5801.3, ups=0.09, wpb=64860, bsz=128, num_updates=5839, lr=9.99613e-05, gnorm=2.225, loss_scale=4, train_wall=11, gb_free=2.8, wall=66157 2021-06-19 13:01:34 | INFO | train_inner | epoch 002: 2876 / 3002 loss=2.851, ppl=7.21, wps=5765.4, ups=0.09, wpb=64764, bsz=128, num_updates=5840, lr=9.99613e-05, gnorm=2.249, loss_scale=4, train_wall=11, gb_free=2.8, wall=66168 2021-06-19 13:01:45 | INFO | train_inner | epoch 002: 2877 / 3002 loss=2.799, ppl=6.96, wps=5853.9, ups=0.09, wpb=64828, bsz=128, num_updates=5841, lr=9.99613e-05, gnorm=2.218, loss_scale=4, train_wall=11, gb_free=2.8, wall=66179 2021-06-19 13:01:56 | INFO | train_inner | epoch 002: 2878 / 3002 loss=2.877, ppl=7.35, wps=5720, ups=0.09, wpb=64863, bsz=128, num_updates=5842, lr=9.99613e-05, gnorm=2.245, loss_scale=4, train_wall=11, gb_free=2.8, wall=66191 2021-06-19 13:02:07 | INFO | train_inner | epoch 002: 2879 / 3002 loss=2.707, ppl=6.53, wps=5834.8, ups=0.09, wpb=64776, bsz=128, num_updates=5843, lr=9.99613e-05, gnorm=2.22, loss_scale=4, train_wall=11, gb_free=2.8, wall=66202 2021-06-19 13:02:18 | INFO | train_inner | epoch 002: 2880 / 3002 loss=2.622, ppl=6.16, wps=5900.2, ups=0.09, wpb=64862, bsz=128, num_updates=5844, lr=9.99612e-05, gnorm=4.497, loss_scale=4, train_wall=10, gb_free=2.8, wall=66213 2021-06-19 13:02:30 | INFO | train_inner | epoch 002: 2881 / 3002 loss=2.812, ppl=7.02, wps=5740.3, ups=0.09, wpb=64797, bsz=128, num_updates=5845, lr=9.99612e-05, gnorm=5.071, loss_scale=4, train_wall=11, gb_free=2.8, wall=66224 2021-06-19 13:02:41 | INFO | train_inner | epoch 002: 2882 / 3002 loss=2.648, ppl=6.27, wps=5782.9, ups=0.09, wpb=64760, bsz=128, num_updates=5846, lr=9.99612e-05, gnorm=2.152, loss_scale=4, train_wall=11, gb_free=2.8, wall=66235 2021-06-19 13:02:52 | INFO | train_inner | epoch 002: 2883 / 3002 loss=2.61, ppl=6.11, wps=5770.5, ups=0.09, wpb=64851, bsz=128, num_updates=5847, lr=9.99612e-05, gnorm=2.221, loss_scale=4, train_wall=11, gb_free=2.8, wall=66246 2021-06-19 13:03:03 | INFO | train_inner | epoch 002: 2884 / 3002 loss=2.798, ppl=6.96, wps=6006.6, ups=0.09, wpb=64866, bsz=128, num_updates=5848, lr=9.99612e-05, gnorm=2.296, loss_scale=4, train_wall=10, gb_free=2.8, wall=66257 2021-06-19 13:03:14 | INFO | train_inner | epoch 002: 2885 / 3002 loss=2.577, ppl=5.97, wps=5841.4, ups=0.09, wpb=64825, bsz=128, num_updates=5849, lr=9.99612e-05, gnorm=2.173, loss_scale=4, train_wall=11, gb_free=2.8, wall=66268 2021-06-19 13:03:25 | INFO | train_inner | epoch 002: 2886 / 3002 loss=2.649, ppl=6.27, wps=5848.2, ups=0.09, wpb=64877, bsz=128, num_updates=5850, lr=9.99612e-05, gnorm=2.194, loss_scale=4, train_wall=11, gb_free=2.8, wall=66279 2021-06-19 13:03:36 | INFO | train_inner | epoch 002: 2887 / 3002 loss=2.635, ppl=6.21, wps=5736.3, ups=0.09, wpb=64788, bsz=128, num_updates=5851, lr=9.99612e-05, gnorm=6.702, loss_scale=4, train_wall=11, gb_free=2.8, wall=66291 2021-06-19 13:03:48 | INFO | train_inner | epoch 002: 2888 / 3002 loss=2.762, ppl=6.78, wps=5841.9, ups=0.09, wpb=64841, bsz=128, num_updates=5852, lr=9.99612e-05, gnorm=2.238, loss_scale=4, train_wall=11, gb_free=2.8, wall=66302 2021-06-19 13:03:58 | INFO | train_inner | epoch 002: 2889 / 3002 loss=2.837, ppl=7.14, wps=5925.5, ups=0.09, wpb=64863, bsz=128, num_updates=5853, lr=9.99612e-05, gnorm=5.563, loss_scale=4, train_wall=10, gb_free=2.8, wall=66313 2021-06-19 13:04:10 | INFO | train_inner | epoch 002: 2890 / 3002 loss=2.722, ppl=6.6, wps=5804, ups=0.09, wpb=64833, bsz=128, num_updates=5854, lr=9.99612e-05, gnorm=2.302, loss_scale=4, train_wall=11, gb_free=2.8, wall=66324 2021-06-19 13:04:21 | INFO | train_inner | epoch 002: 2891 / 3002 loss=2.672, ppl=6.37, wps=5837.5, ups=0.09, wpb=64834, bsz=128, num_updates=5855, lr=9.99612e-05, gnorm=2.17, loss_scale=4, train_wall=11, gb_free=2.8, wall=66335 2021-06-19 13:04:32 | INFO | train_inner | epoch 002: 2892 / 3002 loss=2.775, ppl=6.84, wps=5838.8, ups=0.09, wpb=64893, bsz=128, num_updates=5856, lr=9.99611e-05, gnorm=2.155, loss_scale=4, train_wall=11, gb_free=2.8, wall=66346 2021-06-19 13:04:43 | INFO | train_inner | epoch 002: 2893 / 3002 loss=2.562, ppl=5.91, wps=6004.6, ups=0.09, wpb=64811, bsz=128, num_updates=5857, lr=9.99611e-05, gnorm=2.14, loss_scale=4, train_wall=10, gb_free=2.8, wall=66357 2021-06-19 13:04:54 | INFO | train_inner | epoch 002: 2894 / 3002 loss=2.801, ppl=6.97, wps=5890.8, ups=0.09, wpb=64757, bsz=128, num_updates=5858, lr=9.99611e-05, gnorm=2.178, loss_scale=4, train_wall=11, gb_free=2.8, wall=66368 2021-06-19 13:05:05 | INFO | train_inner | epoch 002: 2895 / 3002 loss=2.666, ppl=6.35, wps=5685.8, ups=0.09, wpb=64825, bsz=128, num_updates=5859, lr=9.99611e-05, gnorm=2.217, loss_scale=4, train_wall=11, gb_free=2.8, wall=66379 2021-06-19 13:05:16 | INFO | train_inner | epoch 002: 2896 / 3002 loss=2.882, ppl=7.37, wps=5927.9, ups=0.09, wpb=64839, bsz=128, num_updates=5860, lr=9.99611e-05, gnorm=2.249, loss_scale=4, train_wall=10, gb_free=2.8, wall=66390 2021-06-19 13:05:27 | INFO | train_inner | epoch 002: 2897 / 3002 loss=2.792, ppl=6.92, wps=5858.2, ups=0.09, wpb=64906, bsz=128, num_updates=5861, lr=9.99611e-05, gnorm=2.205, loss_scale=4, train_wall=11, gb_free=2.8, wall=66401 2021-06-19 13:05:38 | INFO | train_inner | epoch 002: 2898 / 3002 loss=2.665, ppl=6.34, wps=5874.7, ups=0.09, wpb=64855, bsz=128, num_updates=5862, lr=9.99611e-05, gnorm=6.676, loss_scale=4, train_wall=11, gb_free=2.8, wall=66412 2021-06-19 13:05:49 | INFO | train_inner | epoch 002: 2899 / 3002 loss=2.611, ppl=6.11, wps=5887.6, ups=0.09, wpb=64807, bsz=128, num_updates=5863, lr=9.99611e-05, gnorm=2.246, loss_scale=4, train_wall=11, gb_free=2.8, wall=66423 2021-06-19 13:06:00 | INFO | train_inner | epoch 002: 2900 / 3002 loss=2.794, ppl=6.93, wps=5776.5, ups=0.09, wpb=64835, bsz=128, num_updates=5864, lr=9.99611e-05, gnorm=2.687, loss_scale=4, train_wall=11, gb_free=2.8, wall=66435 2021-06-19 13:06:11 | INFO | train_inner | epoch 002: 2901 / 3002 loss=2.812, ppl=7.02, wps=5890.1, ups=0.09, wpb=64776, bsz=128, num_updates=5865, lr=9.99611e-05, gnorm=2.34, loss_scale=4, train_wall=11, gb_free=2.8, wall=66446 2021-06-19 13:06:22 | INFO | train_inner | epoch 002: 2902 / 3002 loss=2.75, ppl=6.73, wps=5940.2, ups=0.09, wpb=64930, bsz=128, num_updates=5866, lr=9.99611e-05, gnorm=2.283, loss_scale=4, train_wall=10, gb_free=2.8, wall=66457 2021-06-19 13:06:33 | INFO | train_inner | epoch 002: 2903 / 3002 loss=2.714, ppl=6.56, wps=5882.9, ups=0.09, wpb=64784, bsz=128, num_updates=5867, lr=9.99611e-05, gnorm=2.388, loss_scale=4, train_wall=11, gb_free=2.8, wall=66468 2021-06-19 13:06:44 | INFO | train_inner | epoch 002: 2904 / 3002 loss=2.647, ppl=6.26, wps=5919.7, ups=0.09, wpb=64769, bsz=128, num_updates=5868, lr=9.99611e-05, gnorm=2.305, loss_scale=4, train_wall=11, gb_free=2.8, wall=66479 2021-06-19 13:06:55 | INFO | train_inner | epoch 002: 2905 / 3002 loss=2.638, ppl=6.23, wps=5817.9, ups=0.09, wpb=64801, bsz=128, num_updates=5869, lr=9.9961e-05, gnorm=2.406, loss_scale=4, train_wall=11, gb_free=2.8, wall=66490 2021-06-19 13:07:06 | INFO | train_inner | epoch 002: 2906 / 3002 loss=2.738, ppl=6.67, wps=5921.8, ups=0.09, wpb=64834, bsz=128, num_updates=5870, lr=9.9961e-05, gnorm=2.424, loss_scale=4, train_wall=11, gb_free=2.8, wall=66501 2021-06-19 13:07:17 | INFO | train_inner | epoch 002: 2907 / 3002 loss=2.692, ppl=6.46, wps=5850.8, ups=0.09, wpb=64825, bsz=128, num_updates=5871, lr=9.9961e-05, gnorm=2.319, loss_scale=4, train_wall=11, gb_free=2.8, wall=66512 2021-06-19 13:07:29 | INFO | train_inner | epoch 002: 2908 / 3002 loss=2.567, ppl=5.92, wps=5829.6, ups=0.09, wpb=64807, bsz=128, num_updates=5872, lr=9.9961e-05, gnorm=2.258, loss_scale=4, train_wall=11, gb_free=2.8, wall=66523 2021-06-19 13:07:40 | INFO | train_inner | epoch 002: 2909 / 3002 loss=2.616, ppl=6.13, wps=5802.4, ups=0.09, wpb=64832, bsz=128, num_updates=5873, lr=9.9961e-05, gnorm=2.282, loss_scale=4, train_wall=11, gb_free=2.8, wall=66534 2021-06-19 13:07:51 | INFO | train_inner | epoch 002: 2910 / 3002 loss=2.727, ppl=6.62, wps=5864.2, ups=0.09, wpb=64872, bsz=128, num_updates=5874, lr=9.9961e-05, gnorm=2.311, loss_scale=4, train_wall=11, gb_free=2.8, wall=66545 2021-06-19 13:08:02 | INFO | train_inner | epoch 002: 2911 / 3002 loss=2.538, ppl=5.81, wps=5909, ups=0.09, wpb=64825, bsz=128, num_updates=5875, lr=9.9961e-05, gnorm=2.245, loss_scale=4, train_wall=11, gb_free=2.8, wall=66556 2021-06-19 13:08:13 | INFO | train_inner | epoch 002: 2912 / 3002 loss=2.759, ppl=6.77, wps=5829.3, ups=0.09, wpb=64798, bsz=128, num_updates=5876, lr=9.9961e-05, gnorm=2.26, loss_scale=4, train_wall=11, gb_free=2.8, wall=66567 2021-06-19 13:08:24 | INFO | train_inner | epoch 002: 2913 / 3002 loss=2.687, ppl=6.44, wps=5873.2, ups=0.09, wpb=64892, bsz=128, num_updates=5877, lr=9.9961e-05, gnorm=2.286, loss_scale=4, train_wall=11, gb_free=2.8, wall=66578 2021-06-19 13:08:35 | INFO | train_inner | epoch 002: 2914 / 3002 loss=2.693, ppl=6.47, wps=5806.1, ups=0.09, wpb=64772, bsz=128, num_updates=5878, lr=9.9961e-05, gnorm=2.226, loss_scale=4, train_wall=11, gb_free=2.8, wall=66589 2021-06-19 13:08:46 | INFO | train_inner | epoch 002: 2915 / 3002 loss=2.734, ppl=6.65, wps=5856.5, ups=0.09, wpb=64851, bsz=128, num_updates=5879, lr=9.9961e-05, gnorm=3.304, loss_scale=4, train_wall=11, gb_free=2.8, wall=66600 2021-06-19 13:08:57 | INFO | train_inner | epoch 002: 2916 / 3002 loss=2.696, ppl=6.48, wps=5887.9, ups=0.09, wpb=64879, bsz=128, num_updates=5880, lr=9.9961e-05, gnorm=2.198, loss_scale=4, train_wall=11, gb_free=2.8, wall=66611 2021-06-19 13:09:08 | INFO | train_inner | epoch 002: 2917 / 3002 loss=2.685, ppl=6.43, wps=5835.6, ups=0.09, wpb=64843, bsz=128, num_updates=5881, lr=9.99609e-05, gnorm=2.217, loss_scale=4, train_wall=11, gb_free=2.8, wall=66623 2021-06-19 13:09:19 | INFO | train_inner | epoch 002: 2918 / 3002 loss=2.8, ppl=6.97, wps=5950.7, ups=0.09, wpb=64875, bsz=128, num_updates=5882, lr=9.99609e-05, gnorm=2.549, loss_scale=4, train_wall=10, gb_free=2.8, wall=66634 2021-06-19 13:09:30 | INFO | train_inner | epoch 002: 2919 / 3002 loss=2.716, ppl=6.57, wps=5825, ups=0.09, wpb=64829, bsz=128, num_updates=5883, lr=9.99609e-05, gnorm=2.202, loss_scale=4, train_wall=11, gb_free=2.8, wall=66645 2021-06-19 13:09:41 | INFO | train_inner | epoch 002: 2920 / 3002 loss=2.631, ppl=6.19, wps=5977.4, ups=0.09, wpb=64908, bsz=128, num_updates=5884, lr=9.99609e-05, gnorm=8.935, loss_scale=4, train_wall=10, gb_free=2.8, wall=66655 2021-06-19 13:09:52 | INFO | train_inner | epoch 002: 2921 / 3002 loss=2.562, ppl=5.9, wps=5844.7, ups=0.09, wpb=64780, bsz=128, num_updates=5885, lr=9.99609e-05, gnorm=2.104, loss_scale=4, train_wall=11, gb_free=2.8, wall=66667 2021-06-19 13:10:03 | INFO | train_inner | epoch 002: 2922 / 3002 loss=2.658, ppl=6.31, wps=5925.4, ups=0.09, wpb=64774, bsz=128, num_updates=5886, lr=9.99609e-05, gnorm=2.715, loss_scale=4, train_wall=10, gb_free=2.8, wall=66678 2021-06-19 13:10:14 | INFO | train_inner | epoch 002: 2923 / 3002 loss=2.697, ppl=6.48, wps=5797.5, ups=0.09, wpb=64834, bsz=128, num_updates=5887, lr=9.99609e-05, gnorm=2.233, loss_scale=4, train_wall=11, gb_free=2.8, wall=66689 2021-06-19 13:10:25 | INFO | train_inner | epoch 002: 2924 / 3002 loss=2.772, ppl=6.83, wps=5831.7, ups=0.09, wpb=64834, bsz=128, num_updates=5888, lr=9.99609e-05, gnorm=2.238, loss_scale=4, train_wall=11, gb_free=2.8, wall=66700 2021-06-19 13:10:37 | INFO | train_inner | epoch 002: 2925 / 3002 loss=2.636, ppl=6.21, wps=5835.4, ups=0.09, wpb=64779, bsz=128, num_updates=5889, lr=9.99609e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=66711 2021-06-19 13:10:48 | INFO | train_inner | epoch 002: 2926 / 3002 loss=2.772, ppl=6.83, wps=5853.1, ups=0.09, wpb=64827, bsz=128, num_updates=5890, lr=9.99609e-05, gnorm=2.26, loss_scale=4, train_wall=11, gb_free=2.8, wall=66722 2021-06-19 13:10:59 | INFO | train_inner | epoch 002: 2927 / 3002 loss=2.686, ppl=6.44, wps=5769.2, ups=0.09, wpb=64796, bsz=128, num_updates=5891, lr=9.99609e-05, gnorm=2.2, loss_scale=4, train_wall=11, gb_free=2.8, wall=66733 2021-06-19 13:11:10 | INFO | train_inner | epoch 002: 2928 / 3002 loss=2.735, ppl=6.66, wps=5779.1, ups=0.09, wpb=64884, bsz=128, num_updates=5892, lr=9.99609e-05, gnorm=2.194, loss_scale=4, train_wall=11, gb_free=2.8, wall=66744 2021-06-19 13:11:21 | INFO | train_inner | epoch 002: 2929 / 3002 loss=2.686, ppl=6.43, wps=5898.6, ups=0.09, wpb=64876, bsz=128, num_updates=5893, lr=9.99609e-05, gnorm=2.175, loss_scale=4, train_wall=11, gb_free=2.8, wall=66755 2021-06-19 13:11:32 | INFO | train_inner | epoch 002: 2930 / 3002 loss=2.609, ppl=6.1, wps=5925, ups=0.09, wpb=64926, bsz=128, num_updates=5894, lr=9.99608e-05, gnorm=2.179, loss_scale=4, train_wall=11, gb_free=2.8, wall=66766 2021-06-19 13:11:43 | INFO | train_inner | epoch 002: 2931 / 3002 loss=2.826, ppl=7.09, wps=5840.4, ups=0.09, wpb=64859, bsz=128, num_updates=5895, lr=9.99608e-05, gnorm=2.192, loss_scale=4, train_wall=11, gb_free=2.8, wall=66778 2021-06-19 13:11:54 | INFO | train_inner | epoch 002: 2932 / 3002 loss=2.729, ppl=6.63, wps=5856.8, ups=0.09, wpb=64858, bsz=128, num_updates=5896, lr=9.99608e-05, gnorm=2.508, loss_scale=4, train_wall=11, gb_free=2.8, wall=66789 2021-06-19 13:12:05 | INFO | train_inner | epoch 002: 2933 / 3002 loss=2.799, ppl=6.96, wps=5944.6, ups=0.09, wpb=64945, bsz=128, num_updates=5897, lr=9.99608e-05, gnorm=2.194, loss_scale=4, train_wall=10, gb_free=2.8, wall=66800 2021-06-19 13:12:16 | INFO | train_inner | epoch 002: 2934 / 3002 loss=2.729, ppl=6.63, wps=5819.3, ups=0.09, wpb=64872, bsz=128, num_updates=5898, lr=9.99608e-05, gnorm=2.18, loss_scale=4, train_wall=11, gb_free=2.8, wall=66811 2021-06-19 13:12:27 | INFO | train_inner | epoch 002: 2935 / 3002 loss=2.67, ppl=6.36, wps=5827.2, ups=0.09, wpb=64858, bsz=128, num_updates=5899, lr=9.99608e-05, gnorm=2.264, loss_scale=4, train_wall=11, gb_free=2.8, wall=66822 2021-06-19 13:12:38 | INFO | train_inner | epoch 002: 2936 / 3002 loss=2.64, ppl=6.23, wps=5947.8, ups=0.09, wpb=64811, bsz=128, num_updates=5900, lr=9.99608e-05, gnorm=2.249, loss_scale=4, train_wall=10, gb_free=2.8, wall=66833 2021-06-19 13:12:49 | INFO | train_inner | epoch 002: 2937 / 3002 loss=2.804, ppl=6.98, wps=5971.9, ups=0.09, wpb=64832, bsz=128, num_updates=5901, lr=9.99608e-05, gnorm=13.585, loss_scale=4, train_wall=10, gb_free=2.8, wall=66844 2021-06-19 13:13:00 | INFO | train_inner | epoch 002: 2938 / 3002 loss=2.699, ppl=6.49, wps=5840.6, ups=0.09, wpb=64805, bsz=128, num_updates=5902, lr=9.99608e-05, gnorm=2.17, loss_scale=4, train_wall=11, gb_free=2.8, wall=66855 2021-06-19 13:13:11 | INFO | train_inner | epoch 002: 2939 / 3002 loss=2.681, ppl=6.41, wps=5834.2, ups=0.09, wpb=64774, bsz=128, num_updates=5903, lr=9.99608e-05, gnorm=2.164, loss_scale=4, train_wall=11, gb_free=2.8, wall=66866 2021-06-19 13:13:22 | INFO | train_inner | epoch 002: 2940 / 3002 loss=2.61, ppl=6.1, wps=5948.5, ups=0.09, wpb=64917, bsz=128, num_updates=5904, lr=9.99608e-05, gnorm=2.16, loss_scale=4, train_wall=10, gb_free=2.8, wall=66877 2021-06-19 13:13:33 | INFO | train_inner | epoch 002: 2941 / 3002 loss=2.634, ppl=6.21, wps=5919, ups=0.09, wpb=64909, bsz=128, num_updates=5905, lr=9.99608e-05, gnorm=2.16, loss_scale=4, train_wall=11, gb_free=2.8, wall=66888 2021-06-19 13:13:44 | INFO | train_inner | epoch 002: 2942 / 3002 loss=2.514, ppl=5.71, wps=5869, ups=0.09, wpb=64918, bsz=128, num_updates=5906, lr=9.99607e-05, gnorm=2.227, loss_scale=4, train_wall=11, gb_free=2.8, wall=66899 2021-06-19 13:13:55 | INFO | train_inner | epoch 002: 2943 / 3002 loss=2.609, ppl=6.1, wps=5832.8, ups=0.09, wpb=64828, bsz=128, num_updates=5907, lr=9.99607e-05, gnorm=3.703, loss_scale=4, train_wall=11, gb_free=2.8, wall=66910 2021-06-19 13:14:06 | INFO | train_inner | epoch 002: 2944 / 3002 loss=2.673, ppl=6.38, wps=5875.2, ups=0.09, wpb=64823, bsz=128, num_updates=5908, lr=9.99607e-05, gnorm=2.215, loss_scale=4, train_wall=11, gb_free=2.8, wall=66921 2021-06-19 13:14:18 | INFO | train_inner | epoch 002: 2945 / 3002 loss=2.821, ppl=7.07, wps=5864.7, ups=0.09, wpb=64842, bsz=128, num_updates=5909, lr=9.99607e-05, gnorm=3.601, loss_scale=4, train_wall=11, gb_free=2.8, wall=66932 2021-06-19 13:14:29 | INFO | train_inner | epoch 002: 2946 / 3002 loss=2.64, ppl=6.23, wps=5741.9, ups=0.09, wpb=64757, bsz=128, num_updates=5910, lr=9.99607e-05, gnorm=2.734, loss_scale=4, train_wall=11, gb_free=2.8, wall=66943 2021-06-19 13:14:40 | INFO | train_inner | epoch 002: 2947 / 3002 loss=2.603, ppl=6.07, wps=5902, ups=0.09, wpb=64882, bsz=128, num_updates=5911, lr=9.99607e-05, gnorm=2.174, loss_scale=4, train_wall=11, gb_free=2.8, wall=66954 2021-06-19 13:14:51 | INFO | train_inner | epoch 002: 2948 / 3002 loss=2.866, ppl=7.29, wps=5927.2, ups=0.09, wpb=64817, bsz=128, num_updates=5912, lr=9.99607e-05, gnorm=2.35, loss_scale=4, train_wall=10, gb_free=2.8, wall=66965 2021-06-19 13:15:02 | INFO | train_inner | epoch 002: 2949 / 3002 loss=2.928, ppl=7.61, wps=5869.2, ups=0.09, wpb=64837, bsz=128, num_updates=5913, lr=9.99607e-05, gnorm=2.098, loss_scale=4, train_wall=11, gb_free=2.8, wall=66976 2021-06-19 13:15:13 | INFO | train_inner | epoch 002: 2950 / 3002 loss=2.755, ppl=6.75, wps=5711.2, ups=0.09, wpb=64769, bsz=128, num_updates=5914, lr=9.99607e-05, gnorm=2.146, loss_scale=4, train_wall=11, gb_free=2.8, wall=66987 2021-06-19 13:15:24 | INFO | train_inner | epoch 002: 2951 / 3002 loss=2.858, ppl=7.25, wps=5857.8, ups=0.09, wpb=64811, bsz=128, num_updates=5915, lr=9.99607e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=66999 2021-06-19 13:15:36 | INFO | train_inner | epoch 002: 2952 / 3002 loss=2.507, ppl=5.69, wps=5725.2, ups=0.09, wpb=64798, bsz=128, num_updates=5916, lr=9.99607e-05, gnorm=2.127, loss_scale=4, train_wall=11, gb_free=2.8, wall=67010 2021-06-19 13:15:47 | INFO | train_inner | epoch 002: 2953 / 3002 loss=2.783, ppl=6.88, wps=5836.7, ups=0.09, wpb=64862, bsz=128, num_updates=5917, lr=9.99607e-05, gnorm=2.158, loss_scale=4, train_wall=11, gb_free=2.8, wall=67021 2021-06-19 13:15:58 | INFO | train_inner | epoch 002: 2954 / 3002 loss=2.716, ppl=6.57, wps=5835.4, ups=0.09, wpb=64759, bsz=128, num_updates=5918, lr=9.99607e-05, gnorm=2.203, loss_scale=4, train_wall=11, gb_free=2.8, wall=67032 2021-06-19 13:16:09 | INFO | train_inner | epoch 002: 2955 / 3002 loss=2.654, ppl=6.3, wps=5790, ups=0.09, wpb=64846, bsz=128, num_updates=5919, lr=9.99606e-05, gnorm=2.353, loss_scale=4, train_wall=11, gb_free=2.8, wall=67043 2021-06-19 13:16:20 | INFO | train_inner | epoch 002: 2956 / 3002 loss=2.693, ppl=6.47, wps=5894.8, ups=0.09, wpb=64845, bsz=128, num_updates=5920, lr=9.99606e-05, gnorm=2.166, loss_scale=4, train_wall=11, gb_free=2.8, wall=67054 2021-06-19 13:16:31 | INFO | train_inner | epoch 002: 2957 / 3002 loss=2.784, ppl=6.89, wps=5814.9, ups=0.09, wpb=64900, bsz=128, num_updates=5921, lr=9.99606e-05, gnorm=3.358, loss_scale=4, train_wall=11, gb_free=2.8, wall=67065 2021-06-19 13:16:42 | INFO | train_inner | epoch 002: 2958 / 3002 loss=2.583, ppl=5.99, wps=5724.8, ups=0.09, wpb=64829, bsz=128, num_updates=5922, lr=9.99606e-05, gnorm=2.265, loss_scale=4, train_wall=11, gb_free=2.8, wall=67077 2021-06-19 13:16:53 | INFO | train_inner | epoch 002: 2959 / 3002 loss=2.675, ppl=6.38, wps=5927.8, ups=0.09, wpb=64932, bsz=128, num_updates=5923, lr=9.99606e-05, gnorm=2.31, loss_scale=4, train_wall=10, gb_free=2.8, wall=67088 2021-06-19 13:17:04 | INFO | train_inner | epoch 002: 2960 / 3002 loss=2.61, ppl=6.11, wps=5892.2, ups=0.09, wpb=64767, bsz=128, num_updates=5924, lr=9.99606e-05, gnorm=2.216, loss_scale=4, train_wall=11, gb_free=2.8, wall=67099 2021-06-19 13:17:15 | INFO | train_inner | epoch 002: 2961 / 3002 loss=2.692, ppl=6.46, wps=5980.2, ups=0.09, wpb=64920, bsz=128, num_updates=5925, lr=9.99606e-05, gnorm=2.174, loss_scale=4, train_wall=10, gb_free=2.8, wall=67110 2021-06-19 13:17:26 | INFO | train_inner | epoch 002: 2962 / 3002 loss=2.683, ppl=6.42, wps=5823.2, ups=0.09, wpb=64887, bsz=128, num_updates=5926, lr=9.99606e-05, gnorm=2.212, loss_scale=4, train_wall=11, gb_free=2.8, wall=67121 2021-06-19 13:17:37 | INFO | train_inner | epoch 002: 2963 / 3002 loss=2.776, ppl=6.85, wps=5952.7, ups=0.09, wpb=64866, bsz=128, num_updates=5927, lr=9.99606e-05, gnorm=2.161, loss_scale=4, train_wall=10, gb_free=2.8, wall=67132 2021-06-19 13:17:48 | INFO | train_inner | epoch 002: 2964 / 3002 loss=2.878, ppl=7.35, wps=5834.3, ups=0.09, wpb=64801, bsz=128, num_updates=5928, lr=9.99606e-05, gnorm=2.274, loss_scale=4, train_wall=11, gb_free=2.8, wall=67143 2021-06-19 13:17:59 | INFO | train_inner | epoch 002: 2965 / 3002 loss=2.557, ppl=5.88, wps=5902, ups=0.09, wpb=64800, bsz=128, num_updates=5929, lr=9.99606e-05, gnorm=2.25, loss_scale=4, train_wall=11, gb_free=2.8, wall=67154 2021-06-19 13:18:10 | INFO | train_inner | epoch 002: 2966 / 3002 loss=2.705, ppl=6.52, wps=5816.2, ups=0.09, wpb=64763, bsz=128, num_updates=5930, lr=9.99606e-05, gnorm=2.22, loss_scale=4, train_wall=11, gb_free=2.8, wall=67165 2021-06-19 13:18:22 | INFO | train_inner | epoch 002: 2967 / 3002 loss=2.655, ppl=6.3, wps=5771.6, ups=0.09, wpb=64812, bsz=128, num_updates=5931, lr=9.99605e-05, gnorm=2.217, loss_scale=4, train_wall=11, gb_free=2.8, wall=67176 2021-06-19 13:18:33 | INFO | train_inner | epoch 002: 2968 / 3002 loss=2.769, ppl=6.82, wps=5807.1, ups=0.09, wpb=64882, bsz=128, num_updates=5932, lr=9.99605e-05, gnorm=2.337, loss_scale=4, train_wall=11, gb_free=2.8, wall=67187 2021-06-19 13:18:44 | INFO | train_inner | epoch 002: 2969 / 3002 loss=2.558, ppl=5.89, wps=5874.2, ups=0.09, wpb=64781, bsz=128, num_updates=5933, lr=9.99605e-05, gnorm=2.346, loss_scale=4, train_wall=11, gb_free=2.8, wall=67198 2021-06-19 13:18:55 | INFO | train_inner | epoch 002: 2970 / 3002 loss=2.586, ppl=6, wps=5880.7, ups=0.09, wpb=64830, bsz=128, num_updates=5934, lr=9.99605e-05, gnorm=2.404, loss_scale=4, train_wall=11, gb_free=2.8, wall=67209 2021-06-19 13:19:06 | INFO | train_inner | epoch 002: 2971 / 3002 loss=2.639, ppl=6.23, wps=5903.4, ups=0.09, wpb=64802, bsz=128, num_updates=5935, lr=9.99605e-05, gnorm=4.808, loss_scale=4, train_wall=11, gb_free=2.8, wall=67220 2021-06-19 13:19:17 | INFO | train_inner | epoch 002: 2972 / 3002 loss=2.909, ppl=7.51, wps=5861.3, ups=0.09, wpb=64817, bsz=128, num_updates=5936, lr=9.99605e-05, gnorm=2.169, loss_scale=4, train_wall=11, gb_free=2.8, wall=67231 2021-06-19 13:19:28 | INFO | train_inner | epoch 002: 2973 / 3002 loss=2.787, ppl=6.9, wps=5809.3, ups=0.09, wpb=64760, bsz=128, num_updates=5937, lr=9.99605e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=67242 2021-06-19 13:19:39 | INFO | train_inner | epoch 002: 2974 / 3002 loss=2.601, ppl=6.07, wps=5892.1, ups=0.09, wpb=64829, bsz=128, num_updates=5938, lr=9.99605e-05, gnorm=2.11, loss_scale=4, train_wall=11, gb_free=2.8, wall=67253 2021-06-19 13:19:50 | INFO | train_inner | epoch 002: 2975 / 3002 loss=2.678, ppl=6.4, wps=5947.2, ups=0.09, wpb=64793, bsz=128, num_updates=5939, lr=9.99605e-05, gnorm=2.887, loss_scale=4, train_wall=10, gb_free=2.8, wall=67264 2021-06-19 13:20:01 | INFO | train_inner | epoch 002: 2976 / 3002 loss=2.765, ppl=6.8, wps=5884.9, ups=0.09, wpb=64780, bsz=128, num_updates=5940, lr=9.99605e-05, gnorm=2.249, loss_scale=4, train_wall=11, gb_free=2.8, wall=67275 2021-06-19 13:20:12 | INFO | train_inner | epoch 002: 2977 / 3002 loss=2.724, ppl=6.61, wps=5811.2, ups=0.09, wpb=64799, bsz=128, num_updates=5941, lr=9.99605e-05, gnorm=3.584, loss_scale=4, train_wall=11, gb_free=2.8, wall=67287 2021-06-19 13:20:23 | INFO | train_inner | epoch 002: 2978 / 3002 loss=2.698, ppl=6.49, wps=5899.4, ups=0.09, wpb=64815, bsz=128, num_updates=5942, lr=9.99605e-05, gnorm=2.141, loss_scale=4, train_wall=11, gb_free=2.8, wall=67298 2021-06-19 13:20:34 | INFO | train_inner | epoch 002: 2979 / 3002 loss=2.654, ppl=6.29, wps=5830.3, ups=0.09, wpb=64911, bsz=128, num_updates=5943, lr=9.99605e-05, gnorm=2.456, loss_scale=4, train_wall=11, gb_free=2.8, wall=67309 2021-06-19 13:20:45 | INFO | train_inner | epoch 002: 2980 / 3002 loss=2.659, ppl=6.32, wps=5815.2, ups=0.09, wpb=64850, bsz=128, num_updates=5944, lr=9.99604e-05, gnorm=2.612, loss_scale=4, train_wall=11, gb_free=2.8, wall=67320 2021-06-19 13:20:57 | INFO | train_inner | epoch 002: 2981 / 3002 loss=2.785, ppl=6.89, wps=5835.5, ups=0.09, wpb=64906, bsz=128, num_updates=5945, lr=9.99604e-05, gnorm=2.212, loss_scale=4, train_wall=11, gb_free=2.8, wall=67331 2021-06-19 13:21:08 | INFO | train_inner | epoch 002: 2982 / 3002 loss=2.632, ppl=6.2, wps=5839.7, ups=0.09, wpb=64800, bsz=128, num_updates=5946, lr=9.99604e-05, gnorm=2.213, loss_scale=4, train_wall=11, gb_free=2.8, wall=67342 2021-06-19 13:21:19 | INFO | train_inner | epoch 002: 2983 / 3002 loss=2.7, ppl=6.5, wps=5954.6, ups=0.09, wpb=64870, bsz=128, num_updates=5947, lr=9.99604e-05, gnorm=2.224, loss_scale=4, train_wall=10, gb_free=2.8, wall=67353 2021-06-19 13:21:29 | INFO | train_inner | epoch 002: 2984 / 3002 loss=2.618, ppl=6.14, wps=5995.2, ups=0.09, wpb=64755, bsz=128, num_updates=5948, lr=9.99604e-05, gnorm=2.605, loss_scale=4, train_wall=10, gb_free=2.8, wall=67364 2021-06-19 13:21:41 | INFO | train_inner | epoch 002: 2985 / 3002 loss=2.595, ppl=6.04, wps=5811.5, ups=0.09, wpb=64858, bsz=128, num_updates=5949, lr=9.99604e-05, gnorm=2.16, loss_scale=8, train_wall=11, gb_free=2.8, wall=67375 2021-06-19 13:21:52 | INFO | train_inner | epoch 002: 2986 / 3002 loss=2.61, ppl=6.1, wps=5900.5, ups=0.09, wpb=64790, bsz=128, num_updates=5950, lr=9.99604e-05, gnorm=2.345, loss_scale=8, train_wall=11, gb_free=2.8, wall=67386 2021-06-19 13:22:03 | INFO | train_inner | epoch 002: 2987 / 3002 loss=2.626, ppl=6.17, wps=5855.4, ups=0.09, wpb=64866, bsz=128, num_updates=5951, lr=9.99604e-05, gnorm=2.251, loss_scale=8, train_wall=11, gb_free=2.8, wall=67397 2021-06-19 13:22:14 | INFO | train_inner | epoch 002: 2988 / 3002 loss=2.875, ppl=7.34, wps=5915.7, ups=0.09, wpb=64800, bsz=128, num_updates=5952, lr=9.99604e-05, gnorm=2.187, loss_scale=8, train_wall=11, gb_free=2.8, wall=67408 2021-06-19 13:22:24 | INFO | train_inner | epoch 002: 2989 / 3002 loss=2.735, ppl=6.66, wps=5959.1, ups=0.09, wpb=64887, bsz=128, num_updates=5953, lr=9.99604e-05, gnorm=2.296, loss_scale=8, train_wall=10, gb_free=2.8, wall=67419 2021-06-19 13:22:36 | INFO | train_inner | epoch 002: 2990 / 3002 loss=2.618, ppl=6.14, wps=5811.4, ups=0.09, wpb=64832, bsz=128, num_updates=5954, lr=9.99604e-05, gnorm=2.236, loss_scale=8, train_wall=11, gb_free=2.8, wall=67430 2021-06-19 13:22:47 | INFO | train_inner | epoch 002: 2991 / 3002 loss=2.675, ppl=6.38, wps=5878, ups=0.09, wpb=64820, bsz=128, num_updates=5955, lr=9.99604e-05, gnorm=2.171, loss_scale=8, train_wall=11, gb_free=2.8, wall=67441 2021-06-19 13:22:58 | INFO | train_inner | epoch 002: 2992 / 3002 loss=2.705, ppl=6.52, wps=5952.5, ups=0.09, wpb=64846, bsz=128, num_updates=5956, lr=9.99603e-05, gnorm=2.13, loss_scale=8, train_wall=10, gb_free=2.8, wall=67452 2021-06-19 13:23:09 | INFO | train_inner | epoch 002: 2993 / 3002 loss=2.583, ppl=5.99, wps=5846.8, ups=0.09, wpb=64811, bsz=128, num_updates=5957, lr=9.99603e-05, gnorm=2.182, loss_scale=8, train_wall=11, gb_free=2.8, wall=67463 2021-06-19 13:23:20 | INFO | train_inner | epoch 002: 2994 / 3002 loss=2.709, ppl=6.54, wps=5910.6, ups=0.09, wpb=64823, bsz=128, num_updates=5958, lr=9.99603e-05, gnorm=2.275, loss_scale=8, train_wall=11, gb_free=2.8, wall=67474 2021-06-19 13:23:31 | INFO | train_inner | epoch 002: 2995 / 3002 loss=2.765, ppl=6.8, wps=5883.7, ups=0.09, wpb=64821, bsz=128, num_updates=5959, lr=9.99603e-05, gnorm=2.247, loss_scale=8, train_wall=11, gb_free=2.8, wall=67485 2021-06-19 13:23:41 | INFO | train_inner | epoch 002: 2996 / 3002 loss=2.553, ppl=5.87, wps=6022.7, ups=0.09, wpb=64881, bsz=128, num_updates=5960, lr=9.99603e-05, gnorm=2.224, loss_scale=8, train_wall=10, gb_free=2.8, wall=67496 2021-06-19 13:23:53 | INFO | train_inner | epoch 002: 2997 / 3002 loss=2.708, ppl=6.53, wps=5758.2, ups=0.09, wpb=64752, bsz=128, num_updates=5961, lr=9.99603e-05, gnorm=2.302, loss_scale=8, train_wall=11, gb_free=2.8, wall=67507 2021-06-19 13:24:04 | INFO | train_inner | epoch 002: 2998 / 3002 loss=2.783, ppl=6.88, wps=5826.9, ups=0.09, wpb=64842, bsz=128, num_updates=5962, lr=9.99603e-05, gnorm=2.32, loss_scale=8, train_wall=11, gb_free=2.8, wall=67518 2021-06-19 13:24:15 | INFO | train_inner | epoch 002: 2999 / 3002 loss=2.658, ppl=6.31, wps=5749.1, ups=0.09, wpb=64876, bsz=128, num_updates=5963, lr=9.99603e-05, gnorm=2.241, loss_scale=8, train_wall=11, gb_free=2.8, wall=67529 2021-06-19 13:24:26 | INFO | train_inner | epoch 002: 3000 / 3002 loss=2.571, ppl=5.94, wps=5747.1, ups=0.09, wpb=64805, bsz=128, num_updates=5964, lr=9.99603e-05, gnorm=2.153, loss_scale=8, train_wall=11, gb_free=2.8, wall=67541 2021-06-19 13:24:37 | INFO | train_inner | epoch 002: 3001 / 3002 loss=2.687, ppl=6.44, wps=5954.8, ups=0.09, wpb=64797, bsz=128, num_updates=5965, lr=9.99603e-05, gnorm=2.218, loss_scale=8, train_wall=10, gb_free=2.8, wall=67552 2021-06-19 13:24:43 | INFO | train_inner | epoch 002: 3002 / 3002 loss=2.725, ppl=6.61, wps=5831.9, ups=0.16, wpb=36452, bsz=72, num_updates=5966, lr=9.99603e-05, gnorm=2.896, loss_scale=8, train_wall=6, gb_free=2.8, wall=67558 2021-06-19 13:24:43 | INFO | fairseq_cli.train | begin validation on "valid" subset 2021-06-19 13:39:39 | INFO | valid | epoch 002 | valid on 'valid' subset | loss 2.535 | ppl 5.79 | wps 19710.6 | wpb 506.5 | bsz 1 | num_updates 5966 | best_loss 2.535 2021-06-19 13:39:39 | INFO | fairseq.checkpoint_utils | Preparing to save checkpoint for epoch 2 @ 5966 updates 2021-06-19 13:39:39 | INFO | fairseq.trainer | Saving checkpoint to checkpoints/checkpoint2.pt 2021-06-19 13:39:54 | INFO | fairseq.trainer | Finished saving checkpoint to checkpoints/checkpoint2.pt 2021-06-19 13:46:21 | INFO | fairseq.checkpoint_utils | Saved checkpoint checkpoints/checkpoint2.pt (epoch 2 @ 5966 updates, score 2.535) (writing took 402.46520497099846 seconds) 2021-06-19 13:46:21 | INFO | fairseq_cli.train | end of epoch 2 (average epoch stats below) 2021-06-19 13:46:21 | INFO | train | epoch 002 | loss 2.768 | ppl 6.81 | wps 5602.8 | ups 0.09 | wpb 64819.4 | bsz 128 | num_updates 5966 | lr 9.99603e-05 | gnorm 2.571 | loss_scale 8 | train_wall 31877 | gb_free 2.8 | wall 68856 2021-06-19 13:46:22 | INFO | fairseq.trainer | begin training epoch 3 2021-06-19 13:46:22 | INFO | fairseq_cli.train | Start iterating over samples 2021-06-19 13:46:32 | INFO | train_inner | epoch 003: 1 / 3002 loss=2.854, ppl=7.23, wps=49.6, ups=0, wpb=64861, bsz=128, num_updates=5967, lr=9.99603e-05, gnorm=2.125, loss_scale=8, train_wall=10, gb_free=2.8, wall=68866 2021-06-19 13:46:42 | INFO | train_inner | epoch 003: 2 / 3002 loss=2.708, ppl=6.53, wps=6305.1, ups=0.1, wpb=64843, bsz=128, num_updates=5968, lr=9.99603e-05, gnorm=2.231, loss_scale=8, train_wall=10, gb_free=2.8, wall=68877 2021-06-19 13:46:53 | INFO | train_inner | epoch 003: 3 / 3002 loss=2.592, ppl=6.03, wps=6232.8, ups=0.1, wpb=64801, bsz=128, num_updates=5969, lr=9.99602e-05, gnorm=2.101, loss_scale=8, train_wall=10, gb_free=2.8, wall=68887 2021-06-19 13:47:03 | INFO | train_inner | epoch 003: 4 / 3002 loss=2.667, ppl=6.35, wps=6134.2, ups=0.09, wpb=64800, bsz=128, num_updates=5970, lr=9.99602e-05, gnorm=2.166, loss_scale=8, train_wall=10, gb_free=2.8, wall=68898 2021-06-19 13:47:14 | INFO | train_inner | epoch 003: 5 / 3002 loss=2.685, ppl=6.43, wps=5998.3, ups=0.09, wpb=64840, bsz=128, num_updates=5971, lr=9.99602e-05, gnorm=2.265, loss_scale=8, train_wall=10, gb_free=2.8, wall=68909 2021-06-19 13:47:25 | INFO | train_inner | epoch 003: 6 / 3002 loss=2.803, ppl=6.98, wps=6144.4, ups=0.09, wpb=64837, bsz=128, num_updates=5972, lr=9.99602e-05, gnorm=3.475, loss_scale=8, train_wall=10, gb_free=2.8, wall=68919 2021-06-19 13:47:35 | INFO | train_inner | epoch 003: 7 / 3002 loss=2.606, ppl=6.09, wps=6059.5, ups=0.09, wpb=64829, bsz=128, num_updates=5973, lr=9.99602e-05, gnorm=2.227, loss_scale=8, train_wall=10, gb_free=2.8, wall=68930 2021-06-19 13:47:46 | INFO | train_inner | epoch 003: 8 / 3002 loss=2.633, ppl=6.2, wps=6043.3, ups=0.09, wpb=64784, bsz=128, num_updates=5974, lr=9.99602e-05, gnorm=2.139, loss_scale=8, train_wall=10, gb_free=2.8, wall=68941 2021-06-19 13:47:57 | INFO | train_inner | epoch 003: 9 / 3002 loss=2.654, ppl=6.29, wps=6006.5, ups=0.09, wpb=64807, bsz=128, num_updates=5975, lr=9.99602e-05, gnorm=2.161, loss_scale=8, train_wall=10, gb_free=2.8, wall=68951 2021-06-19 13:48:08 | INFO | train_inner | epoch 003: 10 / 3002 loss=2.823, ppl=7.07, wps=6054.7, ups=0.09, wpb=64825, bsz=128, num_updates=5976, lr=9.99602e-05, gnorm=2.447, loss_scale=8, train_wall=10, gb_free=2.8, wall=68962 2021-06-19 13:48:19 | INFO | train_inner | epoch 003: 11 / 3002 loss=2.632, ppl=6.2, wps=5949.8, ups=0.09, wpb=64836, bsz=128, num_updates=5977, lr=9.99602e-05, gnorm=2.158, loss_scale=8, train_wall=10, gb_free=2.8, wall=68973 2021-06-19 13:48:30 | INFO | train_inner | epoch 003: 12 / 3002 loss=2.636, ppl=6.21, wps=5889.1, ups=0.09, wpb=64880, bsz=128, num_updates=5978, lr=9.99602e-05, gnorm=2.181, loss_scale=8, train_wall=11, gb_free=2.8, wall=68984 2021-06-19 13:48:40 | INFO | train_inner | epoch 003: 13 / 3002 loss=2.704, ppl=6.51, wps=5960.2, ups=0.09, wpb=64840, bsz=128, num_updates=5979, lr=9.99602e-05, gnorm=2.189, loss_scale=8, train_wall=10, gb_free=2.8, wall=68995 2021-06-19 13:48:51 | INFO | train_inner | epoch 003: 14 / 3002 loss=2.649, ppl=6.27, wps=5908, ups=0.09, wpb=64830, bsz=128, num_updates=5980, lr=9.99602e-05, gnorm=2.475, loss_scale=8, train_wall=11, gb_free=2.8, wall=69006 2021-06-19 13:49:02 | INFO | train_inner | epoch 003: 15 / 3002 loss=2.594, ppl=6.04, wps=6005.3, ups=0.09, wpb=64845, bsz=128, num_updates=5981, lr=9.99601e-05, gnorm=3.109, loss_scale=8, train_wall=10, gb_free=2.8, wall=69017 2021-06-19 13:49:13 | INFO | train_inner | epoch 003: 16 / 3002 loss=2.7, ppl=6.5, wps=6027.3, ups=0.09, wpb=64936, bsz=128, num_updates=5982, lr=9.99601e-05, gnorm=2.163, loss_scale=8, train_wall=10, gb_free=2.8, wall=69027 2021-06-19 13:49:24 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 2021-06-19 13:49:35 | INFO | train_inner | epoch 003: 18 / 3002 loss=2.536, ppl=5.8, wps=2970.6, ups=0.05, wpb=64932, bsz=128, num_updates=5983, lr=9.99601e-05, gnorm=2.118, loss_scale=4, train_wall=21, gb_free=2.8, wall=69049 2021-06-19 13:49:46 | INFO | train_inner | epoch 003: 19 / 3002 loss=2.571, ppl=5.94, wps=5848.3, ups=0.09, wpb=64799, bsz=128, num_updates=5984, lr=9.99601e-05, gnorm=2.149, loss_scale=4, train_wall=11, gb_free=2.8, wall=69060 2021-06-19 13:49:57 | INFO | train_inner | epoch 003: 20 / 3002 loss=2.591, ppl=6.03, wps=5862.8, ups=0.09, wpb=64781, bsz=128, num_updates=5985, lr=9.99601e-05, gnorm=2.184, loss_scale=4, train_wall=11, gb_free=2.8, wall=69071 2021-06-19 13:50:08 | INFO | train_inner | epoch 003: 21 / 3002 loss=2.518, ppl=5.73, wps=5944.1, ups=0.09, wpb=64759, bsz=128, num_updates=5986, lr=9.99601e-05, gnorm=2.249, loss_scale=4, train_wall=10, gb_free=2.8, wall=69082 2021-06-19 13:50:19 | INFO | train_inner | epoch 003: 22 / 3002 loss=2.712, ppl=6.55, wps=5996.9, ups=0.09, wpb=64846, bsz=128, num_updates=5987, lr=9.99601e-05, gnorm=2.152, loss_scale=4, train_wall=10, gb_free=2.8, wall=69093 2021-06-19 13:50:30 | INFO | train_inner | epoch 003: 23 / 3002 loss=2.71, ppl=6.54, wps=5872.7, ups=0.09, wpb=64855, bsz=128, num_updates=5988, lr=9.99601e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=69104 2021-06-19 13:50:41 | INFO | train_inner | epoch 003: 24 / 3002 loss=2.596, ppl=6.05, wps=5795.8, ups=0.09, wpb=64873, bsz=128, num_updates=5989, lr=9.99601e-05, gnorm=2.183, loss_scale=4, train_wall=11, gb_free=2.8, wall=69115 2021-06-19 13:50:52 | INFO | train_inner | epoch 003: 25 / 3002 loss=2.766, ppl=6.8, wps=5850.8, ups=0.09, wpb=64784, bsz=128, num_updates=5990, lr=9.99601e-05, gnorm=2.229, loss_scale=4, train_wall=11, gb_free=2.8, wall=69126 2021-06-19 13:51:03 | INFO | train_inner | epoch 003: 26 / 3002 loss=2.512, ppl=5.71, wps=5771.9, ups=0.09, wpb=64732, bsz=128, num_updates=5991, lr=9.99601e-05, gnorm=2.137, loss_scale=4, train_wall=11, gb_free=2.8, wall=69138 2021-06-19 13:51:15 | INFO | train_inner | epoch 003: 27 / 3002 loss=2.774, ppl=6.84, wps=5691.6, ups=0.09, wpb=64785, bsz=128, num_updates=5992, lr=9.99601e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=69149 2021-06-19 13:51:26 | INFO | train_inner | epoch 003: 28 / 3002 loss=2.657, ppl=6.31, wps=5743.9, ups=0.09, wpb=64853, bsz=128, num_updates=5993, lr=9.99601e-05, gnorm=2.118, loss_scale=4, train_wall=11, gb_free=2.8, wall=69160 2021-06-19 13:51:37 | INFO | train_inner | epoch 003: 29 / 3002 loss=2.597, ppl=6.05, wps=5798.6, ups=0.09, wpb=64825, bsz=128, num_updates=5994, lr=9.996e-05, gnorm=2.224, loss_scale=4, train_wall=11, gb_free=2.8, wall=69171 2021-06-19 13:51:48 | INFO | train_inner | epoch 003: 30 / 3002 loss=2.609, ppl=6.1, wps=5765.7, ups=0.09, wpb=64824, bsz=128, num_updates=5995, lr=9.996e-05, gnorm=2.988, loss_scale=4, train_wall=11, gb_free=2.8, wall=69183 2021-06-19 13:51:59 | INFO | train_inner | epoch 003: 31 / 3002 loss=2.712, ppl=6.55, wps=5805.4, ups=0.09, wpb=64769, bsz=128, num_updates=5996, lr=9.996e-05, gnorm=2.14, loss_scale=4, train_wall=11, gb_free=2.8, wall=69194 2021-06-19 13:52:11 | INFO | train_inner | epoch 003: 32 / 3002 loss=2.99, ppl=7.95, wps=5880.6, ups=0.09, wpb=64796, bsz=128, num_updates=5997, lr=9.996e-05, gnorm=2.232, loss_scale=4, train_wall=11, gb_free=2.8, wall=69205 2021-06-19 13:52:22 | INFO | train_inner | epoch 003: 33 / 3002 loss=2.666, ppl=6.35, wps=5881.3, ups=0.09, wpb=64882, bsz=128, num_updates=5998, lr=9.996e-05, gnorm=2.172, loss_scale=4, train_wall=11, gb_free=2.8, wall=69216 2021-06-19 13:52:33 | INFO | train_inner | epoch 003: 34 / 3002 loss=2.553, ppl=5.87, wps=5886.8, ups=0.09, wpb=64861, bsz=128, num_updates=5999, lr=9.996e-05, gnorm=2.233, loss_scale=4, train_wall=11, gb_free=2.8, wall=69227 2021-06-19 13:52:44 | INFO | train_inner | epoch 003: 35 / 3002 loss=2.541, ppl=5.82, wps=5909.3, ups=0.09, wpb=64908, bsz=128, num_updates=6000, lr=9.996e-05, gnorm=2.228, loss_scale=4, train_wall=11, gb_free=2.8, wall=69238 2021-06-19 13:52:54 | INFO | train_inner | epoch 003: 36 / 3002 loss=2.64, ppl=6.23, wps=5949.6, ups=0.09, wpb=64889, bsz=128, num_updates=6001, lr=9.996e-05, gnorm=2.207, loss_scale=4, train_wall=10, gb_free=2.8, wall=69249 2021-06-19 13:53:06 | INFO | train_inner | epoch 003: 37 / 3002 loss=2.578, ppl=5.97, wps=5865.9, ups=0.09, wpb=64894, bsz=128, num_updates=6002, lr=9.996e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=69260 2021-06-19 13:53:17 | INFO | train_inner | epoch 003: 38 / 3002 loss=2.717, ppl=6.58, wps=5789, ups=0.09, wpb=64859, bsz=128, num_updates=6003, lr=9.996e-05, gnorm=2.129, loss_scale=4, train_wall=11, gb_free=2.8, wall=69271 2021-06-19 13:53:28 | INFO | train_inner | epoch 003: 39 / 3002 loss=2.565, ppl=5.92, wps=5870.5, ups=0.09, wpb=64852, bsz=128, num_updates=6004, lr=9.996e-05, gnorm=2.067, loss_scale=4, train_wall=11, gb_free=2.8, wall=69282 2021-06-19 13:53:39 | INFO | train_inner | epoch 003: 40 / 3002 loss=2.645, ppl=6.26, wps=5869.7, ups=0.09, wpb=64719, bsz=128, num_updates=6005, lr=9.996e-05, gnorm=2.043, loss_scale=4, train_wall=11, gb_free=2.8, wall=69293 2021-06-19 13:53:50 | INFO | train_inner | epoch 003: 41 / 3002 loss=2.672, ppl=6.37, wps=5837.6, ups=0.09, wpb=64864, bsz=128, num_updates=6006, lr=9.99599e-05, gnorm=2.227, loss_scale=4, train_wall=11, gb_free=2.8, wall=69304 2021-06-19 13:54:01 | INFO | train_inner | epoch 003: 42 / 3002 loss=2.733, ppl=6.65, wps=5864.8, ups=0.09, wpb=64761, bsz=128, num_updates=6007, lr=9.99599e-05, gnorm=2.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=69315 2021-06-19 13:54:12 | INFO | train_inner | epoch 003: 43 / 3002 loss=2.544, ppl=5.83, wps=5825.8, ups=0.09, wpb=64816, bsz=128, num_updates=6008, lr=9.99599e-05, gnorm=2.069, loss_scale=4, train_wall=11, gb_free=2.8, wall=69326 2021-06-19 13:54:23 | INFO | train_inner | epoch 003: 44 / 3002 loss=2.793, ppl=6.93, wps=5774.4, ups=0.09, wpb=64708, bsz=128, num_updates=6009, lr=9.99599e-05, gnorm=2.113, loss_scale=4, train_wall=11, gb_free=2.8, wall=69338 2021-06-19 13:54:35 | INFO | train_inner | epoch 003: 45 / 3002 loss=2.69, ppl=6.45, wps=5750.4, ups=0.09, wpb=64849, bsz=128, num_updates=6010, lr=9.99599e-05, gnorm=2.177, loss_scale=4, train_wall=11, gb_free=2.8, wall=69349 2021-06-19 13:54:46 | INFO | train_inner | epoch 003: 46 / 3002 loss=2.636, ppl=6.22, wps=5805.5, ups=0.09, wpb=64899, bsz=128, num_updates=6011, lr=9.99599e-05, gnorm=2.277, loss_scale=4, train_wall=11, gb_free=2.8, wall=69360 2021-06-19 13:54:57 | INFO | train_inner | epoch 003: 47 / 3002 loss=2.511, ppl=5.7, wps=5810.4, ups=0.09, wpb=64914, bsz=128, num_updates=6012, lr=9.99599e-05, gnorm=2.218, loss_scale=4, train_wall=11, gb_free=2.8, wall=69371 2021-06-19 13:55:08 | INFO | train_inner | epoch 003: 48 / 3002 loss=2.645, ppl=6.25, wps=5846.3, ups=0.09, wpb=64855, bsz=128, num_updates=6013, lr=9.99599e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=69382 2021-06-19 13:55:19 | INFO | train_inner | epoch 003: 49 / 3002 loss=2.672, ppl=6.37, wps=5861.8, ups=0.09, wpb=64789, bsz=128, num_updates=6014, lr=9.99599e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=69393 2021-06-19 13:55:30 | INFO | train_inner | epoch 003: 50 / 3002 loss=2.582, ppl=5.99, wps=5846.4, ups=0.09, wpb=64763, bsz=128, num_updates=6015, lr=9.99599e-05, gnorm=2.158, loss_scale=4, train_wall=11, gb_free=2.8, wall=69404 2021-06-19 13:55:41 | INFO | train_inner | epoch 003: 51 / 3002 loss=2.459, ppl=5.5, wps=5940.3, ups=0.09, wpb=64879, bsz=128, num_updates=6016, lr=9.99599e-05, gnorm=2.197, loss_scale=4, train_wall=10, gb_free=2.8, wall=69415 2021-06-19 13:55:52 | INFO | train_inner | epoch 003: 52 / 3002 loss=2.713, ppl=6.56, wps=5848.5, ups=0.09, wpb=64778, bsz=128, num_updates=6017, lr=9.99599e-05, gnorm=2.283, loss_scale=4, train_wall=11, gb_free=2.8, wall=69426 2021-06-19 13:56:03 | INFO | train_inner | epoch 003: 53 / 3002 loss=2.856, ppl=7.24, wps=5833.2, ups=0.09, wpb=64770, bsz=128, num_updates=6018, lr=9.99599e-05, gnorm=2.181, loss_scale=4, train_wall=11, gb_free=2.8, wall=69438 2021-06-19 13:56:14 | INFO | train_inner | epoch 003: 54 / 3002 loss=2.62, ppl=6.15, wps=5891.6, ups=0.09, wpb=64752, bsz=128, num_updates=6019, lr=9.99598e-05, gnorm=2.159, loss_scale=4, train_wall=11, gb_free=2.8, wall=69449 2021-06-19 13:56:25 | INFO | train_inner | epoch 003: 55 / 3002 loss=2.698, ppl=6.49, wps=5768.5, ups=0.09, wpb=64809, bsz=128, num_updates=6020, lr=9.99598e-05, gnorm=2.245, loss_scale=4, train_wall=11, gb_free=2.8, wall=69460 2021-06-19 13:56:37 | INFO | train_inner | epoch 003: 56 / 3002 loss=2.64, ppl=6.23, wps=5788.2, ups=0.09, wpb=64861, bsz=128, num_updates=6021, lr=9.99598e-05, gnorm=2.125, loss_scale=4, train_wall=11, gb_free=2.8, wall=69471 2021-06-19 13:56:48 | INFO | train_inner | epoch 003: 57 / 3002 loss=2.602, ppl=6.07, wps=5861.9, ups=0.09, wpb=64847, bsz=128, num_updates=6022, lr=9.99598e-05, gnorm=2.16, loss_scale=4, train_wall=11, gb_free=2.8, wall=69482 2021-06-19 13:56:59 | INFO | train_inner | epoch 003: 58 / 3002 loss=2.673, ppl=6.38, wps=5857, ups=0.09, wpb=64807, bsz=128, num_updates=6023, lr=9.99598e-05, gnorm=2.171, loss_scale=4, train_wall=11, gb_free=2.8, wall=69493 2021-06-19 13:57:10 | INFO | train_inner | epoch 003: 59 / 3002 loss=2.635, ppl=6.21, wps=5898.3, ups=0.09, wpb=64889, bsz=128, num_updates=6024, lr=9.99598e-05, gnorm=2.238, loss_scale=4, train_wall=11, gb_free=2.8, wall=69504 2021-06-19 13:57:21 | INFO | train_inner | epoch 003: 60 / 3002 loss=2.802, ppl=6.98, wps=5875, ups=0.09, wpb=64889, bsz=128, num_updates=6025, lr=9.99598e-05, gnorm=2.207, loss_scale=4, train_wall=11, gb_free=2.8, wall=69515 2021-06-19 13:57:32 | INFO | train_inner | epoch 003: 61 / 3002 loss=2.546, ppl=5.84, wps=5932.4, ups=0.09, wpb=64845, bsz=128, num_updates=6026, lr=9.99598e-05, gnorm=2.169, loss_scale=4, train_wall=10, gb_free=2.8, wall=69526 2021-06-19 13:57:43 | INFO | train_inner | epoch 003: 62 / 3002 loss=2.517, ppl=5.73, wps=5839.3, ups=0.09, wpb=64796, bsz=128, num_updates=6027, lr=9.99598e-05, gnorm=2.169, loss_scale=4, train_wall=11, gb_free=2.8, wall=69537 2021-06-19 13:57:54 | INFO | train_inner | epoch 003: 63 / 3002 loss=2.702, ppl=6.51, wps=5958.8, ups=0.09, wpb=64856, bsz=128, num_updates=6028, lr=9.99598e-05, gnorm=2.192, loss_scale=4, train_wall=10, gb_free=2.8, wall=69548 2021-06-19 13:58:05 | INFO | train_inner | epoch 003: 64 / 3002 loss=2.653, ppl=6.29, wps=5941.8, ups=0.09, wpb=64831, bsz=128, num_updates=6029, lr=9.99598e-05, gnorm=2.143, loss_scale=4, train_wall=10, gb_free=2.8, wall=69559 2021-06-19 13:58:16 | INFO | train_inner | epoch 003: 65 / 3002 loss=2.793, ppl=6.93, wps=5956.6, ups=0.09, wpb=64906, bsz=128, num_updates=6030, lr=9.99598e-05, gnorm=2.219, loss_scale=4, train_wall=10, gb_free=2.8, wall=69570 2021-06-19 13:58:26 | INFO | train_inner | epoch 003: 66 / 3002 loss=2.663, ppl=6.33, wps=5934.9, ups=0.09, wpb=64862, bsz=128, num_updates=6031, lr=9.99597e-05, gnorm=2.177, loss_scale=4, train_wall=10, gb_free=2.8, wall=69581 2021-06-19 13:58:38 | INFO | train_inner | epoch 003: 67 / 3002 loss=2.537, ppl=5.8, wps=5772.2, ups=0.09, wpb=64796, bsz=128, num_updates=6032, lr=9.99597e-05, gnorm=2.211, loss_scale=4, train_wall=11, gb_free=2.8, wall=69592 2021-06-19 13:58:49 | INFO | train_inner | epoch 003: 68 / 3002 loss=2.574, ppl=5.96, wps=5857.4, ups=0.09, wpb=64912, bsz=128, num_updates=6033, lr=9.99597e-05, gnorm=2.225, loss_scale=4, train_wall=11, gb_free=2.8, wall=69603 2021-06-19 13:59:00 | INFO | train_inner | epoch 003: 69 / 3002 loss=2.669, ppl=6.36, wps=5900.3, ups=0.09, wpb=64803, bsz=128, num_updates=6034, lr=9.99597e-05, gnorm=2.122, loss_scale=4, train_wall=11, gb_free=2.8, wall=69614 2021-06-19 13:59:11 | INFO | train_inner | epoch 003: 70 / 3002 loss=2.79, ppl=6.92, wps=5883.1, ups=0.09, wpb=64868, bsz=128, num_updates=6035, lr=9.99597e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=69625 2021-06-19 13:59:22 | INFO | train_inner | epoch 003: 71 / 3002 loss=2.625, ppl=6.17, wps=5931.7, ups=0.09, wpb=64859, bsz=128, num_updates=6036, lr=9.99597e-05, gnorm=2.179, loss_scale=4, train_wall=10, gb_free=2.8, wall=69636 2021-06-19 13:59:33 | INFO | train_inner | epoch 003: 72 / 3002 loss=2.691, ppl=6.46, wps=5849.6, ups=0.09, wpb=64766, bsz=128, num_updates=6037, lr=9.99597e-05, gnorm=4.769, loss_scale=4, train_wall=11, gb_free=2.8, wall=69647 2021-06-19 13:59:44 | INFO | train_inner | epoch 003: 73 / 3002 loss=2.66, ppl=6.32, wps=5811.2, ups=0.09, wpb=64814, bsz=128, num_updates=6038, lr=9.99597e-05, gnorm=2.239, loss_scale=4, train_wall=11, gb_free=2.8, wall=69658 2021-06-19 13:59:55 | INFO | train_inner | epoch 003: 74 / 3002 loss=2.77, ppl=6.82, wps=5849.6, ups=0.09, wpb=64831, bsz=128, num_updates=6039, lr=9.99597e-05, gnorm=2.165, loss_scale=4, train_wall=11, gb_free=2.8, wall=69669 2021-06-19 14:00:06 | INFO | train_inner | epoch 003: 75 / 3002 loss=2.699, ppl=6.49, wps=5911, ups=0.09, wpb=64849, bsz=128, num_updates=6040, lr=9.99597e-05, gnorm=2.212, loss_scale=4, train_wall=11, gb_free=2.8, wall=69680 2021-06-19 14:00:17 | INFO | train_inner | epoch 003: 76 / 3002 loss=2.709, ppl=6.54, wps=5879.4, ups=0.09, wpb=64837, bsz=128, num_updates=6041, lr=9.99597e-05, gnorm=2.224, loss_scale=4, train_wall=11, gb_free=2.8, wall=69691 2021-06-19 14:00:28 | INFO | train_inner | epoch 003: 77 / 3002 loss=2.681, ppl=6.41, wps=5897.6, ups=0.09, wpb=64895, bsz=128, num_updates=6042, lr=9.99597e-05, gnorm=2.229, loss_scale=4, train_wall=11, gb_free=2.8, wall=69702 2021-06-19 14:00:39 | INFO | train_inner | epoch 003: 78 / 3002 loss=2.663, ppl=6.34, wps=5903.4, ups=0.09, wpb=64909, bsz=128, num_updates=6043, lr=9.99597e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=69713 2021-06-19 14:00:50 | INFO | train_inner | epoch 003: 79 / 3002 loss=2.665, ppl=6.34, wps=5893.3, ups=0.09, wpb=64769, bsz=128, num_updates=6044, lr=9.99596e-05, gnorm=2.266, loss_scale=4, train_wall=11, gb_free=2.8, wall=69724 2021-06-19 14:01:01 | INFO | train_inner | epoch 003: 80 / 3002 loss=2.603, ppl=6.07, wps=5792.4, ups=0.09, wpb=64829, bsz=128, num_updates=6045, lr=9.99596e-05, gnorm=2.099, loss_scale=4, train_wall=11, gb_free=2.8, wall=69736 2021-06-19 14:01:12 | INFO | train_inner | epoch 003: 81 / 3002 loss=2.706, ppl=6.53, wps=5939.9, ups=0.09, wpb=64881, bsz=128, num_updates=6046, lr=9.99596e-05, gnorm=2.306, loss_scale=4, train_wall=10, gb_free=2.8, wall=69746 2021-06-19 14:01:23 | INFO | train_inner | epoch 003: 82 / 3002 loss=2.77, ppl=6.82, wps=5978.8, ups=0.09, wpb=64841, bsz=128, num_updates=6047, lr=9.99596e-05, gnorm=2.227, loss_scale=4, train_wall=10, gb_free=2.8, wall=69757 2021-06-19 14:01:34 | INFO | train_inner | epoch 003: 83 / 3002 loss=2.7, ppl=6.5, wps=5971.8, ups=0.09, wpb=64915, bsz=128, num_updates=6048, lr=9.99596e-05, gnorm=3.025, loss_scale=4, train_wall=10, gb_free=2.8, wall=69768 2021-06-19 14:01:45 | INFO | train_inner | epoch 003: 84 / 3002 loss=2.629, ppl=6.19, wps=5824, ups=0.09, wpb=64821, bsz=128, num_updates=6049, lr=9.99596e-05, gnorm=4.118, loss_scale=4, train_wall=11, gb_free=2.8, wall=69779 2021-06-19 14:01:56 | INFO | train_inner | epoch 003: 85 / 3002 loss=2.683, ppl=6.42, wps=5917.9, ups=0.09, wpb=64809, bsz=128, num_updates=6050, lr=9.99596e-05, gnorm=2.265, loss_scale=4, train_wall=10, gb_free=2.8, wall=69790 2021-06-19 14:02:07 | INFO | train_inner | epoch 003: 86 / 3002 loss=2.84, ppl=7.16, wps=5967.1, ups=0.09, wpb=64775, bsz=128, num_updates=6051, lr=9.99596e-05, gnorm=2.299, loss_scale=4, train_wall=10, gb_free=2.8, wall=69801 2021-06-19 14:02:18 | INFO | train_inner | epoch 003: 87 / 3002 loss=2.672, ppl=6.37, wps=5776.6, ups=0.09, wpb=64728, bsz=128, num_updates=6052, lr=9.99596e-05, gnorm=2.22, loss_scale=4, train_wall=11, gb_free=2.8, wall=69812 2021-06-19 14:02:29 | INFO | train_inner | epoch 003: 88 / 3002 loss=2.581, ppl=5.99, wps=5957.8, ups=0.09, wpb=64888, bsz=128, num_updates=6053, lr=9.99596e-05, gnorm=2.237, loss_scale=4, train_wall=10, gb_free=2.8, wall=69823 2021-06-19 14:02:40 | INFO | train_inner | epoch 003: 89 / 3002 loss=2.642, ppl=6.24, wps=5812.7, ups=0.09, wpb=64737, bsz=128, num_updates=6054, lr=9.99596e-05, gnorm=2.422, loss_scale=4, train_wall=11, gb_free=2.8, wall=69834 2021-06-19 14:02:51 | INFO | train_inner | epoch 003: 90 / 3002 loss=2.548, ppl=5.85, wps=5903.8, ups=0.09, wpb=64847, bsz=128, num_updates=6055, lr=9.99596e-05, gnorm=2.071, loss_scale=4, train_wall=11, gb_free=2.8, wall=69845 2021-06-19 14:03:02 | INFO | train_inner | epoch 003: 91 / 3002 loss=2.589, ppl=6.02, wps=5790.4, ups=0.09, wpb=64772, bsz=128, num_updates=6056, lr=9.99595e-05, gnorm=2.158, loss_scale=4, train_wall=11, gb_free=2.8, wall=69857 2021-06-19 14:03:13 | INFO | train_inner | epoch 003: 92 / 3002 loss=2.671, ppl=6.37, wps=5812.5, ups=0.09, wpb=64790, bsz=128, num_updates=6057, lr=9.99595e-05, gnorm=2.075, loss_scale=4, train_wall=11, gb_free=2.8, wall=69868 2021-06-19 14:03:25 | INFO | train_inner | epoch 003: 93 / 3002 loss=2.667, ppl=6.35, wps=5778.2, ups=0.09, wpb=64738, bsz=128, num_updates=6058, lr=9.99595e-05, gnorm=2.177, loss_scale=4, train_wall=11, gb_free=2.8, wall=69879 2021-06-19 14:03:36 | INFO | train_inner | epoch 003: 94 / 3002 loss=2.599, ppl=6.06, wps=5816.1, ups=0.09, wpb=64859, bsz=128, num_updates=6059, lr=9.99595e-05, gnorm=2.128, loss_scale=4, train_wall=11, gb_free=2.8, wall=69890 2021-06-19 14:03:47 | INFO | train_inner | epoch 003: 95 / 3002 loss=2.623, ppl=6.16, wps=5966.6, ups=0.09, wpb=64908, bsz=128, num_updates=6060, lr=9.99595e-05, gnorm=2.198, loss_scale=4, train_wall=10, gb_free=2.8, wall=69901 2021-06-19 14:03:58 | INFO | train_inner | epoch 003: 96 / 3002 loss=2.605, ppl=6.08, wps=5836.1, ups=0.09, wpb=64886, bsz=128, num_updates=6061, lr=9.99595e-05, gnorm=2.221, loss_scale=4, train_wall=11, gb_free=2.8, wall=69912 2021-06-19 14:04:09 | INFO | train_inner | epoch 003: 97 / 3002 loss=2.795, ppl=6.94, wps=5771.2, ups=0.09, wpb=64798, bsz=128, num_updates=6062, lr=9.99595e-05, gnorm=2.125, loss_scale=4, train_wall=11, gb_free=2.8, wall=69923 2021-06-19 14:04:20 | INFO | train_inner | epoch 003: 98 / 3002 loss=2.654, ppl=6.29, wps=5759.7, ups=0.09, wpb=64749, bsz=128, num_updates=6063, lr=9.99595e-05, gnorm=2.106, loss_scale=4, train_wall=11, gb_free=2.8, wall=69935 2021-06-19 14:04:31 | INFO | train_inner | epoch 003: 99 / 3002 loss=2.675, ppl=6.39, wps=5800.2, ups=0.09, wpb=64854, bsz=128, num_updates=6064, lr=9.99595e-05, gnorm=2.219, loss_scale=4, train_wall=11, gb_free=2.8, wall=69946 2021-06-19 14:04:43 | INFO | train_inner | epoch 003: 100 / 3002 loss=2.772, ppl=6.83, wps=5747.7, ups=0.09, wpb=64847, bsz=128, num_updates=6065, lr=9.99595e-05, gnorm=2.426, loss_scale=4, train_wall=11, gb_free=2.8, wall=69957 2021-06-19 14:04:54 | INFO | train_inner | epoch 003: 101 / 3002 loss=2.677, ppl=6.39, wps=5871.9, ups=0.09, wpb=64873, bsz=128, num_updates=6066, lr=9.99595e-05, gnorm=4.012, loss_scale=4, train_wall=11, gb_free=2.8, wall=69968 2021-06-19 14:05:05 | INFO | train_inner | epoch 003: 102 / 3002 loss=2.636, ppl=6.22, wps=5823.4, ups=0.09, wpb=64880, bsz=128, num_updates=6067, lr=9.99595e-05, gnorm=2.089, loss_scale=4, train_wall=11, gb_free=2.8, wall=69979 2021-06-19 14:05:16 | INFO | train_inner | epoch 003: 103 / 3002 loss=2.622, ppl=6.16, wps=5830, ups=0.09, wpb=64836, bsz=128, num_updates=6068, lr=9.99595e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=69990 2021-06-19 14:05:27 | INFO | train_inner | epoch 003: 104 / 3002 loss=2.617, ppl=6.13, wps=5882.9, ups=0.09, wpb=64816, bsz=128, num_updates=6069, lr=9.99594e-05, gnorm=2.107, loss_scale=4, train_wall=11, gb_free=2.8, wall=70001 2021-06-19 14:05:38 | INFO | train_inner | epoch 003: 105 / 3002 loss=2.569, ppl=5.93, wps=5835.3, ups=0.09, wpb=64923, bsz=128, num_updates=6070, lr=9.99594e-05, gnorm=2.829, loss_scale=4, train_wall=11, gb_free=2.8, wall=70012 2021-06-19 14:05:49 | INFO | train_inner | epoch 003: 106 / 3002 loss=2.861, ppl=7.27, wps=5827.7, ups=0.09, wpb=64862, bsz=128, num_updates=6071, lr=9.99594e-05, gnorm=2.303, loss_scale=4, train_wall=11, gb_free=2.8, wall=70024 2021-06-19 14:06:00 | INFO | train_inner | epoch 003: 107 / 3002 loss=2.655, ppl=6.3, wps=5832, ups=0.09, wpb=64764, bsz=128, num_updates=6072, lr=9.99594e-05, gnorm=2.145, loss_scale=4, train_wall=11, gb_free=2.8, wall=70035 2021-06-19 14:06:11 | INFO | train_inner | epoch 003: 108 / 3002 loss=2.79, ppl=6.91, wps=5881.3, ups=0.09, wpb=64778, bsz=128, num_updates=6073, lr=9.99594e-05, gnorm=2.822, loss_scale=4, train_wall=11, gb_free=2.8, wall=70046 2021-06-19 14:06:23 | INFO | train_inner | epoch 003: 109 / 3002 loss=2.617, ppl=6.14, wps=5700, ups=0.09, wpb=64795, bsz=128, num_updates=6074, lr=9.99594e-05, gnorm=2.207, loss_scale=4, train_wall=11, gb_free=2.8, wall=70057 2021-06-19 14:06:34 | INFO | train_inner | epoch 003: 110 / 3002 loss=2.799, ppl=6.96, wps=5862.2, ups=0.09, wpb=64836, bsz=128, num_updates=6075, lr=9.99594e-05, gnorm=2.287, loss_scale=4, train_wall=11, gb_free=2.8, wall=70068 2021-06-19 14:06:45 | INFO | train_inner | epoch 003: 111 / 3002 loss=2.634, ppl=6.21, wps=5654.6, ups=0.09, wpb=64766, bsz=128, num_updates=6076, lr=9.99594e-05, gnorm=3.58, loss_scale=4, train_wall=11, gb_free=2.8, wall=70080 2021-06-19 14:06:56 | INFO | train_inner | epoch 003: 112 / 3002 loss=2.627, ppl=6.18, wps=5795.1, ups=0.09, wpb=64839, bsz=128, num_updates=6077, lr=9.99594e-05, gnorm=2.107, loss_scale=4, train_wall=11, gb_free=2.8, wall=70091 2021-06-19 14:07:08 | INFO | train_inner | epoch 003: 113 / 3002 loss=2.662, ppl=6.33, wps=5850.9, ups=0.09, wpb=64813, bsz=128, num_updates=6078, lr=9.99594e-05, gnorm=2.25, loss_scale=4, train_wall=11, gb_free=2.8, wall=70102 2021-06-19 14:07:19 | INFO | train_inner | epoch 003: 114 / 3002 loss=2.792, ppl=6.93, wps=5836.1, ups=0.09, wpb=64863, bsz=128, num_updates=6079, lr=9.99594e-05, gnorm=2.26, loss_scale=4, train_wall=11, gb_free=2.8, wall=70113 2021-06-19 14:07:30 | INFO | train_inner | epoch 003: 115 / 3002 loss=2.594, ppl=6.04, wps=5848.7, ups=0.09, wpb=64796, bsz=128, num_updates=6080, lr=9.99594e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=70124 2021-06-19 14:07:41 | INFO | train_inner | epoch 003: 116 / 3002 loss=2.703, ppl=6.51, wps=5893.6, ups=0.09, wpb=64744, bsz=128, num_updates=6081, lr=9.99593e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=70135 2021-06-19 14:07:52 | INFO | train_inner | epoch 003: 117 / 3002 loss=2.626, ppl=6.17, wps=5956.6, ups=0.09, wpb=64826, bsz=128, num_updates=6082, lr=9.99593e-05, gnorm=2.179, loss_scale=4, train_wall=10, gb_free=2.8, wall=70146 2021-06-19 14:08:03 | INFO | train_inner | epoch 003: 118 / 3002 loss=2.77, ppl=6.82, wps=5831.2, ups=0.09, wpb=64860, bsz=128, num_updates=6083, lr=9.99593e-05, gnorm=2.251, loss_scale=4, train_wall=11, gb_free=2.8, wall=70157 2021-06-19 14:08:14 | INFO | train_inner | epoch 003: 119 / 3002 loss=2.814, ppl=7.03, wps=5947.2, ups=0.09, wpb=64746, bsz=128, num_updates=6084, lr=9.99593e-05, gnorm=2.415, loss_scale=4, train_wall=10, gb_free=2.8, wall=70168 2021-06-19 14:08:25 | INFO | train_inner | epoch 003: 120 / 3002 loss=2.639, ppl=6.23, wps=5819.4, ups=0.09, wpb=64793, bsz=128, num_updates=6085, lr=9.99593e-05, gnorm=2.791, loss_scale=4, train_wall=11, gb_free=2.8, wall=70179 2021-06-19 14:08:36 | INFO | train_inner | epoch 003: 121 / 3002 loss=2.769, ppl=6.81, wps=5714.2, ups=0.09, wpb=64852, bsz=128, num_updates=6086, lr=9.99593e-05, gnorm=2.11, loss_scale=4, train_wall=11, gb_free=2.8, wall=70190 2021-06-19 14:08:47 | INFO | train_inner | epoch 003: 122 / 3002 loss=2.69, ppl=6.45, wps=5881.9, ups=0.09, wpb=64790, bsz=128, num_updates=6087, lr=9.99593e-05, gnorm=2.192, loss_scale=4, train_wall=11, gb_free=2.8, wall=70201 2021-06-19 14:08:58 | INFO | train_inner | epoch 003: 123 / 3002 loss=2.633, ppl=6.2, wps=5797.5, ups=0.09, wpb=64805, bsz=128, num_updates=6088, lr=9.99593e-05, gnorm=2.196, loss_scale=4, train_wall=11, gb_free=2.8, wall=70213 2021-06-19 14:09:09 | INFO | train_inner | epoch 003: 124 / 3002 loss=2.549, ppl=5.85, wps=5989.2, ups=0.09, wpb=64802, bsz=128, num_updates=6089, lr=9.99593e-05, gnorm=2.243, loss_scale=4, train_wall=10, gb_free=2.8, wall=70223 2021-06-19 14:09:20 | INFO | train_inner | epoch 003: 125 / 3002 loss=2.699, ppl=6.49, wps=5761.3, ups=0.09, wpb=64800, bsz=128, num_updates=6090, lr=9.99593e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=70235 2021-06-19 14:09:32 | INFO | train_inner | epoch 003: 126 / 3002 loss=2.771, ppl=6.83, wps=5774.7, ups=0.09, wpb=64692, bsz=128, num_updates=6091, lr=9.99593e-05, gnorm=2.149, loss_scale=4, train_wall=11, gb_free=2.8, wall=70246 2021-06-19 14:09:43 | INFO | train_inner | epoch 003: 127 / 3002 loss=2.857, ppl=7.24, wps=5846.5, ups=0.09, wpb=64827, bsz=128, num_updates=6092, lr=9.99593e-05, gnorm=2.199, loss_scale=4, train_wall=11, gb_free=2.8, wall=70257 2021-06-19 14:09:54 | INFO | train_inner | epoch 003: 128 / 3002 loss=2.717, ppl=6.58, wps=5808.6, ups=0.09, wpb=64793, bsz=128, num_updates=6093, lr=9.99593e-05, gnorm=2.267, loss_scale=4, train_wall=11, gb_free=2.8, wall=70268 2021-06-19 14:10:05 | INFO | train_inner | epoch 003: 129 / 3002 loss=2.78, ppl=6.87, wps=5772.9, ups=0.09, wpb=64783, bsz=128, num_updates=6094, lr=9.99592e-05, gnorm=2.202, loss_scale=4, train_wall=11, gb_free=2.8, wall=70279 2021-06-19 14:10:16 | INFO | train_inner | epoch 003: 130 / 3002 loss=2.759, ppl=6.77, wps=5811.8, ups=0.09, wpb=64872, bsz=128, num_updates=6095, lr=9.99592e-05, gnorm=2.225, loss_scale=4, train_wall=11, gb_free=2.8, wall=70290 2021-06-19 14:10:27 | INFO | train_inner | epoch 003: 131 / 3002 loss=2.673, ppl=6.38, wps=5886.5, ups=0.09, wpb=64852, bsz=128, num_updates=6096, lr=9.99592e-05, gnorm=3.962, loss_scale=4, train_wall=11, gb_free=2.8, wall=70302 2021-06-19 14:10:38 | INFO | train_inner | epoch 003: 132 / 3002 loss=2.562, ppl=5.91, wps=5889.1, ups=0.09, wpb=64809, bsz=128, num_updates=6097, lr=9.99592e-05, gnorm=2.217, loss_scale=4, train_wall=11, gb_free=2.8, wall=70313 2021-06-19 14:10:49 | INFO | train_inner | epoch 003: 133 / 3002 loss=2.65, ppl=6.28, wps=5908.8, ups=0.09, wpb=64879, bsz=128, num_updates=6098, lr=9.99592e-05, gnorm=2.176, loss_scale=4, train_wall=11, gb_free=2.8, wall=70323 2021-06-19 14:11:00 | INFO | train_inner | epoch 003: 134 / 3002 loss=2.71, ppl=6.55, wps=5881.9, ups=0.09, wpb=64897, bsz=128, num_updates=6099, lr=9.99592e-05, gnorm=2.202, loss_scale=4, train_wall=11, gb_free=2.8, wall=70335 2021-06-19 14:11:11 | INFO | train_inner | epoch 003: 135 / 3002 loss=2.669, ppl=6.36, wps=5827.7, ups=0.09, wpb=64813, bsz=128, num_updates=6100, lr=9.99592e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=70346 2021-06-19 14:11:22 | INFO | train_inner | epoch 003: 136 / 3002 loss=2.797, ppl=6.95, wps=5902.7, ups=0.09, wpb=64832, bsz=128, num_updates=6101, lr=9.99592e-05, gnorm=2.261, loss_scale=4, train_wall=11, gb_free=2.8, wall=70357 2021-06-19 14:11:34 | INFO | train_inner | epoch 003: 137 / 3002 loss=2.675, ppl=6.39, wps=5762.8, ups=0.09, wpb=64808, bsz=128, num_updates=6102, lr=9.99592e-05, gnorm=2.159, loss_scale=4, train_wall=11, gb_free=2.8, wall=70368 2021-06-19 14:11:45 | INFO | train_inner | epoch 003: 138 / 3002 loss=2.721, ppl=6.59, wps=5766.1, ups=0.09, wpb=64784, bsz=128, num_updates=6103, lr=9.99592e-05, gnorm=3.719, loss_scale=4, train_wall=11, gb_free=2.8, wall=70379 2021-06-19 14:11:56 | INFO | train_inner | epoch 003: 139 / 3002 loss=2.568, ppl=5.93, wps=5702.3, ups=0.09, wpb=64875, bsz=128, num_updates=6104, lr=9.99592e-05, gnorm=2.117, loss_scale=4, train_wall=11, gb_free=2.8, wall=70390 2021-06-19 14:12:07 | INFO | train_inner | epoch 003: 140 / 3002 loss=2.908, ppl=7.51, wps=5827.9, ups=0.09, wpb=64778, bsz=128, num_updates=6105, lr=9.99592e-05, gnorm=2.297, loss_scale=4, train_wall=11, gb_free=2.8, wall=70402 2021-06-19 14:12:19 | INFO | train_inner | epoch 003: 141 / 3002 loss=2.684, ppl=6.43, wps=5756.1, ups=0.09, wpb=64896, bsz=128, num_updates=6106, lr=9.99591e-05, gnorm=2.464, loss_scale=4, train_wall=11, gb_free=2.8, wall=70413 2021-06-19 14:12:30 | INFO | train_inner | epoch 003: 142 / 3002 loss=2.67, ppl=6.36, wps=5821.1, ups=0.09, wpb=64861, bsz=128, num_updates=6107, lr=9.99591e-05, gnorm=2.358, loss_scale=4, train_wall=11, gb_free=2.8, wall=70424 2021-06-19 14:12:41 | INFO | train_inner | epoch 003: 143 / 3002 loss=2.876, ppl=7.34, wps=5925.3, ups=0.09, wpb=64726, bsz=128, num_updates=6108, lr=9.99591e-05, gnorm=2.423, loss_scale=4, train_wall=10, gb_free=2.8, wall=70435 2021-06-19 14:12:52 | INFO | train_inner | epoch 003: 144 / 3002 loss=2.689, ppl=6.45, wps=5753.4, ups=0.09, wpb=64847, bsz=128, num_updates=6109, lr=9.99591e-05, gnorm=2.391, loss_scale=4, train_wall=11, gb_free=2.8, wall=70446 2021-06-19 14:13:03 | INFO | train_inner | epoch 003: 145 / 3002 loss=2.686, ppl=6.44, wps=5922.6, ups=0.09, wpb=64810, bsz=128, num_updates=6110, lr=9.99591e-05, gnorm=2.418, loss_scale=8, train_wall=10, gb_free=2.8, wall=70457 2021-06-19 14:13:14 | INFO | train_inner | epoch 003: 146 / 3002 loss=2.674, ppl=6.38, wps=5827.1, ups=0.09, wpb=64850, bsz=128, num_updates=6111, lr=9.99591e-05, gnorm=2.122, loss_scale=8, train_wall=11, gb_free=2.8, wall=70468 2021-06-19 14:13:25 | INFO | train_inner | epoch 003: 147 / 3002 loss=2.968, ppl=7.82, wps=5856.6, ups=0.09, wpb=64758, bsz=128, num_updates=6112, lr=9.99591e-05, gnorm=2.66, loss_scale=8, train_wall=11, gb_free=2.8, wall=70479 2021-06-19 14:13:36 | INFO | train_inner | epoch 003: 148 / 3002 loss=2.813, ppl=7.03, wps=5854.6, ups=0.09, wpb=64780, bsz=128, num_updates=6113, lr=9.99591e-05, gnorm=2.37, loss_scale=8, train_wall=11, gb_free=2.8, wall=70490 2021-06-19 14:13:47 | INFO | train_inner | epoch 003: 149 / 3002 loss=2.76, ppl=6.78, wps=5754.4, ups=0.09, wpb=64794, bsz=128, num_updates=6114, lr=9.99591e-05, gnorm=2.287, loss_scale=8, train_wall=11, gb_free=2.8, wall=70502 2021-06-19 14:13:59 | INFO | train_inner | epoch 003: 150 / 3002 loss=2.689, ppl=6.45, wps=5793.8, ups=0.09, wpb=64916, bsz=128, num_updates=6115, lr=9.99591e-05, gnorm=2.274, loss_scale=8, train_wall=11, gb_free=2.8, wall=70513 2021-06-19 14:14:09 | INFO | train_inner | epoch 003: 151 / 3002 loss=2.757, ppl=6.76, wps=5929.2, ups=0.09, wpb=64670, bsz=128, num_updates=6116, lr=9.99591e-05, gnorm=2.359, loss_scale=8, train_wall=10, gb_free=2.8, wall=70524 2021-06-19 14:14:21 | INFO | train_inner | epoch 003: 152 / 3002 loss=2.574, ppl=5.96, wps=5802.9, ups=0.09, wpb=64841, bsz=128, num_updates=6117, lr=9.99591e-05, gnorm=2.226, loss_scale=8, train_wall=11, gb_free=2.8, wall=70535 2021-06-19 14:14:32 | INFO | train_inner | epoch 003: 153 / 3002 loss=2.713, ppl=6.56, wps=5937.9, ups=0.09, wpb=64821, bsz=128, num_updates=6118, lr=9.99591e-05, gnorm=2.451, loss_scale=8, train_wall=10, gb_free=2.8, wall=70546 2021-06-19 14:14:43 | INFO | train_inner | epoch 003: 154 / 3002 loss=2.611, ppl=6.11, wps=5803.4, ups=0.09, wpb=64813, bsz=128, num_updates=6119, lr=9.9959e-05, gnorm=2.257, loss_scale=8, train_wall=11, gb_free=2.8, wall=70557 2021-06-19 14:14:54 | INFO | train_inner | epoch 003: 155 / 3002 loss=2.654, ppl=6.29, wps=5781.3, ups=0.09, wpb=64858, bsz=128, num_updates=6120, lr=9.9959e-05, gnorm=2.151, loss_scale=8, train_wall=11, gb_free=2.8, wall=70568 2021-06-19 14:15:05 | INFO | train_inner | epoch 003: 156 / 3002 loss=2.607, ppl=6.09, wps=5861, ups=0.09, wpb=64900, bsz=128, num_updates=6121, lr=9.9959e-05, gnorm=2.207, loss_scale=8, train_wall=11, gb_free=2.8, wall=70579 2021-06-19 14:15:16 | INFO | train_inner | epoch 003: 157 / 3002 loss=2.673, ppl=6.38, wps=5852.6, ups=0.09, wpb=64850, bsz=128, num_updates=6122, lr=9.9959e-05, gnorm=3.556, loss_scale=8, train_wall=11, gb_free=2.8, wall=70590 2021-06-19 14:15:27 | INFO | train_inner | epoch 003: 158 / 3002 loss=2.718, ppl=6.58, wps=5946.2, ups=0.09, wpb=64883, bsz=128, num_updates=6123, lr=9.9959e-05, gnorm=2.203, loss_scale=8, train_wall=10, gb_free=2.8, wall=70601 2021-06-19 14:15:38 | INFO | train_inner | epoch 003: 159 / 3002 loss=2.636, ppl=6.21, wps=5841.8, ups=0.09, wpb=64844, bsz=128, num_updates=6124, lr=9.9959e-05, gnorm=2.316, loss_scale=8, train_wall=11, gb_free=2.8, wall=70612 2021-06-19 14:15:49 | INFO | train_inner | epoch 003: 160 / 3002 loss=2.656, ppl=6.3, wps=5905.6, ups=0.09, wpb=64847, bsz=128, num_updates=6125, lr=9.9959e-05, gnorm=2.212, loss_scale=8, train_wall=11, gb_free=2.8, wall=70623 2021-06-19 14:16:00 | INFO | train_inner | epoch 003: 161 / 3002 loss=2.766, ppl=6.8, wps=5919.8, ups=0.09, wpb=64801, bsz=128, num_updates=6126, lr=9.9959e-05, gnorm=2.32, loss_scale=8, train_wall=11, gb_free=2.8, wall=70634 2021-06-19 14:16:11 | INFO | train_inner | epoch 003: 162 / 3002 loss=2.647, ppl=6.26, wps=5820.7, ups=0.09, wpb=64778, bsz=128, num_updates=6127, lr=9.9959e-05, gnorm=2.33, loss_scale=8, train_wall=11, gb_free=2.8, wall=70645 2021-06-19 14:16:22 | INFO | train_inner | epoch 003: 163 / 3002 loss=2.842, ppl=7.17, wps=5727.5, ups=0.09, wpb=64716, bsz=128, num_updates=6128, lr=9.9959e-05, gnorm=2.56, loss_scale=8, train_wall=11, gb_free=2.8, wall=70657 2021-06-19 14:16:34 | INFO | train_inner | epoch 003: 164 / 3002 loss=2.583, ppl=5.99, wps=5805.1, ups=0.09, wpb=64820, bsz=128, num_updates=6129, lr=9.9959e-05, gnorm=2.273, loss_scale=8, train_wall=11, gb_free=2.8, wall=70668 2021-06-19 14:16:44 | INFO | train_inner | epoch 003: 165 / 3002 loss=2.645, ppl=6.26, wps=5949.9, ups=0.09, wpb=64555, bsz=128, num_updates=6130, lr=9.9959e-05, gnorm=2.48, loss_scale=8, train_wall=10, gb_free=2.8, wall=70679 2021-06-19 14:16:56 | INFO | train_inner | epoch 003: 166 / 3002 loss=2.625, ppl=6.17, wps=5848.7, ups=0.09, wpb=64855, bsz=128, num_updates=6131, lr=9.99589e-05, gnorm=2.175, loss_scale=8, train_wall=11, gb_free=2.8, wall=70690 2021-06-19 14:17:07 | INFO | train_inner | epoch 003: 167 / 3002 loss=2.654, ppl=6.3, wps=5878.3, ups=0.09, wpb=64842, bsz=128, num_updates=6132, lr=9.99589e-05, gnorm=2.307, loss_scale=8, train_wall=11, gb_free=2.8, wall=70701 2021-06-19 14:17:18 | INFO | train_inner | epoch 003: 168 / 3002 loss=2.702, ppl=6.51, wps=5741.7, ups=0.09, wpb=64711, bsz=128, num_updates=6133, lr=9.99589e-05, gnorm=2.221, loss_scale=8, train_wall=11, gb_free=2.8, wall=70712 2021-06-19 14:17:29 | INFO | train_inner | epoch 003: 169 / 3002 loss=2.712, ppl=6.55, wps=5746.8, ups=0.09, wpb=64723, bsz=128, num_updates=6134, lr=9.99589e-05, gnorm=2.789, loss_scale=8, train_wall=11, gb_free=2.8, wall=70723 2021-06-19 14:17:40 | INFO | train_inner | epoch 003: 170 / 3002 loss=2.67, ppl=6.36, wps=5848.3, ups=0.09, wpb=64879, bsz=128, num_updates=6135, lr=9.99589e-05, gnorm=2.155, loss_scale=8, train_wall=11, gb_free=2.8, wall=70735 2021-06-19 14:17:51 | INFO | train_inner | epoch 003: 171 / 3002 loss=2.628, ppl=6.18, wps=5940.3, ups=0.09, wpb=64837, bsz=128, num_updates=6136, lr=9.99589e-05, gnorm=2.268, loss_scale=8, train_wall=10, gb_free=2.8, wall=70745 2021-06-19 14:18:02 | INFO | train_inner | epoch 003: 172 / 3002 loss=2.556, ppl=5.88, wps=5770.1, ups=0.09, wpb=64734, bsz=128, num_updates=6137, lr=9.99589e-05, gnorm=2.329, loss_scale=8, train_wall=11, gb_free=2.8, wall=70757 2021-06-19 14:18:13 | INFO | train_inner | epoch 003: 173 / 3002 loss=2.759, ppl=6.77, wps=5866.9, ups=0.09, wpb=64863, bsz=128, num_updates=6138, lr=9.99589e-05, gnorm=2.265, loss_scale=8, train_wall=11, gb_free=2.8, wall=70768 2021-06-19 14:18:25 | INFO | train_inner | epoch 003: 174 / 3002 loss=2.727, ppl=6.62, wps=5826, ups=0.09, wpb=64787, bsz=128, num_updates=6139, lr=9.99589e-05, gnorm=2.284, loss_scale=8, train_wall=11, gb_free=2.8, wall=70779 2021-06-19 14:18:35 | INFO | train_inner | epoch 003: 175 / 3002 loss=2.693, ppl=6.47, wps=5971.5, ups=0.09, wpb=64790, bsz=128, num_updates=6140, lr=9.99589e-05, gnorm=2.176, loss_scale=8, train_wall=10, gb_free=2.8, wall=70790 2021-06-19 14:18:46 | INFO | train_inner | epoch 003: 176 / 3002 loss=2.771, ppl=6.83, wps=5842.3, ups=0.09, wpb=64840, bsz=128, num_updates=6141, lr=9.99589e-05, gnorm=2.26, loss_scale=8, train_wall=11, gb_free=2.8, wall=70801 2021-06-19 14:18:58 | INFO | train_inner | epoch 003: 177 / 3002 loss=2.772, ppl=6.83, wps=5823.2, ups=0.09, wpb=64814, bsz=128, num_updates=6142, lr=9.99589e-05, gnorm=2.296, loss_scale=8, train_wall=11, gb_free=2.8, wall=70812 2021-06-19 14:19:09 | INFO | train_inner | epoch 003: 178 / 3002 loss=2.687, ppl=6.44, wps=5852.4, ups=0.09, wpb=64807, bsz=128, num_updates=6143, lr=9.99589e-05, gnorm=2.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=70823 2021-06-19 14:19:20 | INFO | train_inner | epoch 003: 179 / 3002 loss=2.623, ppl=6.16, wps=5877.9, ups=0.09, wpb=64823, bsz=128, num_updates=6144, lr=9.99588e-05, gnorm=2.224, loss_scale=8, train_wall=11, gb_free=2.8, wall=70834 2021-06-19 14:19:31 | INFO | train_inner | epoch 003: 180 / 3002 loss=2.627, ppl=6.18, wps=5922.1, ups=0.09, wpb=64810, bsz=128, num_updates=6145, lr=9.99588e-05, gnorm=2.209, loss_scale=8, train_wall=10, gb_free=2.8, wall=70845 2021-06-19 14:19:42 | INFO | train_inner | epoch 003: 181 / 3002 loss=2.875, ppl=7.34, wps=5869, ups=0.09, wpb=64755, bsz=128, num_updates=6146, lr=9.99588e-05, gnorm=2.25, loss_scale=8, train_wall=11, gb_free=2.8, wall=70856 2021-06-19 14:19:53 | INFO | train_inner | epoch 003: 182 / 3002 loss=2.638, ppl=6.22, wps=5910, ups=0.09, wpb=64864, bsz=128, num_updates=6147, lr=9.99588e-05, gnorm=2.203, loss_scale=8, train_wall=11, gb_free=2.8, wall=70867 2021-06-19 14:20:04 | INFO | train_inner | epoch 003: 183 / 3002 loss=2.807, ppl=7, wps=5841.8, ups=0.09, wpb=64775, bsz=128, num_updates=6148, lr=9.99588e-05, gnorm=2.261, loss_scale=8, train_wall=11, gb_free=2.8, wall=70878 2021-06-19 14:20:15 | INFO | train_inner | epoch 003: 184 / 3002 loss=2.604, ppl=6.08, wps=5861.3, ups=0.09, wpb=64893, bsz=128, num_updates=6149, lr=9.99588e-05, gnorm=2.133, loss_scale=8, train_wall=11, gb_free=2.8, wall=70889 2021-06-19 14:20:26 | INFO | train_inner | epoch 003: 185 / 3002 loss=2.591, ppl=6.02, wps=5822.4, ups=0.09, wpb=64906, bsz=128, num_updates=6150, lr=9.99588e-05, gnorm=2.326, loss_scale=8, train_wall=11, gb_free=2.8, wall=70900 2021-06-19 14:20:37 | INFO | train_inner | epoch 003: 186 / 3002 loss=2.688, ppl=6.45, wps=5925.7, ups=0.09, wpb=64709, bsz=128, num_updates=6151, lr=9.99588e-05, gnorm=2.238, loss_scale=8, train_wall=10, gb_free=2.8, wall=70911 2021-06-19 14:20:48 | INFO | train_inner | epoch 003: 187 / 3002 loss=2.669, ppl=6.36, wps=5909.3, ups=0.09, wpb=64889, bsz=128, num_updates=6152, lr=9.99588e-05, gnorm=2.251, loss_scale=8, train_wall=10, gb_free=2.8, wall=70922 2021-06-19 14:20:59 | INFO | train_inner | epoch 003: 188 / 3002 loss=2.709, ppl=6.54, wps=5843.9, ups=0.09, wpb=64842, bsz=128, num_updates=6153, lr=9.99588e-05, gnorm=2.22, loss_scale=8, train_wall=11, gb_free=2.8, wall=70933 2021-06-19 14:21:10 | INFO | train_inner | epoch 003: 189 / 3002 loss=2.425, ppl=5.37, wps=5970.8, ups=0.09, wpb=64899, bsz=128, num_updates=6154, lr=9.99588e-05, gnorm=2.206, loss_scale=8, train_wall=10, gb_free=2.8, wall=70944 2021-06-19 14:21:21 | INFO | train_inner | epoch 003: 190 / 3002 loss=2.863, ppl=7.28, wps=5836, ups=0.09, wpb=64810, bsz=128, num_updates=6155, lr=9.99588e-05, gnorm=2.216, loss_scale=8, train_wall=11, gb_free=2.8, wall=70955 2021-06-19 14:21:32 | INFO | train_inner | epoch 003: 191 / 3002 loss=2.631, ppl=6.19, wps=5741.7, ups=0.09, wpb=64850, bsz=128, num_updates=6156, lr=9.99587e-05, gnorm=2.272, loss_scale=8, train_wall=11, gb_free=2.8, wall=70967 2021-06-19 14:21:43 | INFO | train_inner | epoch 003: 192 / 3002 loss=2.613, ppl=6.12, wps=5869.4, ups=0.09, wpb=64799, bsz=128, num_updates=6157, lr=9.99587e-05, gnorm=2.309, loss_scale=8, train_wall=11, gb_free=2.8, wall=70978 2021-06-19 14:21:54 | INFO | train_inner | epoch 003: 193 / 3002 loss=2.672, ppl=6.37, wps=5793, ups=0.09, wpb=64850, bsz=128, num_updates=6158, lr=9.99587e-05, gnorm=2.166, loss_scale=8, train_wall=11, gb_free=2.8, wall=70989 2021-06-19 14:22:05 | INFO | train_inner | epoch 003: 194 / 3002 loss=2.713, ppl=6.56, wps=5954.2, ups=0.09, wpb=64799, bsz=128, num_updates=6159, lr=9.99587e-05, gnorm=2.222, loss_scale=8, train_wall=10, gb_free=2.8, wall=71000 2021-06-19 14:22:16 | INFO | train_inner | epoch 003: 195 / 3002 loss=2.748, ppl=6.72, wps=5880, ups=0.09, wpb=64740, bsz=128, num_updates=6160, lr=9.99587e-05, gnorm=2.15, loss_scale=8, train_wall=11, gb_free=2.8, wall=71011 2021-06-19 14:22:28 | INFO | train_inner | epoch 003: 196 / 3002 loss=2.68, ppl=6.41, wps=5806.3, ups=0.09, wpb=64822, bsz=128, num_updates=6161, lr=9.99587e-05, gnorm=2.069, loss_scale=8, train_wall=11, gb_free=2.8, wall=71022 2021-06-19 14:22:39 | INFO | train_inner | epoch 003: 197 / 3002 loss=2.638, ppl=6.22, wps=5913.1, ups=0.09, wpb=64899, bsz=128, num_updates=6162, lr=9.99587e-05, gnorm=2.241, loss_scale=8, train_wall=10, gb_free=2.8, wall=71033 2021-06-19 14:22:50 | INFO | train_inner | epoch 003: 198 / 3002 loss=2.671, ppl=6.37, wps=5868, ups=0.09, wpb=64824, bsz=128, num_updates=6163, lr=9.99587e-05, gnorm=2.182, loss_scale=8, train_wall=11, gb_free=2.8, wall=71044 2021-06-19 14:23:01 | INFO | train_inner | epoch 003: 199 / 3002 loss=2.71, ppl=6.54, wps=5896.2, ups=0.09, wpb=64910, bsz=128, num_updates=6164, lr=9.99587e-05, gnorm=2.203, loss_scale=8, train_wall=11, gb_free=2.8, wall=71055 2021-06-19 14:23:12 | INFO | train_inner | epoch 003: 200 / 3002 loss=2.648, ppl=6.27, wps=5778.3, ups=0.09, wpb=64846, bsz=128, num_updates=6165, lr=9.99587e-05, gnorm=2.087, loss_scale=8, train_wall=11, gb_free=2.8, wall=71066 2021-06-19 14:23:23 | INFO | train_inner | epoch 003: 201 / 3002 loss=2.868, ppl=7.3, wps=5869.5, ups=0.09, wpb=64831, bsz=128, num_updates=6166, lr=9.99587e-05, gnorm=2.133, loss_scale=8, train_wall=11, gb_free=2.8, wall=71077 2021-06-19 14:23:34 | INFO | train_inner | epoch 003: 202 / 3002 loss=2.597, ppl=6.05, wps=5862, ups=0.09, wpb=64858, bsz=128, num_updates=6167, lr=9.99587e-05, gnorm=2.22, loss_scale=8, train_wall=11, gb_free=2.8, wall=71088 2021-06-19 14:23:45 | INFO | train_inner | epoch 003: 203 / 3002 loss=2.561, ppl=5.9, wps=5855.2, ups=0.09, wpb=64873, bsz=128, num_updates=6168, lr=9.99587e-05, gnorm=2.257, loss_scale=8, train_wall=11, gb_free=2.8, wall=71099 2021-06-19 14:23:56 | INFO | train_inner | epoch 003: 204 / 3002 loss=2.55, ppl=5.86, wps=5707.2, ups=0.09, wpb=64814, bsz=128, num_updates=6169, lr=9.99586e-05, gnorm=2.134, loss_scale=8, train_wall=11, gb_free=2.8, wall=71111 2021-06-19 14:24:07 | INFO | train_inner | epoch 003: 205 / 3002 loss=2.777, ppl=6.85, wps=5964.2, ups=0.09, wpb=64881, bsz=128, num_updates=6170, lr=9.99586e-05, gnorm=2.334, loss_scale=8, train_wall=10, gb_free=2.8, wall=71122 2021-06-19 14:24:18 | INFO | train_inner | epoch 003: 206 / 3002 loss=2.748, ppl=6.72, wps=5816, ups=0.09, wpb=64789, bsz=128, num_updates=6171, lr=9.99586e-05, gnorm=2.327, loss_scale=8, train_wall=11, gb_free=2.8, wall=71133 2021-06-19 14:24:29 | INFO | train_inner | epoch 003: 207 / 3002 loss=2.45, ppl=5.47, wps=5898.3, ups=0.09, wpb=64853, bsz=128, num_updates=6172, lr=9.99586e-05, gnorm=2.198, loss_scale=8, train_wall=11, gb_free=2.8, wall=71144 2021-06-19 14:24:40 | INFO | train_inner | epoch 003: 208 / 3002 loss=2.61, ppl=6.1, wps=5857.9, ups=0.09, wpb=64825, bsz=128, num_updates=6173, lr=9.99586e-05, gnorm=13, loss_scale=8, train_wall=11, gb_free=2.8, wall=71155 2021-06-19 14:24:51 | INFO | train_inner | epoch 003: 209 / 3002 loss=2.672, ppl=6.37, wps=5909.4, ups=0.09, wpb=64847, bsz=128, num_updates=6174, lr=9.99586e-05, gnorm=2.197, loss_scale=8, train_wall=10, gb_free=2.8, wall=71166 2021-06-19 14:25:03 | INFO | train_inner | epoch 003: 210 / 3002 loss=2.751, ppl=6.73, wps=5779.1, ups=0.09, wpb=64857, bsz=128, num_updates=6175, lr=9.99586e-05, gnorm=2.238, loss_scale=8, train_wall=11, gb_free=2.8, wall=71177 2021-06-19 14:25:14 | INFO | train_inner | epoch 003: 211 / 3002 loss=2.77, ppl=6.82, wps=5820.4, ups=0.09, wpb=64859, bsz=128, num_updates=6176, lr=9.99586e-05, gnorm=2.38, loss_scale=8, train_wall=11, gb_free=2.8, wall=71188 2021-06-19 14:25:25 | INFO | train_inner | epoch 003: 212 / 3002 loss=2.672, ppl=6.37, wps=5790.9, ups=0.09, wpb=64772, bsz=128, num_updates=6177, lr=9.99586e-05, gnorm=2.255, loss_scale=8, train_wall=11, gb_free=2.8, wall=71199 2021-06-19 14:25:36 | INFO | train_inner | epoch 003: 213 / 3002 loss=2.534, ppl=5.79, wps=5906.6, ups=0.09, wpb=64875, bsz=128, num_updates=6178, lr=9.99586e-05, gnorm=2.334, loss_scale=8, train_wall=11, gb_free=2.8, wall=71210 2021-06-19 14:25:47 | INFO | train_inner | epoch 003: 214 / 3002 loss=2.747, ppl=6.71, wps=5873.7, ups=0.09, wpb=64885, bsz=128, num_updates=6179, lr=9.99586e-05, gnorm=2.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=71221 2021-06-19 14:25:58 | INFO | train_inner | epoch 003: 215 / 3002 loss=2.495, ppl=5.64, wps=5769, ups=0.09, wpb=64775, bsz=128, num_updates=6180, lr=9.99586e-05, gnorm=2.464, loss_scale=8, train_wall=11, gb_free=2.8, wall=71233 2021-06-19 14:26:09 | INFO | train_inner | epoch 003: 216 / 3002 loss=2.716, ppl=6.57, wps=5850.4, ups=0.09, wpb=64821, bsz=128, num_updates=6181, lr=9.99585e-05, gnorm=3.465, loss_scale=8, train_wall=11, gb_free=2.8, wall=71244 2021-06-19 14:26:20 | INFO | train_inner | epoch 003: 217 / 3002 loss=2.652, ppl=6.29, wps=5845.3, ups=0.09, wpb=64874, bsz=128, num_updates=6182, lr=9.99585e-05, gnorm=2.269, loss_scale=8, train_wall=11, gb_free=2.8, wall=71255 2021-06-19 14:26:32 | INFO | train_inner | epoch 003: 218 / 3002 loss=2.691, ppl=6.46, wps=5789.3, ups=0.09, wpb=64839, bsz=128, num_updates=6183, lr=9.99585e-05, gnorm=2.194, loss_scale=8, train_wall=11, gb_free=2.8, wall=71266 2021-06-19 14:26:42 | INFO | train_inner | epoch 003: 219 / 3002 loss=2.808, ppl=7, wps=5975.5, ups=0.09, wpb=64790, bsz=128, num_updates=6184, lr=9.99585e-05, gnorm=2.121, loss_scale=8, train_wall=10, gb_free=2.8, wall=71277 2021-06-19 14:26:53 | INFO | train_inner | epoch 003: 220 / 3002 loss=2.548, ppl=5.85, wps=5866.4, ups=0.09, wpb=64817, bsz=128, num_updates=6185, lr=9.99585e-05, gnorm=2.207, loss_scale=8, train_wall=11, gb_free=2.8, wall=71288 2021-06-19 14:27:05 | INFO | train_inner | epoch 003: 221 / 3002 loss=2.627, ppl=6.18, wps=5863.4, ups=0.09, wpb=64783, bsz=128, num_updates=6186, lr=9.99585e-05, gnorm=2.137, loss_scale=8, train_wall=11, gb_free=2.8, wall=71299 2021-06-19 14:27:16 | INFO | train_inner | epoch 003: 222 / 3002 loss=2.682, ppl=6.42, wps=5720, ups=0.09, wpb=64859, bsz=128, num_updates=6187, lr=9.99585e-05, gnorm=2.22, loss_scale=8, train_wall=11, gb_free=2.8, wall=71310 2021-06-19 14:27:27 | INFO | train_inner | epoch 003: 223 / 3002 loss=2.895, ppl=7.44, wps=5913.7, ups=0.09, wpb=64736, bsz=128, num_updates=6188, lr=9.99585e-05, gnorm=2.427, loss_scale=8, train_wall=10, gb_free=2.8, wall=71321 2021-06-19 14:27:38 | INFO | train_inner | epoch 003: 224 / 3002 loss=2.645, ppl=6.26, wps=5936, ups=0.09, wpb=64817, bsz=128, num_updates=6189, lr=9.99585e-05, gnorm=2.355, loss_scale=8, train_wall=10, gb_free=2.8, wall=71332 2021-06-19 14:27:49 | INFO | train_inner | epoch 003: 225 / 3002 loss=2.694, ppl=6.47, wps=5795.3, ups=0.09, wpb=64809, bsz=128, num_updates=6190, lr=9.99585e-05, gnorm=2.212, loss_scale=8, train_wall=11, gb_free=2.8, wall=71343 2021-06-19 14:28:00 | INFO | train_inner | epoch 003: 226 / 3002 loss=2.554, ppl=5.87, wps=5904.9, ups=0.09, wpb=64802, bsz=128, num_updates=6191, lr=9.99585e-05, gnorm=2.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=71354 2021-06-19 14:28:11 | INFO | train_inner | epoch 003: 227 / 3002 loss=2.673, ppl=6.38, wps=5836.7, ups=0.09, wpb=64698, bsz=128, num_updates=6192, lr=9.99585e-05, gnorm=2.157, loss_scale=8, train_wall=11, gb_free=2.8, wall=71365 2021-06-19 14:28:22 | INFO | train_inner | epoch 003: 228 / 3002 loss=2.616, ppl=6.13, wps=5856.4, ups=0.09, wpb=64760, bsz=128, num_updates=6193, lr=9.99585e-05, gnorm=2.136, loss_scale=8, train_wall=11, gb_free=2.8, wall=71376 2021-06-19 14:28:33 | INFO | train_inner | epoch 003: 229 / 3002 loss=2.687, ppl=6.44, wps=5913.3, ups=0.09, wpb=64813, bsz=128, num_updates=6194, lr=9.99584e-05, gnorm=5.726, loss_scale=8, train_wall=11, gb_free=2.8, wall=71387 2021-06-19 14:28:44 | INFO | train_inner | epoch 003: 230 / 3002 loss=2.584, ppl=6, wps=5844, ups=0.09, wpb=64930, bsz=128, num_updates=6195, lr=9.99584e-05, gnorm=2.239, loss_scale=8, train_wall=11, gb_free=2.8, wall=71398 2021-06-19 14:28:55 | INFO | train_inner | epoch 003: 231 / 3002 loss=2.702, ppl=6.51, wps=5858.5, ups=0.09, wpb=64891, bsz=128, num_updates=6196, lr=9.99584e-05, gnorm=2.245, loss_scale=8, train_wall=11, gb_free=2.8, wall=71410 2021-06-19 14:29:06 | INFO | train_inner | epoch 003: 232 / 3002 loss=2.718, ppl=6.58, wps=5806.1, ups=0.09, wpb=64785, bsz=128, num_updates=6197, lr=9.99584e-05, gnorm=2.17, loss_scale=8, train_wall=11, gb_free=2.8, wall=71421 2021-06-19 14:29:17 | INFO | train_inner | epoch 003: 233 / 3002 loss=2.693, ppl=6.47, wps=5892.9, ups=0.09, wpb=64770, bsz=128, num_updates=6198, lr=9.99584e-05, gnorm=2.174, loss_scale=8, train_wall=11, gb_free=2.8, wall=71432 2021-06-19 14:29:28 | INFO | train_inner | epoch 003: 234 / 3002 loss=2.608, ppl=6.1, wps=5873.6, ups=0.09, wpb=64847, bsz=128, num_updates=6199, lr=9.99584e-05, gnorm=2.212, loss_scale=8, train_wall=11, gb_free=2.8, wall=71443 2021-06-19 14:29:39 | INFO | train_inner | epoch 003: 235 / 3002 loss=2.645, ppl=6.26, wps=5926.6, ups=0.09, wpb=64805, bsz=128, num_updates=6200, lr=9.99584e-05, gnorm=2.105, loss_scale=8, train_wall=10, gb_free=2.8, wall=71454 2021-06-19 14:29:50 | INFO | train_inner | epoch 003: 236 / 3002 loss=2.756, ppl=6.75, wps=5872.7, ups=0.09, wpb=64791, bsz=128, num_updates=6201, lr=9.99584e-05, gnorm=3.095, loss_scale=8, train_wall=11, gb_free=2.8, wall=71465 2021-06-19 14:30:01 | INFO | train_inner | epoch 003: 237 / 3002 loss=2.791, ppl=6.92, wps=5835.6, ups=0.09, wpb=64769, bsz=128, num_updates=6202, lr=9.99584e-05, gnorm=2.391, loss_scale=8, train_wall=11, gb_free=2.8, wall=71476 2021-06-19 14:30:12 | INFO | train_inner | epoch 003: 238 / 3002 loss=2.444, ppl=5.44, wps=5912.8, ups=0.09, wpb=64837, bsz=128, num_updates=6203, lr=9.99584e-05, gnorm=2.285, loss_scale=8, train_wall=11, gb_free=2.8, wall=71487 2021-06-19 14:30:24 | INFO | train_inner | epoch 003: 239 / 3002 loss=2.656, ppl=6.3, wps=5758.3, ups=0.09, wpb=64823, bsz=128, num_updates=6204, lr=9.99584e-05, gnorm=2.232, loss_scale=8, train_wall=11, gb_free=2.8, wall=71498 2021-06-19 14:30:35 | INFO | train_inner | epoch 003: 240 / 3002 loss=2.653, ppl=6.29, wps=5802, ups=0.09, wpb=64791, bsz=128, num_updates=6205, lr=9.99584e-05, gnorm=2.223, loss_scale=8, train_wall=11, gb_free=2.8, wall=71509 2021-06-19 14:30:46 | INFO | train_inner | epoch 003: 241 / 3002 loss=2.771, ppl=6.83, wps=5849.3, ups=0.09, wpb=64808, bsz=128, num_updates=6206, lr=9.99583e-05, gnorm=2.209, loss_scale=8, train_wall=11, gb_free=2.8, wall=71520 2021-06-19 14:30:57 | INFO | train_inner | epoch 003: 242 / 3002 loss=2.97, ppl=7.84, wps=5814.6, ups=0.09, wpb=64725, bsz=128, num_updates=6207, lr=9.99583e-05, gnorm=2.35, loss_scale=8, train_wall=11, gb_free=2.8, wall=71531 2021-06-19 14:31:08 | INFO | train_inner | epoch 003: 243 / 3002 loss=2.803, ppl=6.98, wps=5894.6, ups=0.09, wpb=64768, bsz=128, num_updates=6208, lr=9.99583e-05, gnorm=2.263, loss_scale=8, train_wall=11, gb_free=2.8, wall=71542 2021-06-19 14:31:19 | INFO | train_inner | epoch 003: 244 / 3002 loss=2.676, ppl=6.39, wps=5737.7, ups=0.09, wpb=64770, bsz=128, num_updates=6209, lr=9.99583e-05, gnorm=2.109, loss_scale=8, train_wall=11, gb_free=2.8, wall=71554 2021-06-19 14:31:30 | INFO | train_inner | epoch 003: 245 / 3002 loss=2.7, ppl=6.5, wps=5984.2, ups=0.09, wpb=64861, bsz=128, num_updates=6210, lr=9.99583e-05, gnorm=2.2, loss_scale=8, train_wall=10, gb_free=2.8, wall=71564 2021-06-19 14:31:41 | INFO | train_inner | epoch 003: 246 / 3002 loss=2.888, ppl=7.4, wps=5798.6, ups=0.09, wpb=64854, bsz=128, num_updates=6211, lr=9.99583e-05, gnorm=2.292, loss_scale=8, train_wall=11, gb_free=2.8, wall=71576 2021-06-19 14:31:52 | INFO | train_inner | epoch 003: 247 / 3002 loss=2.676, ppl=6.39, wps=5862.3, ups=0.09, wpb=64793, bsz=128, num_updates=6212, lr=9.99583e-05, gnorm=4.334, loss_scale=8, train_wall=11, gb_free=2.8, wall=71587 2021-06-19 14:32:04 | INFO | train_inner | epoch 003: 248 / 3002 loss=2.609, ppl=6.1, wps=5793.6, ups=0.09, wpb=64872, bsz=128, num_updates=6213, lr=9.99583e-05, gnorm=2.109, loss_scale=8, train_wall=11, gb_free=2.8, wall=71598 2021-06-19 14:32:15 | INFO | train_inner | epoch 003: 249 / 3002 loss=2.576, ppl=5.96, wps=5829.8, ups=0.09, wpb=64872, bsz=128, num_updates=6214, lr=9.99583e-05, gnorm=2.175, loss_scale=8, train_wall=11, gb_free=2.8, wall=71609 2021-06-19 14:32:26 | INFO | train_inner | epoch 003: 250 / 3002 loss=2.791, ppl=6.92, wps=5886.7, ups=0.09, wpb=64923, bsz=128, num_updates=6215, lr=9.99583e-05, gnorm=2.349, loss_scale=8, train_wall=11, gb_free=2.8, wall=71620 2021-06-19 14:32:37 | INFO | train_inner | epoch 003: 251 / 3002 loss=2.679, ppl=6.4, wps=5850.7, ups=0.09, wpb=64829, bsz=128, num_updates=6216, lr=9.99583e-05, gnorm=2.42, loss_scale=8, train_wall=11, gb_free=2.8, wall=71631 2021-06-19 14:32:48 | INFO | train_inner | epoch 003: 252 / 3002 loss=2.573, ppl=5.95, wps=5854.4, ups=0.09, wpb=64865, bsz=128, num_updates=6217, lr=9.99583e-05, gnorm=2.354, loss_scale=8, train_wall=11, gb_free=2.8, wall=71642 2021-06-19 14:32:59 | INFO | train_inner | epoch 003: 253 / 3002 loss=2.634, ppl=6.21, wps=5763.9, ups=0.09, wpb=64816, bsz=128, num_updates=6218, lr=9.99583e-05, gnorm=2.109, loss_scale=8, train_wall=11, gb_free=2.8, wall=71653 2021-06-19 14:33:10 | INFO | train_inner | epoch 003: 254 / 3002 loss=2.765, ppl=6.8, wps=5840, ups=0.09, wpb=64809, bsz=128, num_updates=6219, lr=9.99582e-05, gnorm=3.103, loss_scale=8, train_wall=11, gb_free=2.8, wall=71665 2021-06-19 14:33:21 | INFO | train_inner | epoch 003: 255 / 3002 loss=2.822, ppl=7.07, wps=5799.4, ups=0.09, wpb=64801, bsz=128, num_updates=6220, lr=9.99582e-05, gnorm=2.12, loss_scale=8, train_wall=11, gb_free=2.8, wall=71676 2021-06-19 14:33:32 | INFO | train_inner | epoch 003: 256 / 3002 loss=2.701, ppl=6.5, wps=5884.1, ups=0.09, wpb=64804, bsz=128, num_updates=6221, lr=9.99582e-05, gnorm=2.136, loss_scale=8, train_wall=11, gb_free=2.8, wall=71687 2021-06-19 14:33:43 | INFO | train_inner | epoch 003: 257 / 3002 loss=2.662, ppl=6.33, wps=5874.5, ups=0.09, wpb=64842, bsz=128, num_updates=6222, lr=9.99582e-05, gnorm=2.198, loss_scale=8, train_wall=11, gb_free=2.8, wall=71698 2021-06-19 14:33:55 | INFO | train_inner | epoch 003: 258 / 3002 loss=2.574, ppl=5.96, wps=5877.6, ups=0.09, wpb=64874, bsz=128, num_updates=6223, lr=9.99582e-05, gnorm=2.22, loss_scale=8, train_wall=11, gb_free=2.8, wall=71709 2021-06-19 14:34:05 | INFO | train_inner | epoch 003: 259 / 3002 loss=2.594, ppl=6.04, wps=5933, ups=0.09, wpb=64768, bsz=128, num_updates=6224, lr=9.99582e-05, gnorm=2.084, loss_scale=8, train_wall=10, gb_free=2.8, wall=71720 2021-06-19 14:34:17 | INFO | train_inner | epoch 003: 260 / 3002 loss=2.675, ppl=6.39, wps=5838, ups=0.09, wpb=64834, bsz=128, num_updates=6225, lr=9.99582e-05, gnorm=2.264, loss_scale=8, train_wall=11, gb_free=2.8, wall=71731 2021-06-19 14:34:27 | INFO | train_inner | epoch 003: 261 / 3002 loss=2.633, ppl=6.2, wps=5934.5, ups=0.09, wpb=64907, bsz=128, num_updates=6226, lr=9.99582e-05, gnorm=2.183, loss_scale=8, train_wall=10, gb_free=2.8, wall=71742 2021-06-19 14:34:38 | INFO | train_inner | epoch 003: 262 / 3002 loss=2.641, ppl=6.24, wps=5894.6, ups=0.09, wpb=64836, bsz=128, num_updates=6227, lr=9.99582e-05, gnorm=2.169, loss_scale=8, train_wall=11, gb_free=2.8, wall=71753 2021-06-19 14:34:50 | INFO | train_inner | epoch 003: 263 / 3002 loss=2.561, ppl=5.9, wps=5822.7, ups=0.09, wpb=64863, bsz=128, num_updates=6228, lr=9.99582e-05, gnorm=2.186, loss_scale=8, train_wall=11, gb_free=2.8, wall=71764 2021-06-19 14:35:01 | INFO | train_inner | epoch 003: 264 / 3002 loss=2.613, ppl=6.12, wps=5738.8, ups=0.09, wpb=64798, bsz=128, num_updates=6229, lr=9.99582e-05, gnorm=2.165, loss_scale=8, train_wall=11, gb_free=2.8, wall=71775 2021-06-19 14:35:12 | INFO | train_inner | epoch 003: 265 / 3002 loss=2.851, ppl=7.21, wps=5768.3, ups=0.09, wpb=64851, bsz=128, num_updates=6230, lr=9.99582e-05, gnorm=2.313, loss_scale=8, train_wall=11, gb_free=2.8, wall=71786 2021-06-19 14:35:23 | INFO | train_inner | epoch 003: 266 / 3002 loss=2.631, ppl=6.19, wps=5828.1, ups=0.09, wpb=64851, bsz=128, num_updates=6231, lr=9.99581e-05, gnorm=2.278, loss_scale=8, train_wall=11, gb_free=2.8, wall=71798 2021-06-19 14:35:34 | INFO | train_inner | epoch 003: 267 / 3002 loss=2.734, ppl=6.65, wps=5818, ups=0.09, wpb=64830, bsz=128, num_updates=6232, lr=9.99581e-05, gnorm=2.254, loss_scale=8, train_wall=11, gb_free=2.8, wall=71809 2021-06-19 14:35:45 | INFO | train_inner | epoch 003: 268 / 3002 loss=2.574, ppl=5.95, wps=5884.5, ups=0.09, wpb=64889, bsz=128, num_updates=6233, lr=9.99581e-05, gnorm=2.15, loss_scale=8, train_wall=11, gb_free=2.8, wall=71820 2021-06-19 14:35:56 | INFO | train_inner | epoch 003: 269 / 3002 loss=2.553, ppl=5.87, wps=5950.7, ups=0.09, wpb=64908, bsz=128, num_updates=6234, lr=9.99581e-05, gnorm=2.243, loss_scale=8, train_wall=10, gb_free=2.8, wall=71831 2021-06-19 14:36:08 | INFO | train_inner | epoch 003: 270 / 3002 loss=2.606, ppl=6.09, wps=5807.6, ups=0.09, wpb=64841, bsz=128, num_updates=6235, lr=9.99581e-05, gnorm=2.362, loss_scale=8, train_wall=11, gb_free=2.8, wall=71842 2021-06-19 14:36:19 | INFO | train_inner | epoch 003: 271 / 3002 loss=2.906, ppl=7.49, wps=5740.9, ups=0.09, wpb=64805, bsz=128, num_updates=6236, lr=9.99581e-05, gnorm=2.222, loss_scale=8, train_wall=11, gb_free=2.8, wall=71853 2021-06-19 14:36:30 | INFO | train_inner | epoch 003: 272 / 3002 loss=2.769, ppl=6.81, wps=5736.4, ups=0.09, wpb=64782, bsz=128, num_updates=6237, lr=9.99581e-05, gnorm=2.104, loss_scale=8, train_wall=11, gb_free=2.8, wall=71864 2021-06-19 14:36:41 | INFO | train_inner | epoch 003: 273 / 3002 loss=2.731, ppl=6.64, wps=5752.1, ups=0.09, wpb=64847, bsz=128, num_updates=6238, lr=9.99581e-05, gnorm=2.184, loss_scale=16, train_wall=11, gb_free=2.8, wall=71876 2021-06-19 14:36:52 | INFO | train_inner | epoch 003: 274 / 3002 loss=2.741, ppl=6.68, wps=5851, ups=0.09, wpb=64853, bsz=128, num_updates=6239, lr=9.99581e-05, gnorm=2.189, loss_scale=16, train_wall=11, gb_free=2.8, wall=71887 2021-06-19 14:37:03 | INFO | train_inner | epoch 003: 275 / 3002 loss=2.556, ppl=5.88, wps=5975.7, ups=0.09, wpb=64799, bsz=128, num_updates=6240, lr=9.99581e-05, gnorm=2.167, loss_scale=16, train_wall=10, gb_free=2.8, wall=71898 2021-06-19 14:37:14 | INFO | train_inner | epoch 003: 276 / 3002 loss=2.705, ppl=6.52, wps=5780.5, ups=0.09, wpb=64723, bsz=128, num_updates=6241, lr=9.99581e-05, gnorm=2.172, loss_scale=16, train_wall=11, gb_free=2.8, wall=71909 2021-06-19 14:37:26 | INFO | train_inner | epoch 003: 277 / 3002 loss=2.525, ppl=5.76, wps=5685.1, ups=0.09, wpb=64867, bsz=128, num_updates=6242, lr=9.99581e-05, gnorm=2.112, loss_scale=16, train_wall=11, gb_free=2.8, wall=71920 2021-06-19 14:37:37 | INFO | train_inner | epoch 003: 278 / 3002 loss=2.689, ppl=6.45, wps=5973, ups=0.09, wpb=64885, bsz=128, num_updates=6243, lr=9.99581e-05, gnorm=2.208, loss_scale=16, train_wall=10, gb_free=2.8, wall=71931 2021-06-19 14:37:48 | INFO | train_inner | epoch 003: 279 / 3002 loss=2.871, ppl=7.32, wps=5756.2, ups=0.09, wpb=64820, bsz=128, num_updates=6244, lr=9.9958e-05, gnorm=2.626, loss_scale=16, train_wall=11, gb_free=2.8, wall=71942 2021-06-19 14:37:59 | INFO | train_inner | epoch 003: 280 / 3002 loss=2.563, ppl=5.91, wps=5921.4, ups=0.09, wpb=64883, bsz=128, num_updates=6245, lr=9.9958e-05, gnorm=2.428, loss_scale=16, train_wall=10, gb_free=2.8, wall=71953 2021-06-19 14:38:10 | INFO | train_inner | epoch 003: 281 / 3002 loss=2.641, ppl=6.24, wps=5784.4, ups=0.09, wpb=64869, bsz=128, num_updates=6246, lr=9.9958e-05, gnorm=2.761, loss_scale=16, train_wall=11, gb_free=2.8, wall=71965 2021-06-19 14:38:21 | INFO | train_inner | epoch 003: 282 / 3002 loss=2.813, ppl=7.03, wps=5933, ups=0.09, wpb=64779, bsz=128, num_updates=6247, lr=9.9958e-05, gnorm=2.241, loss_scale=16, train_wall=10, gb_free=2.8, wall=71975 2021-06-19 14:38:32 | INFO | train_inner | epoch 003: 283 / 3002 loss=2.717, ppl=6.58, wps=5888, ups=0.09, wpb=64884, bsz=128, num_updates=6248, lr=9.9958e-05, gnorm=2.202, loss_scale=16, train_wall=11, gb_free=2.8, wall=71986 2021-06-19 14:38:43 | INFO | train_inner | epoch 003: 284 / 3002 loss=2.751, ppl=6.73, wps=5810, ups=0.09, wpb=64834, bsz=128, num_updates=6249, lr=9.9958e-05, gnorm=2.246, loss_scale=16, train_wall=11, gb_free=2.8, wall=71998 2021-06-19 14:38:54 | INFO | train_inner | epoch 003: 285 / 3002 loss=2.471, ppl=5.54, wps=5919.2, ups=0.09, wpb=64900, bsz=128, num_updates=6250, lr=9.9958e-05, gnorm=2.11, loss_scale=16, train_wall=10, gb_free=2.8, wall=72009 2021-06-19 14:39:05 | INFO | train_inner | epoch 003: 286 / 3002 loss=2.723, ppl=6.6, wps=5876.9, ups=0.09, wpb=64733, bsz=128, num_updates=6251, lr=9.9958e-05, gnorm=2.142, loss_scale=16, train_wall=11, gb_free=2.8, wall=72020 2021-06-19 14:39:16 | INFO | train_inner | epoch 003: 287 / 3002 loss=2.764, ppl=6.79, wps=5847.7, ups=0.09, wpb=64859, bsz=128, num_updates=6252, lr=9.9958e-05, gnorm=2.327, loss_scale=16, train_wall=11, gb_free=2.8, wall=72031 2021-06-19 14:39:27 | INFO | train_inner | epoch 003: 288 / 3002 loss=2.625, ppl=6.17, wps=5832.1, ups=0.09, wpb=64753, bsz=128, num_updates=6253, lr=9.9958e-05, gnorm=2.354, loss_scale=16, train_wall=11, gb_free=2.8, wall=72042 2021-06-19 14:39:39 | INFO | train_inner | epoch 003: 289 / 3002 loss=2.786, ppl=6.9, wps=5852.7, ups=0.09, wpb=64862, bsz=128, num_updates=6254, lr=9.9958e-05, gnorm=2.251, loss_scale=16, train_wall=11, gb_free=2.8, wall=72053 2021-06-19 14:39:50 | INFO | train_inner | epoch 003: 290 / 3002 loss=2.787, ppl=6.9, wps=5828.1, ups=0.09, wpb=64767, bsz=128, num_updates=6255, lr=9.9958e-05, gnorm=2.242, loss_scale=16, train_wall=11, gb_free=2.8, wall=72064 2021-06-19 14:40:01 | INFO | train_inner | epoch 003: 291 / 3002 loss=2.67, ppl=6.37, wps=5916, ups=0.09, wpb=64914, bsz=128, num_updates=6256, lr=9.99579e-05, gnorm=2.139, loss_scale=16, train_wall=10, gb_free=2.8, wall=72075 2021-06-19 14:40:12 | INFO | train_inner | epoch 003: 292 / 3002 loss=2.687, ppl=6.44, wps=5776.2, ups=0.09, wpb=64754, bsz=128, num_updates=6257, lr=9.99579e-05, gnorm=2.102, loss_scale=16, train_wall=11, gb_free=2.8, wall=72086 2021-06-19 14:40:23 | INFO | train_inner | epoch 003: 293 / 3002 loss=2.575, ppl=5.96, wps=5805.9, ups=0.09, wpb=64812, bsz=128, num_updates=6258, lr=9.99579e-05, gnorm=2.246, loss_scale=16, train_wall=11, gb_free=2.8, wall=72097 2021-06-19 14:40:34 | INFO | train_inner | epoch 003: 294 / 3002 loss=2.687, ppl=6.44, wps=5813.3, ups=0.09, wpb=64860, bsz=128, num_updates=6259, lr=9.99579e-05, gnorm=2.269, loss_scale=16, train_wall=11, gb_free=2.8, wall=72109 2021-06-19 14:40:45 | INFO | train_inner | epoch 003: 295 / 3002 loss=2.693, ppl=6.46, wps=5760.7, ups=0.09, wpb=64840, bsz=128, num_updates=6260, lr=9.99579e-05, gnorm=2.81, loss_scale=16, train_wall=11, gb_free=2.8, wall=72120 2021-06-19 14:40:56 | INFO | train_inner | epoch 003: 296 / 3002 loss=2.657, ppl=6.31, wps=5946.4, ups=0.09, wpb=64845, bsz=128, num_updates=6261, lr=9.99579e-05, gnorm=2.234, loss_scale=16, train_wall=10, gb_free=2.8, wall=72131 2021-06-19 14:41:07 | INFO | train_inner | epoch 003: 297 / 3002 loss=2.701, ppl=6.5, wps=5831.5, ups=0.09, wpb=64897, bsz=128, num_updates=6262, lr=9.99579e-05, gnorm=2.364, loss_scale=16, train_wall=11, gb_free=2.8, wall=72142 2021-06-19 14:41:19 | INFO | train_inner | epoch 003: 298 / 3002 loss=2.608, ppl=6.1, wps=5821.3, ups=0.09, wpb=64789, bsz=128, num_updates=6263, lr=9.99579e-05, gnorm=2.128, loss_scale=16, train_wall=11, gb_free=2.8, wall=72153 2021-06-19 14:41:30 | INFO | train_inner | epoch 003: 299 / 3002 loss=2.577, ppl=5.97, wps=5808.5, ups=0.09, wpb=64867, bsz=128, num_updates=6264, lr=9.99579e-05, gnorm=3.39, loss_scale=16, train_wall=11, gb_free=2.8, wall=72164 2021-06-19 14:41:41 | INFO | train_inner | epoch 003: 300 / 3002 loss=2.779, ppl=6.86, wps=5884.7, ups=0.09, wpb=64845, bsz=128, num_updates=6265, lr=9.99579e-05, gnorm=3.805, loss_scale=16, train_wall=11, gb_free=2.8, wall=72175 2021-06-19 14:41:52 | INFO | train_inner | epoch 003: 301 / 3002 loss=2.715, ppl=6.56, wps=5737.8, ups=0.09, wpb=64846, bsz=128, num_updates=6266, lr=9.99579e-05, gnorm=2.178, loss_scale=16, train_wall=11, gb_free=2.8, wall=72186 2021-06-19 14:42:03 | INFO | train_inner | epoch 003: 302 / 3002 loss=2.641, ppl=6.24, wps=5951, ups=0.09, wpb=64840, bsz=128, num_updates=6267, lr=9.99579e-05, gnorm=2.124, loss_scale=16, train_wall=10, gb_free=2.8, wall=72197 2021-06-19 14:42:14 | INFO | train_inner | epoch 003: 303 / 3002 loss=2.81, ppl=7.01, wps=5779, ups=0.09, wpb=64847, bsz=128, num_updates=6268, lr=9.99579e-05, gnorm=2.281, loss_scale=16, train_wall=11, gb_free=2.8, wall=72209 2021-06-19 14:42:25 | INFO | train_inner | epoch 003: 304 / 3002 loss=2.63, ppl=6.19, wps=5806.4, ups=0.09, wpb=64771, bsz=128, num_updates=6269, lr=9.99578e-05, gnorm=2.101, loss_scale=16, train_wall=11, gb_free=2.8, wall=72220 2021-06-19 14:42:37 | INFO | train_inner | epoch 003: 305 / 3002 loss=2.574, ppl=5.96, wps=5800.7, ups=0.09, wpb=64786, bsz=128, num_updates=6270, lr=9.99578e-05, gnorm=2.19, loss_scale=16, train_wall=11, gb_free=2.8, wall=72231 2021-06-19 14:42:47 | INFO | train_inner | epoch 003: 306 / 3002 loss=2.641, ppl=6.24, wps=5927.1, ups=0.09, wpb=64762, bsz=128, num_updates=6271, lr=9.99578e-05, gnorm=2.225, loss_scale=16, train_wall=10, gb_free=2.8, wall=72242 2021-06-19 14:42:58 | INFO | train_inner | epoch 003: 307 / 3002 loss=2.71, ppl=6.54, wps=5949.7, ups=0.09, wpb=64901, bsz=128, num_updates=6272, lr=9.99578e-05, gnorm=2.234, loss_scale=16, train_wall=10, gb_free=2.8, wall=72253 2021-06-19 14:43:09 | INFO | train_inner | epoch 003: 308 / 3002 loss=2.679, ppl=6.4, wps=5935.6, ups=0.09, wpb=64834, bsz=128, num_updates=6273, lr=9.99578e-05, gnorm=2.869, loss_scale=16, train_wall=10, gb_free=2.8, wall=72264 2021-06-19 14:43:20 | INFO | train_inner | epoch 003: 309 / 3002 loss=2.832, ppl=7.12, wps=5900.6, ups=0.09, wpb=64793, bsz=128, num_updates=6274, lr=9.99578e-05, gnorm=2.224, loss_scale=16, train_wall=11, gb_free=2.8, wall=72275 2021-06-19 14:43:31 | INFO | train_inner | epoch 003: 310 / 3002 loss=2.793, ppl=6.93, wps=5828.9, ups=0.09, wpb=64797, bsz=128, num_updates=6275, lr=9.99578e-05, gnorm=2.179, loss_scale=16, train_wall=11, gb_free=2.8, wall=72286 2021-06-19 14:43:42 | INFO | train_inner | epoch 003: 311 / 3002 loss=2.75, ppl=6.73, wps=5852, ups=0.09, wpb=64774, bsz=128, num_updates=6276, lr=9.99578e-05, gnorm=2.158, loss_scale=16, train_wall=11, gb_free=2.8, wall=72297 2021-06-19 14:43:54 | INFO | train_inner | epoch 003: 312 / 3002 loss=2.599, ppl=6.06, wps=5736.8, ups=0.09, wpb=64855, bsz=128, num_updates=6277, lr=9.99578e-05, gnorm=2.237, loss_scale=16, train_wall=11, gb_free=2.8, wall=72308 2021-06-19 14:44:05 | INFO | train_inner | epoch 003: 313 / 3002 loss=2.684, ppl=6.43, wps=5853.8, ups=0.09, wpb=64803, bsz=128, num_updates=6278, lr=9.99578e-05, gnorm=2.181, loss_scale=16, train_wall=11, gb_free=2.8, wall=72319 2021-06-19 14:44:16 | INFO | train_inner | epoch 003: 314 / 3002 loss=2.788, ppl=6.9, wps=5924, ups=0.09, wpb=64785, bsz=128, num_updates=6279, lr=9.99578e-05, gnorm=2.337, loss_scale=16, train_wall=10, gb_free=2.8, wall=72330 2021-06-19 14:44:27 | INFO | train_inner | epoch 003: 315 / 3002 loss=2.794, ppl=6.94, wps=5758.1, ups=0.09, wpb=64828, bsz=128, num_updates=6280, lr=9.99578e-05, gnorm=2.16, loss_scale=16, train_wall=11, gb_free=2.8, wall=72341 2021-06-19 14:44:38 | INFO | train_inner | epoch 003: 316 / 3002 loss=2.76, ppl=6.77, wps=5887.5, ups=0.09, wpb=64904, bsz=128, num_updates=6281, lr=9.99577e-05, gnorm=2.111, loss_scale=16, train_wall=11, gb_free=2.8, wall=72352 2021-06-19 14:44:49 | INFO | train_inner | epoch 003: 317 / 3002 loss=2.751, ppl=6.73, wps=5800.8, ups=0.09, wpb=64809, bsz=128, num_updates=6282, lr=9.99577e-05, gnorm=2.189, loss_scale=16, train_wall=11, gb_free=2.8, wall=72364 2021-06-19 14:45:00 | INFO | train_inner | epoch 003: 318 / 3002 loss=2.565, ppl=5.92, wps=5802.3, ups=0.09, wpb=64821, bsz=128, num_updates=6283, lr=9.99577e-05, gnorm=2.104, loss_scale=16, train_wall=11, gb_free=2.8, wall=72375 2021-06-19 14:45:11 | INFO | train_inner | epoch 003: 319 / 3002 loss=2.705, ppl=6.52, wps=5891.9, ups=0.09, wpb=64818, bsz=128, num_updates=6284, lr=9.99577e-05, gnorm=2.194, loss_scale=16, train_wall=11, gb_free=2.8, wall=72386 2021-06-19 14:45:23 | INFO | train_inner | epoch 003: 320 / 3002 loss=2.552, ppl=5.86, wps=5705.5, ups=0.09, wpb=64830, bsz=128, num_updates=6285, lr=9.99577e-05, gnorm=2.23, loss_scale=16, train_wall=11, gb_free=2.8, wall=72397 2021-06-19 14:45:34 | INFO | train_inner | epoch 003: 321 / 3002 loss=2.615, ppl=6.12, wps=5843.6, ups=0.09, wpb=64804, bsz=128, num_updates=6286, lr=9.99577e-05, gnorm=2.136, loss_scale=16, train_wall=11, gb_free=2.8, wall=72408 2021-06-19 14:45:45 | INFO | train_inner | epoch 003: 322 / 3002 loss=2.543, ppl=5.83, wps=5984, ups=0.09, wpb=64848, bsz=128, num_updates=6287, lr=9.99577e-05, gnorm=2.239, loss_scale=16, train_wall=10, gb_free=2.8, wall=72419 2021-06-19 14:45:56 | INFO | train_inner | epoch 003: 323 / 3002 loss=2.587, ppl=6.01, wps=5971.5, ups=0.09, wpb=64856, bsz=128, num_updates=6288, lr=9.99577e-05, gnorm=2.13, loss_scale=16, train_wall=10, gb_free=2.8, wall=72430 2021-06-19 14:46:07 | INFO | train_inner | epoch 003: 324 / 3002 loss=2.539, ppl=5.81, wps=5844.9, ups=0.09, wpb=64802, bsz=128, num_updates=6289, lr=9.99577e-05, gnorm=2.245, loss_scale=16, train_wall=11, gb_free=2.8, wall=72441 2021-06-19 14:46:17 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-19 14:46:28 | INFO | train_inner | epoch 003: 326 / 3002 loss=2.671, ppl=6.37, wps=2971.6, ups=0.05, wpb=64812, bsz=128, num_updates=6290, lr=9.99577e-05, gnorm=2.13, loss_scale=8, train_wall=21, gb_free=2.8, wall=72463 2021-06-19 14:46:40 | INFO | train_inner | epoch 003: 327 / 3002 loss=2.713, ppl=6.56, wps=5762, ups=0.09, wpb=64795, bsz=128, num_updates=6291, lr=9.99577e-05, gnorm=2.104, loss_scale=8, train_wall=11, gb_free=2.8, wall=72474 2021-06-19 14:46:51 | INFO | train_inner | epoch 003: 328 / 3002 loss=2.709, ppl=6.54, wps=5882.7, ups=0.09, wpb=64894, bsz=128, num_updates=6292, lr=9.99577e-05, gnorm=2.152, loss_scale=8, train_wall=11, gb_free=2.8, wall=72485 2021-06-19 14:47:02 | INFO | train_inner | epoch 003: 329 / 3002 loss=2.769, ppl=6.82, wps=5880, ups=0.09, wpb=64837, bsz=128, num_updates=6293, lr=9.99577e-05, gnorm=2.332, loss_scale=8, train_wall=11, gb_free=2.8, wall=72496 2021-06-19 14:47:13 | INFO | train_inner | epoch 003: 330 / 3002 loss=2.536, ppl=5.8, wps=5939.6, ups=0.09, wpb=64827, bsz=128, num_updates=6294, lr=9.99576e-05, gnorm=2.08, loss_scale=8, train_wall=10, gb_free=2.8, wall=72507 2021-06-19 14:47:24 | INFO | train_inner | epoch 003: 331 / 3002 loss=2.618, ppl=6.14, wps=5810.1, ups=0.09, wpb=64863, bsz=128, num_updates=6295, lr=9.99576e-05, gnorm=2.125, loss_scale=8, train_wall=11, gb_free=2.8, wall=72518 2021-06-19 14:47:35 | INFO | train_inner | epoch 003: 332 / 3002 loss=2.647, ppl=6.26, wps=5752.2, ups=0.09, wpb=64697, bsz=128, num_updates=6296, lr=9.99576e-05, gnorm=2.103, loss_scale=8, train_wall=11, gb_free=2.8, wall=72529 2021-06-19 14:47:46 | INFO | train_inner | epoch 003: 333 / 3002 loss=2.777, ppl=6.86, wps=5775.1, ups=0.09, wpb=64840, bsz=128, num_updates=6297, lr=9.99576e-05, gnorm=2.228, loss_scale=8, train_wall=11, gb_free=2.8, wall=72541 2021-06-19 14:47:58 | INFO | train_inner | epoch 003: 334 / 3002 loss=2.855, ppl=7.24, wps=5740.1, ups=0.09, wpb=64840, bsz=128, num_updates=6298, lr=9.99576e-05, gnorm=2.3, loss_scale=8, train_wall=11, gb_free=2.8, wall=72552 2021-06-19 14:48:09 | INFO | train_inner | epoch 003: 335 / 3002 loss=2.665, ppl=6.34, wps=5764, ups=0.09, wpb=64750, bsz=128, num_updates=6299, lr=9.99576e-05, gnorm=2.169, loss_scale=8, train_wall=11, gb_free=2.8, wall=72563 2021-06-19 14:48:20 | INFO | train_inner | epoch 003: 336 / 3002 loss=2.56, ppl=5.9, wps=5742.2, ups=0.09, wpb=64809, bsz=128, num_updates=6300, lr=9.99576e-05, gnorm=2.147, loss_scale=8, train_wall=11, gb_free=2.8, wall=72574 2021-06-19 14:48:31 | INFO | train_inner | epoch 003: 337 / 3002 loss=2.726, ppl=6.62, wps=5812.6, ups=0.09, wpb=64836, bsz=128, num_updates=6301, lr=9.99576e-05, gnorm=2.239, loss_scale=8, train_wall=11, gb_free=2.8, wall=72586 2021-06-19 14:48:42 | INFO | train_inner | epoch 003: 338 / 3002 loss=2.566, ppl=5.92, wps=5804.9, ups=0.09, wpb=64849, bsz=128, num_updates=6302, lr=9.99576e-05, gnorm=2.19, loss_scale=8, train_wall=11, gb_free=2.8, wall=72597 2021-06-19 14:48:54 | INFO | train_inner | epoch 003: 339 / 3002 loss=2.678, ppl=6.4, wps=5703.8, ups=0.09, wpb=64842, bsz=128, num_updates=6303, lr=9.99576e-05, gnorm=2.24, loss_scale=8, train_wall=11, gb_free=2.8, wall=72608 2021-06-19 14:49:05 | INFO | train_inner | epoch 003: 340 / 3002 loss=2.564, ppl=5.91, wps=5896.6, ups=0.09, wpb=64914, bsz=128, num_updates=6304, lr=9.99576e-05, gnorm=2.05, loss_scale=8, train_wall=11, gb_free=2.8, wall=72619 2021-06-19 14:49:16 | INFO | train_inner | epoch 003: 341 / 3002 loss=2.683, ppl=6.42, wps=5703.3, ups=0.09, wpb=64861, bsz=128, num_updates=6305, lr=9.99576e-05, gnorm=2.135, loss_scale=8, train_wall=11, gb_free=2.8, wall=72631 2021-06-19 14:49:27 | INFO | train_inner | epoch 003: 342 / 3002 loss=2.719, ppl=6.58, wps=5822.2, ups=0.09, wpb=64847, bsz=128, num_updates=6306, lr=9.99575e-05, gnorm=2.282, loss_scale=8, train_wall=11, gb_free=2.8, wall=72642 2021-06-19 14:49:39 | INFO | train_inner | epoch 003: 343 / 3002 loss=2.632, ppl=6.2, wps=5790.6, ups=0.09, wpb=64877, bsz=128, num_updates=6307, lr=9.99575e-05, gnorm=2.293, loss_scale=8, train_wall=11, gb_free=2.8, wall=72653 2021-06-19 14:49:49 | INFO | train_inner | epoch 003: 344 / 3002 loss=2.678, ppl=6.4, wps=5924.2, ups=0.09, wpb=64839, bsz=128, num_updates=6308, lr=9.99575e-05, gnorm=2.286, loss_scale=8, train_wall=11, gb_free=2.8, wall=72664 2021-06-19 14:50:00 | INFO | train_inner | epoch 003: 345 / 3002 loss=2.643, ppl=6.25, wps=5899.2, ups=0.09, wpb=64890, bsz=128, num_updates=6309, lr=9.99575e-05, gnorm=2.243, loss_scale=8, train_wall=11, gb_free=2.8, wall=72675 2021-06-19 14:50:12 | INFO | train_inner | epoch 003: 346 / 3002 loss=2.569, ppl=5.93, wps=5816.9, ups=0.09, wpb=64938, bsz=128, num_updates=6310, lr=9.99575e-05, gnorm=2.277, loss_scale=8, train_wall=11, gb_free=2.8, wall=72686 2021-06-19 14:50:23 | INFO | train_inner | epoch 003: 347 / 3002 loss=2.689, ppl=6.45, wps=5772.9, ups=0.09, wpb=64853, bsz=128, num_updates=6311, lr=9.99575e-05, gnorm=2.159, loss_scale=8, train_wall=11, gb_free=2.8, wall=72697 2021-06-19 14:50:34 | INFO | train_inner | epoch 003: 348 / 3002 loss=2.57, ppl=5.94, wps=5744.2, ups=0.09, wpb=64836, bsz=128, num_updates=6312, lr=9.99575e-05, gnorm=2.152, loss_scale=8, train_wall=11, gb_free=2.8, wall=72709 2021-06-19 14:50:45 | INFO | train_inner | epoch 003: 349 / 3002 loss=2.544, ppl=5.83, wps=5831.3, ups=0.09, wpb=64846, bsz=128, num_updates=6313, lr=9.99575e-05, gnorm=2.117, loss_scale=8, train_wall=11, gb_free=2.8, wall=72720 2021-06-19 14:50:57 | INFO | train_inner | epoch 003: 350 / 3002 loss=2.481, ppl=5.58, wps=5754.4, ups=0.09, wpb=64796, bsz=128, num_updates=6314, lr=9.99575e-05, gnorm=2.137, loss_scale=8, train_wall=11, gb_free=2.8, wall=72731 2021-06-19 14:51:08 | INFO | train_inner | epoch 003: 351 / 3002 loss=2.635, ppl=6.21, wps=5875.7, ups=0.09, wpb=64736, bsz=128, num_updates=6315, lr=9.99575e-05, gnorm=2.233, loss_scale=8, train_wall=11, gb_free=2.8, wall=72742 2021-06-19 14:51:19 | INFO | train_inner | epoch 003: 352 / 3002 loss=2.806, ppl=6.99, wps=5749, ups=0.09, wpb=64766, bsz=128, num_updates=6316, lr=9.99575e-05, gnorm=2.257, loss_scale=8, train_wall=11, gb_free=2.8, wall=72753 2021-06-19 14:51:30 | INFO | train_inner | epoch 003: 353 / 3002 loss=2.506, ppl=5.68, wps=6013, ups=0.09, wpb=64891, bsz=128, num_updates=6317, lr=9.99575e-05, gnorm=2.238, loss_scale=8, train_wall=10, gb_free=2.8, wall=72764 2021-06-19 14:51:41 | INFO | train_inner | epoch 003: 354 / 3002 loss=2.673, ppl=6.38, wps=5769, ups=0.09, wpb=64862, bsz=128, num_updates=6318, lr=9.99575e-05, gnorm=2.064, loss_scale=8, train_wall=11, gb_free=2.8, wall=72775 2021-06-19 14:51:52 | INFO | train_inner | epoch 003: 355 / 3002 loss=2.59, ppl=6.02, wps=5876.9, ups=0.09, wpb=64752, bsz=128, num_updates=6319, lr=9.99574e-05, gnorm=2.397, loss_scale=8, train_wall=11, gb_free=2.8, wall=72786 2021-06-19 14:52:03 | INFO | train_inner | epoch 003: 356 / 3002 loss=2.716, ppl=6.57, wps=5832.8, ups=0.09, wpb=64821, bsz=128, num_updates=6320, lr=9.99574e-05, gnorm=2.216, loss_scale=8, train_wall=11, gb_free=2.8, wall=72797 2021-06-19 14:52:14 | INFO | train_inner | epoch 003: 357 / 3002 loss=2.644, ppl=6.25, wps=5890, ups=0.09, wpb=64789, bsz=128, num_updates=6321, lr=9.99574e-05, gnorm=2.308, loss_scale=8, train_wall=11, gb_free=2.8, wall=72808 2021-06-19 14:52:25 | INFO | train_inner | epoch 003: 358 / 3002 loss=2.714, ppl=6.56, wps=5905.5, ups=0.09, wpb=64822, bsz=128, num_updates=6322, lr=9.99574e-05, gnorm=2.227, loss_scale=8, train_wall=11, gb_free=2.8, wall=72819 2021-06-19 14:52:36 | INFO | train_inner | epoch 003: 359 / 3002 loss=2.554, ppl=5.87, wps=5736, ups=0.09, wpb=64849, bsz=128, num_updates=6323, lr=9.99574e-05, gnorm=2.311, loss_scale=8, train_wall=11, gb_free=2.8, wall=72831 2021-06-19 14:52:47 | INFO | train_inner | epoch 003: 360 / 3002 loss=2.701, ppl=6.5, wps=5858.2, ups=0.09, wpb=64820, bsz=128, num_updates=6324, lr=9.99574e-05, gnorm=3.136, loss_scale=8, train_wall=11, gb_free=2.8, wall=72842 2021-06-19 14:52:58 | INFO | train_inner | epoch 003: 361 / 3002 loss=2.678, ppl=6.4, wps=5817.4, ups=0.09, wpb=64837, bsz=128, num_updates=6325, lr=9.99574e-05, gnorm=2.151, loss_scale=8, train_wall=11, gb_free=2.8, wall=72853 2021-06-19 14:53:10 | INFO | train_inner | epoch 003: 362 / 3002 loss=2.465, ppl=5.52, wps=5808.4, ups=0.09, wpb=64860, bsz=128, num_updates=6326, lr=9.99574e-05, gnorm=2.29, loss_scale=8, train_wall=11, gb_free=2.8, wall=72864 2021-06-19 14:53:21 | INFO | train_inner | epoch 003: 363 / 3002 loss=2.71, ppl=6.54, wps=5852, ups=0.09, wpb=64811, bsz=128, num_updates=6327, lr=9.99574e-05, gnorm=2.224, loss_scale=8, train_wall=11, gb_free=2.8, wall=72875 2021-06-19 14:53:32 | INFO | train_inner | epoch 003: 364 / 3002 loss=2.564, ppl=5.91, wps=5778.5, ups=0.09, wpb=64871, bsz=128, num_updates=6328, lr=9.99574e-05, gnorm=2.23, loss_scale=8, train_wall=11, gb_free=2.8, wall=72886 2021-06-19 14:53:43 | INFO | train_inner | epoch 003: 365 / 3002 loss=2.655, ppl=6.3, wps=5925.3, ups=0.09, wpb=64875, bsz=128, num_updates=6329, lr=9.99574e-05, gnorm=2.214, loss_scale=8, train_wall=10, gb_free=2.8, wall=72897 2021-06-19 14:53:54 | INFO | train_inner | epoch 003: 366 / 3002 loss=2.621, ppl=6.15, wps=5847, ups=0.09, wpb=64852, bsz=128, num_updates=6330, lr=9.99574e-05, gnorm=2.171, loss_scale=8, train_wall=11, gb_free=2.8, wall=72908 2021-06-19 14:54:05 | INFO | train_inner | epoch 003: 367 / 3002 loss=2.61, ppl=6.11, wps=5897.7, ups=0.09, wpb=64814, bsz=128, num_updates=6331, lr=9.99573e-05, gnorm=2.081, loss_scale=8, train_wall=11, gb_free=2.8, wall=72919 2021-06-19 14:54:16 | INFO | train_inner | epoch 003: 368 / 3002 loss=2.729, ppl=6.63, wps=5803.2, ups=0.09, wpb=64861, bsz=128, num_updates=6332, lr=9.99573e-05, gnorm=2.232, loss_scale=8, train_wall=11, gb_free=2.8, wall=72931 2021-06-19 14:54:27 | INFO | train_inner | epoch 003: 369 / 3002 loss=2.484, ppl=5.59, wps=5880.9, ups=0.09, wpb=64826, bsz=128, num_updates=6333, lr=9.99573e-05, gnorm=2.095, loss_scale=8, train_wall=11, gb_free=2.8, wall=72942 2021-06-19 14:54:38 | INFO | train_inner | epoch 003: 370 / 3002 loss=2.68, ppl=6.41, wps=5885.8, ups=0.09, wpb=64869, bsz=128, num_updates=6334, lr=9.99573e-05, gnorm=3.481, loss_scale=8, train_wall=11, gb_free=2.8, wall=72953 2021-06-19 14:54:49 | INFO | train_inner | epoch 003: 371 / 3002 loss=2.574, ppl=5.96, wps=5867.4, ups=0.09, wpb=64845, bsz=128, num_updates=6335, lr=9.99573e-05, gnorm=2.394, loss_scale=8, train_wall=11, gb_free=2.8, wall=72964 2021-06-19 14:55:00 | INFO | train_inner | epoch 003: 372 / 3002 loss=2.645, ppl=6.25, wps=5951.2, ups=0.09, wpb=64788, bsz=128, num_updates=6336, lr=9.99573e-05, gnorm=2.235, loss_scale=8, train_wall=10, gb_free=2.8, wall=72974 2021-06-19 14:55:11 | INFO | train_inner | epoch 003: 373 / 3002 loss=2.648, ppl=6.27, wps=5891.4, ups=0.09, wpb=64822, bsz=128, num_updates=6337, lr=9.99573e-05, gnorm=2.178, loss_scale=8, train_wall=11, gb_free=2.8, wall=72985 2021-06-19 14:55:22 | INFO | train_inner | epoch 003: 374 / 3002 loss=2.517, ppl=5.72, wps=5872.1, ups=0.09, wpb=64806, bsz=128, num_updates=6338, lr=9.99573e-05, gnorm=2.275, loss_scale=8, train_wall=11, gb_free=2.8, wall=72997 2021-06-19 14:55:33 | INFO | train_inner | epoch 003: 375 / 3002 loss=2.716, ppl=6.57, wps=5848.1, ups=0.09, wpb=64795, bsz=128, num_updates=6339, lr=9.99573e-05, gnorm=2.229, loss_scale=8, train_wall=11, gb_free=2.8, wall=73008 2021-06-19 14:55:44 | INFO | train_inner | epoch 003: 376 / 3002 loss=2.715, ppl=6.57, wps=5882.3, ups=0.09, wpb=64789, bsz=128, num_updates=6340, lr=9.99573e-05, gnorm=2.169, loss_scale=8, train_wall=11, gb_free=2.8, wall=73019 2021-06-19 14:55:55 | INFO | train_inner | epoch 003: 377 / 3002 loss=2.68, ppl=6.41, wps=5879.4, ups=0.09, wpb=64856, bsz=128, num_updates=6341, lr=9.99573e-05, gnorm=2.24, loss_scale=8, train_wall=11, gb_free=2.8, wall=73030 2021-06-19 14:56:06 | INFO | train_inner | epoch 003: 378 / 3002 loss=2.581, ppl=5.98, wps=5851.6, ups=0.09, wpb=64757, bsz=128, num_updates=6342, lr=9.99573e-05, gnorm=2.14, loss_scale=8, train_wall=11, gb_free=2.8, wall=73041 2021-06-19 14:56:17 | INFO | train_inner | epoch 003: 379 / 3002 loss=2.763, ppl=6.79, wps=5830, ups=0.09, wpb=64733, bsz=128, num_updates=6343, lr=9.99573e-05, gnorm=2.065, loss_scale=8, train_wall=11, gb_free=2.8, wall=73052 2021-06-19 14:56:29 | INFO | train_inner | epoch 003: 380 / 3002 loss=2.667, ppl=6.35, wps=5828.5, ups=0.09, wpb=64837, bsz=128, num_updates=6344, lr=9.99572e-05, gnorm=8.297, loss_scale=8, train_wall=11, gb_free=2.8, wall=73063 2021-06-19 14:56:40 | INFO | train_inner | epoch 003: 381 / 3002 loss=2.699, ppl=6.49, wps=5813, ups=0.09, wpb=64815, bsz=128, num_updates=6345, lr=9.99572e-05, gnorm=2.141, loss_scale=8, train_wall=11, gb_free=2.8, wall=73074 2021-06-19 14:56:51 | INFO | train_inner | epoch 003: 382 / 3002 loss=2.53, ppl=5.78, wps=5823.5, ups=0.09, wpb=64861, bsz=128, num_updates=6346, lr=9.99572e-05, gnorm=2.19, loss_scale=8, train_wall=11, gb_free=2.8, wall=73085 2021-06-19 14:57:02 | INFO | train_inner | epoch 003: 383 / 3002 loss=2.692, ppl=6.46, wps=5714.4, ups=0.09, wpb=64831, bsz=128, num_updates=6347, lr=9.99572e-05, gnorm=8.155, loss_scale=8, train_wall=11, gb_free=2.8, wall=73097 2021-06-19 14:57:13 | INFO | train_inner | epoch 003: 384 / 3002 loss=2.697, ppl=6.48, wps=5853.6, ups=0.09, wpb=64836, bsz=128, num_updates=6348, lr=9.99572e-05, gnorm=2.134, loss_scale=8, train_wall=11, gb_free=2.8, wall=73108 2021-06-19 14:57:24 | INFO | train_inner | epoch 003: 385 / 3002 loss=2.819, ppl=7.06, wps=5862.7, ups=0.09, wpb=64844, bsz=128, num_updates=6349, lr=9.99572e-05, gnorm=2.801, loss_scale=8, train_wall=11, gb_free=2.8, wall=73119 2021-06-19 14:57:35 | INFO | train_inner | epoch 003: 386 / 3002 loss=2.697, ppl=6.48, wps=5863, ups=0.09, wpb=64762, bsz=128, num_updates=6350, lr=9.99572e-05, gnorm=2.274, loss_scale=8, train_wall=11, gb_free=2.8, wall=73130 2021-06-19 14:57:46 | INFO | train_inner | epoch 003: 387 / 3002 loss=2.764, ppl=6.79, wps=5888.3, ups=0.09, wpb=64823, bsz=128, num_updates=6351, lr=9.99572e-05, gnorm=2.332, loss_scale=8, train_wall=11, gb_free=2.8, wall=73141 2021-06-19 14:57:58 | INFO | train_inner | epoch 003: 388 / 3002 loss=2.712, ppl=6.55, wps=5839.6, ups=0.09, wpb=64880, bsz=128, num_updates=6352, lr=9.99572e-05, gnorm=2.285, loss_scale=8, train_wall=11, gb_free=2.8, wall=73152 2021-06-19 14:58:09 | INFO | train_inner | epoch 003: 389 / 3002 loss=2.853, ppl=7.22, wps=5827.7, ups=0.09, wpb=64757, bsz=128, num_updates=6353, lr=9.99572e-05, gnorm=2.372, loss_scale=8, train_wall=11, gb_free=2.8, wall=73163 2021-06-19 14:58:20 | INFO | train_inner | epoch 003: 390 / 3002 loss=2.635, ppl=6.21, wps=5783.6, ups=0.09, wpb=64863, bsz=128, num_updates=6354, lr=9.99572e-05, gnorm=2.19, loss_scale=8, train_wall=11, gb_free=2.8, wall=73174 2021-06-19 14:58:31 | INFO | train_inner | epoch 003: 391 / 3002 loss=2.637, ppl=6.22, wps=5787.3, ups=0.09, wpb=64792, bsz=128, num_updates=6355, lr=9.99572e-05, gnorm=2.291, loss_scale=8, train_wall=11, gb_free=2.8, wall=73185 2021-06-19 14:58:43 | INFO | train_inner | epoch 003: 392 / 3002 loss=2.601, ppl=6.07, wps=5668.8, ups=0.09, wpb=64865, bsz=128, num_updates=6356, lr=9.99571e-05, gnorm=2.207, loss_scale=8, train_wall=11, gb_free=2.8, wall=73197 2021-06-19 14:58:54 | INFO | train_inner | epoch 003: 393 / 3002 loss=2.702, ppl=6.51, wps=5720.5, ups=0.09, wpb=64777, bsz=128, num_updates=6357, lr=9.99571e-05, gnorm=2.214, loss_scale=8, train_wall=11, gb_free=2.8, wall=73208 2021-06-19 14:59:05 | INFO | train_inner | epoch 003: 394 / 3002 loss=2.69, ppl=6.45, wps=5768.8, ups=0.09, wpb=64827, bsz=128, num_updates=6358, lr=9.99571e-05, gnorm=2.159, loss_scale=8, train_wall=11, gb_free=2.8, wall=73219 2021-06-19 14:59:16 | INFO | train_inner | epoch 003: 395 / 3002 loss=2.553, ppl=5.87, wps=5854.9, ups=0.09, wpb=64816, bsz=128, num_updates=6359, lr=9.99571e-05, gnorm=2.111, loss_scale=8, train_wall=11, gb_free=2.8, wall=73230 2021-06-19 14:59:27 | INFO | train_inner | epoch 003: 396 / 3002 loss=2.616, ppl=6.13, wps=5879, ups=0.09, wpb=64864, bsz=128, num_updates=6360, lr=9.99571e-05, gnorm=2.22, loss_scale=8, train_wall=11, gb_free=2.8, wall=73242 2021-06-19 14:59:38 | INFO | train_inner | epoch 003: 397 / 3002 loss=2.668, ppl=6.36, wps=5932.5, ups=0.09, wpb=64927, bsz=128, num_updates=6361, lr=9.99571e-05, gnorm=2.324, loss_scale=8, train_wall=10, gb_free=2.8, wall=73252 2021-06-19 14:59:49 | INFO | train_inner | epoch 003: 398 / 3002 loss=2.652, ppl=6.28, wps=5767.3, ups=0.09, wpb=64725, bsz=128, num_updates=6362, lr=9.99571e-05, gnorm=2.817, loss_scale=8, train_wall=11, gb_free=2.8, wall=73264 2021-06-19 15:00:01 | INFO | train_inner | epoch 003: 399 / 3002 loss=2.74, ppl=6.68, wps=5769.7, ups=0.09, wpb=64839, bsz=128, num_updates=6363, lr=9.99571e-05, gnorm=2.249, loss_scale=8, train_wall=11, gb_free=2.8, wall=73275 2021-06-19 15:00:12 | INFO | train_inner | epoch 003: 400 / 3002 loss=2.566, ppl=5.92, wps=5831.7, ups=0.09, wpb=64830, bsz=128, num_updates=6364, lr=9.99571e-05, gnorm=2.156, loss_scale=8, train_wall=11, gb_free=2.8, wall=73286 2021-06-19 15:00:23 | INFO | train_inner | epoch 003: 401 / 3002 loss=2.568, ppl=5.93, wps=5880.8, ups=0.09, wpb=64803, bsz=128, num_updates=6365, lr=9.99571e-05, gnorm=2.271, loss_scale=8, train_wall=11, gb_free=2.8, wall=73297 2021-06-19 15:00:34 | INFO | train_inner | epoch 003: 402 / 3002 loss=2.729, ppl=6.63, wps=5819.4, ups=0.09, wpb=64743, bsz=128, num_updates=6366, lr=9.99571e-05, gnorm=2.146, loss_scale=8, train_wall=11, gb_free=2.8, wall=73308 2021-06-19 15:00:45 | INFO | train_inner | epoch 003: 403 / 3002 loss=2.63, ppl=6.19, wps=5883.6, ups=0.09, wpb=64872, bsz=128, num_updates=6367, lr=9.99571e-05, gnorm=2.284, loss_scale=8, train_wall=11, gb_free=2.8, wall=73319 2021-06-19 15:00:56 | INFO | train_inner | epoch 003: 404 / 3002 loss=2.603, ppl=6.07, wps=5824.1, ups=0.09, wpb=64794, bsz=128, num_updates=6368, lr=9.99571e-05, gnorm=2.243, loss_scale=8, train_wall=11, gb_free=2.8, wall=73330 2021-06-19 15:01:07 | INFO | train_inner | epoch 003: 405 / 3002 loss=2.583, ppl=5.99, wps=5874.5, ups=0.09, wpb=64786, bsz=128, num_updates=6369, lr=9.9957e-05, gnorm=2.279, loss_scale=8, train_wall=11, gb_free=2.8, wall=73341 2021-06-19 15:01:18 | INFO | train_inner | epoch 003: 406 / 3002 loss=2.669, ppl=6.36, wps=5738.5, ups=0.09, wpb=64882, bsz=128, num_updates=6370, lr=9.9957e-05, gnorm=2.27, loss_scale=8, train_wall=11, gb_free=2.8, wall=73353 2021-06-19 15:01:29 | INFO | train_inner | epoch 003: 407 / 3002 loss=2.792, ppl=6.92, wps=5851.6, ups=0.09, wpb=64829, bsz=128, num_updates=6371, lr=9.9957e-05, gnorm=2.377, loss_scale=8, train_wall=11, gb_free=2.8, wall=73364 2021-06-19 15:01:41 | INFO | train_inner | epoch 003: 408 / 3002 loss=2.632, ppl=6.2, wps=5825.5, ups=0.09, wpb=64809, bsz=128, num_updates=6372, lr=9.9957e-05, gnorm=2.154, loss_scale=8, train_wall=11, gb_free=2.8, wall=73375 2021-06-19 15:01:52 | INFO | train_inner | epoch 003: 409 / 3002 loss=2.765, ppl=6.8, wps=5852.3, ups=0.09, wpb=64922, bsz=128, num_updates=6373, lr=9.9957e-05, gnorm=2.36, loss_scale=8, train_wall=11, gb_free=2.8, wall=73386 2021-06-19 15:02:03 | INFO | train_inner | epoch 003: 410 / 3002 loss=2.723, ppl=6.6, wps=5828.2, ups=0.09, wpb=64896, bsz=128, num_updates=6374, lr=9.9957e-05, gnorm=2.482, loss_scale=8, train_wall=11, gb_free=2.8, wall=73397 2021-06-19 15:02:14 | INFO | train_inner | epoch 003: 411 / 3002 loss=2.624, ppl=6.17, wps=5840.8, ups=0.09, wpb=64815, bsz=128, num_updates=6375, lr=9.9957e-05, gnorm=2.136, loss_scale=8, train_wall=11, gb_free=2.8, wall=73408 2021-06-19 15:02:25 | INFO | train_inner | epoch 003: 412 / 3002 loss=2.685, ppl=6.43, wps=5786.8, ups=0.09, wpb=64868, bsz=128, num_updates=6376, lr=9.9957e-05, gnorm=2.189, loss_scale=8, train_wall=11, gb_free=2.8, wall=73419 2021-06-19 15:02:36 | INFO | train_inner | epoch 003: 413 / 3002 loss=2.68, ppl=6.41, wps=5756.5, ups=0.09, wpb=64848, bsz=128, num_updates=6377, lr=9.9957e-05, gnorm=2.16, loss_scale=8, train_wall=11, gb_free=2.8, wall=73431 2021-06-19 15:02:47 | INFO | train_inner | epoch 003: 414 / 3002 loss=2.735, ppl=6.66, wps=5816.3, ups=0.09, wpb=64782, bsz=128, num_updates=6378, lr=9.9957e-05, gnorm=2.225, loss_scale=8, train_wall=11, gb_free=2.8, wall=73442 2021-06-19 15:02:59 | INFO | train_inner | epoch 003: 415 / 3002 loss=2.825, ppl=7.08, wps=5836.8, ups=0.09, wpb=64901, bsz=128, num_updates=6379, lr=9.9957e-05, gnorm=2.327, loss_scale=8, train_wall=11, gb_free=2.8, wall=73453 2021-06-19 15:03:10 | INFO | train_inner | epoch 003: 416 / 3002 loss=2.674, ppl=6.38, wps=5904.5, ups=0.09, wpb=64806, bsz=128, num_updates=6380, lr=9.9957e-05, gnorm=3.078, loss_scale=8, train_wall=11, gb_free=2.8, wall=73464 2021-06-19 15:03:21 | INFO | train_inner | epoch 003: 417 / 3002 loss=2.626, ppl=6.17, wps=5862.3, ups=0.09, wpb=64806, bsz=128, num_updates=6381, lr=9.99569e-05, gnorm=2.736, loss_scale=8, train_wall=11, gb_free=2.8, wall=73475 2021-06-19 15:03:32 | INFO | train_inner | epoch 003: 418 / 3002 loss=2.636, ppl=6.21, wps=5815.2, ups=0.09, wpb=64836, bsz=128, num_updates=6382, lr=9.99569e-05, gnorm=2.143, loss_scale=8, train_wall=11, gb_free=2.8, wall=73486 2021-06-19 15:03:43 | INFO | train_inner | epoch 003: 419 / 3002 loss=2.535, ppl=5.8, wps=5794.8, ups=0.09, wpb=64880, bsz=128, num_updates=6383, lr=9.99569e-05, gnorm=2.33, loss_scale=8, train_wall=11, gb_free=2.8, wall=73497 2021-06-19 15:03:54 | INFO | train_inner | epoch 003: 420 / 3002 loss=2.798, ppl=6.95, wps=5928.1, ups=0.09, wpb=64847, bsz=128, num_updates=6384, lr=9.99569e-05, gnorm=2.793, loss_scale=8, train_wall=10, gb_free=2.8, wall=73508 2021-06-19 15:04:05 | INFO | train_inner | epoch 003: 421 / 3002 loss=2.596, ppl=6.04, wps=5936.5, ups=0.09, wpb=64845, bsz=128, num_updates=6385, lr=9.99569e-05, gnorm=2.072, loss_scale=8, train_wall=10, gb_free=2.8, wall=73519 2021-06-19 15:04:16 | INFO | train_inner | epoch 003: 422 / 3002 loss=2.621, ppl=6.15, wps=5838.1, ups=0.09, wpb=64801, bsz=128, num_updates=6386, lr=9.99569e-05, gnorm=2.164, loss_scale=8, train_wall=11, gb_free=2.8, wall=73530 2021-06-19 15:04:27 | INFO | train_inner | epoch 003: 423 / 3002 loss=2.754, ppl=6.75, wps=5984.5, ups=0.09, wpb=64893, bsz=128, num_updates=6387, lr=9.99569e-05, gnorm=2.287, loss_scale=8, train_wall=10, gb_free=2.8, wall=73541 2021-06-19 15:04:38 | INFO | train_inner | epoch 003: 424 / 3002 loss=2.778, ppl=6.86, wps=5916, ups=0.09, wpb=64845, bsz=128, num_updates=6388, lr=9.99569e-05, gnorm=2.239, loss_scale=8, train_wall=11, gb_free=2.8, wall=73552 2021-06-19 15:04:49 | INFO | train_inner | epoch 003: 425 / 3002 loss=2.757, ppl=6.76, wps=5777.9, ups=0.09, wpb=64756, bsz=128, num_updates=6389, lr=9.99569e-05, gnorm=2.235, loss_scale=8, train_wall=11, gb_free=2.8, wall=73563 2021-06-19 15:05:00 | INFO | train_inner | epoch 003: 426 / 3002 loss=2.576, ppl=5.96, wps=5892.3, ups=0.09, wpb=64816, bsz=128, num_updates=6390, lr=9.99569e-05, gnorm=2.115, loss_scale=8, train_wall=11, gb_free=2.8, wall=73574 2021-06-19 15:05:11 | INFO | train_inner | epoch 003: 427 / 3002 loss=2.645, ppl=6.25, wps=5899.1, ups=0.09, wpb=64879, bsz=128, num_updates=6391, lr=9.99569e-05, gnorm=2.128, loss_scale=8, train_wall=11, gb_free=2.8, wall=73585 2021-06-19 15:05:22 | INFO | train_inner | epoch 003: 428 / 3002 loss=2.606, ppl=6.09, wps=5868.7, ups=0.09, wpb=64790, bsz=128, num_updates=6392, lr=9.99569e-05, gnorm=2.174, loss_scale=8, train_wall=11, gb_free=2.8, wall=73596 2021-06-19 15:05:33 | INFO | train_inner | epoch 003: 429 / 3002 loss=2.682, ppl=6.42, wps=5847.8, ups=0.09, wpb=64892, bsz=128, num_updates=6393, lr=9.99569e-05, gnorm=4.648, loss_scale=8, train_wall=11, gb_free=2.8, wall=73607 2021-06-19 15:05:44 | INFO | train_inner | epoch 003: 430 / 3002 loss=2.649, ppl=6.27, wps=5933.5, ups=0.09, wpb=64828, bsz=128, num_updates=6394, lr=9.99568e-05, gnorm=2.144, loss_scale=8, train_wall=10, gb_free=2.8, wall=73618 2021-06-19 15:05:55 | INFO | train_inner | epoch 003: 431 / 3002 loss=2.83, ppl=7.11, wps=5871.3, ups=0.09, wpb=64688, bsz=128, num_updates=6395, lr=9.99568e-05, gnorm=2.189, loss_scale=8, train_wall=11, gb_free=2.8, wall=73629 2021-06-19 15:06:06 | INFO | train_inner | epoch 003: 432 / 3002 loss=2.628, ppl=6.18, wps=5762.7, ups=0.09, wpb=64791, bsz=128, num_updates=6396, lr=9.99568e-05, gnorm=2.126, loss_scale=8, train_wall=11, gb_free=2.8, wall=73641 2021-06-19 15:06:17 | INFO | train_inner | epoch 003: 433 / 3002 loss=2.489, ppl=5.61, wps=5806.5, ups=0.09, wpb=64795, bsz=128, num_updates=6397, lr=9.99568e-05, gnorm=2.217, loss_scale=8, train_wall=11, gb_free=2.8, wall=73652 2021-06-19 15:06:28 | INFO | train_inner | epoch 003: 434 / 3002 loss=2.66, ppl=6.32, wps=5955.4, ups=0.09, wpb=64903, bsz=128, num_updates=6398, lr=9.99568e-05, gnorm=2.22, loss_scale=8, train_wall=10, gb_free=2.8, wall=73663 2021-06-19 15:06:39 | INFO | train_inner | epoch 003: 435 / 3002 loss=2.608, ppl=6.1, wps=5856.4, ups=0.09, wpb=64889, bsz=128, num_updates=6399, lr=9.99568e-05, gnorm=2.507, loss_scale=8, train_wall=11, gb_free=2.8, wall=73674 2021-06-19 15:06:50 | INFO | train_inner | epoch 003: 436 / 3002 loss=2.683, ppl=6.42, wps=5886.8, ups=0.09, wpb=64768, bsz=128, num_updates=6400, lr=9.99568e-05, gnorm=2.588, loss_scale=8, train_wall=11, gb_free=2.8, wall=73685 2021-06-19 15:07:02 | INFO | train_inner | epoch 003: 437 / 3002 loss=2.78, ppl=6.87, wps=5820.9, ups=0.09, wpb=64795, bsz=128, num_updates=6401, lr=9.99568e-05, gnorm=2.162, loss_scale=8, train_wall=11, gb_free=2.8, wall=73696 2021-06-19 15:07:13 | INFO | train_inner | epoch 003: 438 / 3002 loss=2.513, ppl=5.71, wps=5828.3, ups=0.09, wpb=64844, bsz=128, num_updates=6402, lr=9.99568e-05, gnorm=2.142, loss_scale=8, train_wall=11, gb_free=2.8, wall=73707 2021-06-19 15:07:24 | INFO | train_inner | epoch 003: 439 / 3002 loss=2.775, ppl=6.85, wps=5899.5, ups=0.09, wpb=64827, bsz=128, num_updates=6403, lr=9.99568e-05, gnorm=2.072, loss_scale=8, train_wall=11, gb_free=2.8, wall=73718 2021-06-19 15:07:35 | INFO | train_inner | epoch 003: 440 / 3002 loss=2.533, ppl=5.79, wps=5800.9, ups=0.09, wpb=64847, bsz=128, num_updates=6404, lr=9.99568e-05, gnorm=2.171, loss_scale=8, train_wall=11, gb_free=2.8, wall=73729 2021-06-19 15:07:46 | INFO | train_inner | epoch 003: 441 / 3002 loss=2.642, ppl=6.24, wps=5852.9, ups=0.09, wpb=64768, bsz=128, num_updates=6405, lr=9.99568e-05, gnorm=2.104, loss_scale=8, train_wall=11, gb_free=2.8, wall=73740 2021-06-19 15:07:57 | INFO | train_inner | epoch 003: 442 / 3002 loss=2.851, ppl=7.22, wps=5958.1, ups=0.09, wpb=64822, bsz=128, num_updates=6406, lr=9.99567e-05, gnorm=2.136, loss_scale=8, train_wall=10, gb_free=2.8, wall=73751 2021-06-19 15:08:08 | INFO | train_inner | epoch 003: 443 / 3002 loss=2.635, ppl=6.21, wps=5898.3, ups=0.09, wpb=64857, bsz=128, num_updates=6407, lr=9.99567e-05, gnorm=2.202, loss_scale=8, train_wall=11, gb_free=2.8, wall=73762 2021-06-19 15:08:19 | INFO | train_inner | epoch 003: 444 / 3002 loss=2.69, ppl=6.45, wps=5895.8, ups=0.09, wpb=64851, bsz=128, num_updates=6408, lr=9.99567e-05, gnorm=2.128, loss_scale=8, train_wall=11, gb_free=2.8, wall=73773 2021-06-19 15:08:30 | INFO | train_inner | epoch 003: 445 / 3002 loss=2.583, ppl=5.99, wps=5726.2, ups=0.09, wpb=64790, bsz=128, num_updates=6409, lr=9.99567e-05, gnorm=2.085, loss_scale=8, train_wall=11, gb_free=2.8, wall=73784 2021-06-19 15:08:41 | INFO | train_inner | epoch 003: 446 / 3002 loss=2.686, ppl=6.44, wps=5806.3, ups=0.09, wpb=64791, bsz=128, num_updates=6410, lr=9.99567e-05, gnorm=2.306, loss_scale=8, train_wall=11, gb_free=2.8, wall=73796 2021-06-19 15:08:52 | INFO | train_inner | epoch 003: 447 / 3002 loss=2.722, ppl=6.6, wps=5846.8, ups=0.09, wpb=64833, bsz=128, num_updates=6411, lr=9.99567e-05, gnorm=2.219, loss_scale=8, train_wall=11, gb_free=2.8, wall=73807 2021-06-19 15:09:04 | INFO | train_inner | epoch 003: 448 / 3002 loss=2.672, ppl=6.37, wps=5796, ups=0.09, wpb=64772, bsz=128, num_updates=6412, lr=9.99567e-05, gnorm=2.232, loss_scale=8, train_wall=11, gb_free=2.8, wall=73818 2021-06-19 15:09:15 | INFO | train_inner | epoch 003: 449 / 3002 loss=2.548, ppl=5.85, wps=5745.5, ups=0.09, wpb=64890, bsz=128, num_updates=6413, lr=9.99567e-05, gnorm=2.128, loss_scale=8, train_wall=11, gb_free=2.8, wall=73829 2021-06-19 15:09:26 | INFO | train_inner | epoch 003: 450 / 3002 loss=2.588, ppl=6.01, wps=5802.6, ups=0.09, wpb=64871, bsz=128, num_updates=6414, lr=9.99567e-05, gnorm=2.13, loss_scale=8, train_wall=11, gb_free=2.8, wall=73840 2021-06-19 15:09:37 | INFO | train_inner | epoch 003: 451 / 3002 loss=2.8, ppl=6.96, wps=5844.1, ups=0.09, wpb=64931, bsz=128, num_updates=6415, lr=9.99567e-05, gnorm=2.214, loss_scale=8, train_wall=11, gb_free=2.8, wall=73851 2021-06-19 15:09:48 | INFO | train_inner | epoch 003: 452 / 3002 loss=2.678, ppl=6.4, wps=5800.3, ups=0.09, wpb=64809, bsz=128, num_updates=6416, lr=9.99567e-05, gnorm=2.225, loss_scale=8, train_wall=11, gb_free=2.8, wall=73863 2021-06-19 15:10:00 | INFO | train_inner | epoch 003: 453 / 3002 loss=2.697, ppl=6.48, wps=5743.7, ups=0.09, wpb=64854, bsz=128, num_updates=6417, lr=9.99567e-05, gnorm=7.469, loss_scale=16, train_wall=11, gb_free=2.8, wall=73874 2021-06-19 15:10:11 | INFO | train_inner | epoch 003: 454 / 3002 loss=2.768, ppl=6.81, wps=5835.7, ups=0.09, wpb=64888, bsz=128, num_updates=6418, lr=9.99567e-05, gnorm=2.215, loss_scale=16, train_wall=11, gb_free=2.8, wall=73885 2021-06-19 15:10:22 | INFO | train_inner | epoch 003: 455 / 3002 loss=2.729, ppl=6.63, wps=5959.1, ups=0.09, wpb=64864, bsz=128, num_updates=6419, lr=9.99566e-05, gnorm=2.184, loss_scale=16, train_wall=10, gb_free=2.8, wall=73896 2021-06-19 15:10:33 | INFO | train_inner | epoch 003: 456 / 3002 loss=2.636, ppl=6.21, wps=5836.3, ups=0.09, wpb=64901, bsz=128, num_updates=6420, lr=9.99566e-05, gnorm=2.188, loss_scale=16, train_wall=11, gb_free=2.8, wall=73907 2021-06-19 15:10:44 | INFO | train_inner | epoch 003: 457 / 3002 loss=2.605, ppl=6.08, wps=5842.7, ups=0.09, wpb=64839, bsz=128, num_updates=6421, lr=9.99566e-05, gnorm=2.179, loss_scale=16, train_wall=11, gb_free=2.8, wall=73918 2021-06-19 15:10:55 | INFO | train_inner | epoch 003: 458 / 3002 loss=2.618, ppl=6.14, wps=5767.3, ups=0.09, wpb=64885, bsz=128, num_updates=6422, lr=9.99566e-05, gnorm=2.259, loss_scale=16, train_wall=11, gb_free=2.8, wall=73929 2021-06-19 15:11:06 | INFO | train_inner | epoch 003: 459 / 3002 loss=2.728, ppl=6.62, wps=5825.9, ups=0.09, wpb=64829, bsz=128, num_updates=6423, lr=9.99566e-05, gnorm=2.182, loss_scale=16, train_wall=11, gb_free=2.8, wall=73941 2021-06-19 15:11:17 | INFO | train_inner | epoch 003: 460 / 3002 loss=2.731, ppl=6.64, wps=5736.5, ups=0.09, wpb=64826, bsz=128, num_updates=6424, lr=9.99566e-05, gnorm=2.129, loss_scale=16, train_wall=11, gb_free=2.8, wall=73952 2021-06-19 15:11:29 | INFO | train_inner | epoch 003: 461 / 3002 loss=2.589, ppl=6.02, wps=5771.6, ups=0.09, wpb=64849, bsz=128, num_updates=6425, lr=9.99566e-05, gnorm=2.114, loss_scale=16, train_wall=11, gb_free=2.8, wall=73963 2021-06-19 15:11:40 | INFO | train_inner | epoch 003: 462 / 3002 loss=2.597, ppl=6.05, wps=5745.2, ups=0.09, wpb=64691, bsz=128, num_updates=6426, lr=9.99566e-05, gnorm=2.197, loss_scale=16, train_wall=11, gb_free=2.8, wall=73974 2021-06-19 15:11:51 | INFO | train_inner | epoch 003: 463 / 3002 loss=2.749, ppl=6.72, wps=5832.6, ups=0.09, wpb=64857, bsz=128, num_updates=6427, lr=9.99566e-05, gnorm=2.253, loss_scale=16, train_wall=11, gb_free=2.8, wall=73985 2021-06-19 15:12:02 | INFO | train_inner | epoch 003: 464 / 3002 loss=2.642, ppl=6.24, wps=5763.8, ups=0.09, wpb=64877, bsz=128, num_updates=6428, lr=9.99566e-05, gnorm=2.186, loss_scale=16, train_wall=11, gb_free=2.8, wall=73997 2021-06-19 15:12:14 | INFO | train_inner | epoch 003: 465 / 3002 loss=2.539, ppl=5.81, wps=5772.7, ups=0.09, wpb=64836, bsz=128, num_updates=6429, lr=9.99566e-05, gnorm=2.24, loss_scale=16, train_wall=11, gb_free=2.8, wall=74008 2021-06-19 15:12:25 | INFO | train_inner | epoch 003: 466 / 3002 loss=2.709, ppl=6.54, wps=5822.4, ups=0.09, wpb=64751, bsz=128, num_updates=6430, lr=9.99566e-05, gnorm=2.398, loss_scale=16, train_wall=11, gb_free=2.8, wall=74019 2021-06-19 15:12:36 | INFO | train_inner | epoch 003: 467 / 3002 loss=2.586, ppl=6, wps=5901.9, ups=0.09, wpb=64868, bsz=128, num_updates=6431, lr=9.99565e-05, gnorm=2.157, loss_scale=16, train_wall=11, gb_free=2.8, wall=74030 2021-06-19 15:12:47 | INFO | train_inner | epoch 003: 468 / 3002 loss=2.683, ppl=6.42, wps=5937.9, ups=0.09, wpb=64878, bsz=128, num_updates=6432, lr=9.99565e-05, gnorm=2.158, loss_scale=16, train_wall=10, gb_free=2.8, wall=74041 2021-06-19 15:12:58 | INFO | train_inner | epoch 003: 469 / 3002 loss=2.688, ppl=6.44, wps=5891, ups=0.09, wpb=64919, bsz=128, num_updates=6433, lr=9.99565e-05, gnorm=2.155, loss_scale=16, train_wall=11, gb_free=2.8, wall=74052 2021-06-19 15:13:09 | INFO | train_inner | epoch 003: 470 / 3002 loss=2.78, ppl=6.87, wps=5895.7, ups=0.09, wpb=64826, bsz=128, num_updates=6434, lr=9.99565e-05, gnorm=2.721, loss_scale=16, train_wall=11, gb_free=2.8, wall=74063 2021-06-19 15:13:20 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-19 15:13:31 | INFO | train_inner | epoch 003: 472 / 3002 loss=2.729, ppl=6.63, wps=2931, ups=0.05, wpb=64752, bsz=128, num_updates=6435, lr=9.99565e-05, gnorm=2.204, loss_scale=8, train_wall=21, gb_free=2.8, wall=74085 2021-06-19 15:13:42 | INFO | train_inner | epoch 003: 473 / 3002 loss=2.634, ppl=6.21, wps=5696, ups=0.09, wpb=64609, bsz=128, num_updates=6436, lr=9.99565e-05, gnorm=2.115, loss_scale=8, train_wall=11, gb_free=2.8, wall=74096 2021-06-19 15:13:53 | INFO | train_inner | epoch 003: 474 / 3002 loss=2.689, ppl=6.45, wps=5777.8, ups=0.09, wpb=64816, bsz=128, num_updates=6437, lr=9.99565e-05, gnorm=2.112, loss_scale=8, train_wall=11, gb_free=2.8, wall=74108 2021-06-19 15:14:05 | INFO | train_inner | epoch 003: 475 / 3002 loss=2.781, ppl=6.87, wps=5750.9, ups=0.09, wpb=64825, bsz=128, num_updates=6438, lr=9.99565e-05, gnorm=2.274, loss_scale=8, train_wall=11, gb_free=2.8, wall=74119 2021-06-19 15:14:16 | INFO | train_inner | epoch 003: 476 / 3002 loss=2.634, ppl=6.21, wps=5777.4, ups=0.09, wpb=64815, bsz=128, num_updates=6439, lr=9.99565e-05, gnorm=2.115, loss_scale=8, train_wall=11, gb_free=2.8, wall=74130 2021-06-19 15:14:27 | INFO | train_inner | epoch 003: 477 / 3002 loss=2.689, ppl=6.45, wps=5782, ups=0.09, wpb=64849, bsz=128, num_updates=6440, lr=9.99565e-05, gnorm=2.198, loss_scale=8, train_wall=11, gb_free=2.8, wall=74141 2021-06-19 15:14:38 | INFO | train_inner | epoch 003: 478 / 3002 loss=2.625, ppl=6.17, wps=5927.5, ups=0.09, wpb=64900, bsz=128, num_updates=6441, lr=9.99565e-05, gnorm=2.208, loss_scale=8, train_wall=11, gb_free=2.8, wall=74152 2021-06-19 15:14:49 | INFO | train_inner | epoch 003: 479 / 3002 loss=2.554, ppl=5.87, wps=5895.4, ups=0.09, wpb=64786, bsz=128, num_updates=6442, lr=9.99565e-05, gnorm=2.208, loss_scale=8, train_wall=10, gb_free=2.8, wall=74163 2021-06-19 15:15:00 | INFO | train_inner | epoch 003: 480 / 3002 loss=2.602, ppl=6.07, wps=5890.9, ups=0.09, wpb=64853, bsz=128, num_updates=6443, lr=9.99565e-05, gnorm=5.064, loss_scale=8, train_wall=11, gb_free=2.8, wall=74174 2021-06-19 15:15:11 | INFO | train_inner | epoch 003: 481 / 3002 loss=2.788, ppl=6.91, wps=5833.7, ups=0.09, wpb=64808, bsz=128, num_updates=6444, lr=9.99564e-05, gnorm=2.154, loss_scale=8, train_wall=11, gb_free=2.8, wall=74185 2021-06-19 15:15:22 | INFO | train_inner | epoch 003: 482 / 3002 loss=2.731, ppl=6.64, wps=5968.8, ups=0.09, wpb=64855, bsz=128, num_updates=6445, lr=9.99564e-05, gnorm=2.088, loss_scale=8, train_wall=10, gb_free=2.8, wall=74196 2021-06-19 15:15:33 | INFO | train_inner | epoch 003: 483 / 3002 loss=2.612, ppl=6.12, wps=5799.2, ups=0.09, wpb=64791, bsz=128, num_updates=6446, lr=9.99564e-05, gnorm=2.237, loss_scale=8, train_wall=11, gb_free=2.8, wall=74207 2021-06-19 15:15:44 | INFO | train_inner | epoch 003: 484 / 3002 loss=2.616, ppl=6.13, wps=5989.6, ups=0.09, wpb=64810, bsz=128, num_updates=6447, lr=9.99564e-05, gnorm=2.536, loss_scale=8, train_wall=10, gb_free=2.8, wall=74218 2021-06-19 15:15:55 | INFO | train_inner | epoch 003: 485 / 3002 loss=2.516, ppl=5.72, wps=5925, ups=0.09, wpb=64849, bsz=128, num_updates=6448, lr=9.99564e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=74229 2021-06-19 15:16:06 | INFO | train_inner | epoch 003: 486 / 3002 loss=2.583, ppl=5.99, wps=5980.9, ups=0.09, wpb=64864, bsz=128, num_updates=6449, lr=9.99564e-05, gnorm=2.248, loss_scale=8, train_wall=10, gb_free=2.8, wall=74240 2021-06-19 15:16:17 | INFO | train_inner | epoch 003: 487 / 3002 loss=2.909, ppl=7.51, wps=5879.3, ups=0.09, wpb=64781, bsz=128, num_updates=6450, lr=9.99564e-05, gnorm=2.228, loss_scale=8, train_wall=11, gb_free=2.8, wall=74251 2021-06-19 15:16:28 | INFO | train_inner | epoch 003: 488 / 3002 loss=2.845, ppl=7.19, wps=5797.3, ups=0.09, wpb=64804, bsz=128, num_updates=6451, lr=9.99564e-05, gnorm=2.23, loss_scale=8, train_wall=11, gb_free=2.8, wall=74262 2021-06-19 15:16:39 | INFO | train_inner | epoch 003: 489 / 3002 loss=2.666, ppl=6.35, wps=5775.7, ups=0.09, wpb=64839, bsz=128, num_updates=6452, lr=9.99564e-05, gnorm=2.165, loss_scale=8, train_wall=11, gb_free=2.8, wall=74273 2021-06-19 15:16:50 | INFO | train_inner | epoch 003: 490 / 3002 loss=2.593, ppl=6.03, wps=5908.7, ups=0.09, wpb=64790, bsz=128, num_updates=6453, lr=9.99564e-05, gnorm=2.29, loss_scale=8, train_wall=11, gb_free=2.8, wall=74284 2021-06-19 15:17:01 | INFO | train_inner | epoch 003: 491 / 3002 loss=2.774, ppl=6.84, wps=5967.1, ups=0.09, wpb=64801, bsz=128, num_updates=6454, lr=9.99564e-05, gnorm=2.211, loss_scale=8, train_wall=10, gb_free=2.8, wall=74295 2021-06-19 15:17:12 | INFO | train_inner | epoch 003: 492 / 3002 loss=2.668, ppl=6.36, wps=5889.8, ups=0.09, wpb=64738, bsz=128, num_updates=6455, lr=9.99564e-05, gnorm=2.17, loss_scale=8, train_wall=11, gb_free=2.8, wall=74306 2021-06-19 15:17:23 | INFO | train_inner | epoch 003: 493 / 3002 loss=2.576, ppl=5.96, wps=5716.3, ups=0.09, wpb=64837, bsz=128, num_updates=6456, lr=9.99563e-05, gnorm=2.183, loss_scale=8, train_wall=11, gb_free=2.8, wall=74318 2021-06-19 15:17:34 | INFO | train_inner | epoch 003: 494 / 3002 loss=2.569, ppl=5.93, wps=5834, ups=0.09, wpb=64828, bsz=128, num_updates=6457, lr=9.99563e-05, gnorm=2.151, loss_scale=8, train_wall=11, gb_free=2.8, wall=74329 2021-06-19 15:17:45 | INFO | train_inner | epoch 003: 495 / 3002 loss=2.754, ppl=6.75, wps=5861.8, ups=0.09, wpb=64712, bsz=128, num_updates=6458, lr=9.99563e-05, gnorm=2.191, loss_scale=8, train_wall=11, gb_free=2.8, wall=74340 2021-06-19 15:17:56 | INFO | train_inner | epoch 003: 496 / 3002 loss=2.665, ppl=6.34, wps=5943, ups=0.09, wpb=64841, bsz=128, num_updates=6459, lr=9.99563e-05, gnorm=2.142, loss_scale=8, train_wall=10, gb_free=2.8, wall=74351 2021-06-19 15:18:07 | INFO | train_inner | epoch 003: 497 / 3002 loss=2.666, ppl=6.35, wps=5851.4, ups=0.09, wpb=64804, bsz=128, num_updates=6460, lr=9.99563e-05, gnorm=2.394, loss_scale=8, train_wall=11, gb_free=2.8, wall=74362 2021-06-19 15:18:18 | INFO | train_inner | epoch 003: 498 / 3002 loss=2.658, ppl=6.31, wps=5863, ups=0.09, wpb=64880, bsz=128, num_updates=6461, lr=9.99563e-05, gnorm=2.506, loss_scale=8, train_wall=11, gb_free=2.8, wall=74373 2021-06-19 15:18:30 | INFO | train_inner | epoch 003: 499 / 3002 loss=2.592, ppl=6.03, wps=5772.2, ups=0.09, wpb=64754, bsz=128, num_updates=6462, lr=9.99563e-05, gnorm=2.108, loss_scale=8, train_wall=11, gb_free=2.8, wall=74384 2021-06-19 15:18:41 | INFO | train_inner | epoch 003: 500 / 3002 loss=2.624, ppl=6.16, wps=5794.2, ups=0.09, wpb=64769, bsz=128, num_updates=6463, lr=9.99563e-05, gnorm=2.184, loss_scale=8, train_wall=11, gb_free=2.8, wall=74395 2021-06-19 15:18:52 | INFO | train_inner | epoch 003: 501 / 3002 loss=2.523, ppl=5.75, wps=5876.2, ups=0.09, wpb=64904, bsz=128, num_updates=6464, lr=9.99563e-05, gnorm=2.2, loss_scale=8, train_wall=11, gb_free=2.8, wall=74406 2021-06-19 15:19:03 | INFO | train_inner | epoch 003: 502 / 3002 loss=2.514, ppl=5.71, wps=5838.8, ups=0.09, wpb=64832, bsz=128, num_updates=6465, lr=9.99563e-05, gnorm=3.054, loss_scale=8, train_wall=11, gb_free=2.8, wall=74417 2021-06-19 15:19:14 | INFO | train_inner | epoch 003: 503 / 3002 loss=2.646, ppl=6.26, wps=5812.5, ups=0.09, wpb=64892, bsz=128, num_updates=6466, lr=9.99563e-05, gnorm=2.238, loss_scale=8, train_wall=11, gb_free=2.8, wall=74429 2021-06-19 15:19:25 | INFO | train_inner | epoch 003: 504 / 3002 loss=2.813, ppl=7.03, wps=5868.6, ups=0.09, wpb=64827, bsz=128, num_updates=6467, lr=9.99563e-05, gnorm=2.107, loss_scale=8, train_wall=11, gb_free=2.8, wall=74440 2021-06-19 15:19:36 | INFO | train_inner | epoch 003: 505 / 3002 loss=2.745, ppl=6.7, wps=5848.5, ups=0.09, wpb=64780, bsz=128, num_updates=6468, lr=9.99563e-05, gnorm=2.144, loss_scale=8, train_wall=11, gb_free=2.8, wall=74451 2021-06-19 15:19:47 | INFO | train_inner | epoch 003: 506 / 3002 loss=2.907, ppl=7.5, wps=5841.4, ups=0.09, wpb=64850, bsz=128, num_updates=6469, lr=9.99562e-05, gnorm=2.211, loss_scale=8, train_wall=11, gb_free=2.8, wall=74462 2021-06-19 15:19:59 | INFO | train_inner | epoch 003: 507 / 3002 loss=2.785, ppl=6.89, wps=5795.3, ups=0.09, wpb=64809, bsz=128, num_updates=6470, lr=9.99562e-05, gnorm=2.096, loss_scale=8, train_wall=11, gb_free=2.8, wall=74473 2021-06-19 15:20:10 | INFO | train_inner | epoch 003: 508 / 3002 loss=2.619, ppl=6.14, wps=5794.2, ups=0.09, wpb=64853, bsz=128, num_updates=6471, lr=9.99562e-05, gnorm=2.258, loss_scale=8, train_wall=11, gb_free=2.8, wall=74484 2021-06-19 15:20:21 | INFO | train_inner | epoch 003: 509 / 3002 loss=2.564, ppl=5.91, wps=5899.7, ups=0.09, wpb=64928, bsz=128, num_updates=6472, lr=9.99562e-05, gnorm=2.064, loss_scale=8, train_wall=11, gb_free=2.8, wall=74495 2021-06-19 15:20:32 | INFO | train_inner | epoch 003: 510 / 3002 loss=2.868, ppl=7.3, wps=5843.5, ups=0.09, wpb=64798, bsz=128, num_updates=6473, lr=9.99562e-05, gnorm=2.192, loss_scale=8, train_wall=11, gb_free=2.8, wall=74506 2021-06-19 15:20:43 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 2021-06-19 15:20:54 | INFO | train_inner | epoch 003: 512 / 3002 loss=2.714, ppl=6.56, wps=2915.3, ups=0.05, wpb=64773, bsz=128, num_updates=6474, lr=9.99562e-05, gnorm=2.496, loss_scale=4, train_wall=21, gb_free=2.8, wall=74528 2021-06-19 15:21:05 | INFO | train_inner | epoch 003: 513 / 3002 loss=2.687, ppl=6.44, wps=5787.3, ups=0.09, wpb=64797, bsz=128, num_updates=6475, lr=9.99562e-05, gnorm=2.137, loss_scale=4, train_wall=11, gb_free=2.8, wall=74540 2021-06-19 15:21:16 | INFO | train_inner | epoch 003: 514 / 3002 loss=2.637, ppl=6.22, wps=5884.9, ups=0.09, wpb=64741, bsz=128, num_updates=6476, lr=9.99562e-05, gnorm=15.806, loss_scale=4, train_wall=11, gb_free=2.8, wall=74551 2021-06-19 15:21:27 | INFO | train_inner | epoch 003: 515 / 3002 loss=2.807, ppl=7, wps=5914, ups=0.09, wpb=64891, bsz=128, num_updates=6477, lr=9.99562e-05, gnorm=2.142, loss_scale=4, train_wall=10, gb_free=2.8, wall=74562 2021-06-19 15:21:38 | INFO | train_inner | epoch 003: 516 / 3002 loss=2.588, ppl=6.01, wps=5879.9, ups=0.09, wpb=64801, bsz=128, num_updates=6478, lr=9.99562e-05, gnorm=2.15, loss_scale=4, train_wall=11, gb_free=2.8, wall=74573 2021-06-19 15:21:50 | INFO | train_inner | epoch 003: 517 / 3002 loss=2.749, ppl=6.72, wps=5730.5, ups=0.09, wpb=64803, bsz=128, num_updates=6479, lr=9.99562e-05, gnorm=2.239, loss_scale=4, train_wall=11, gb_free=2.8, wall=74584 2021-06-19 15:22:01 | INFO | train_inner | epoch 003: 518 / 3002 loss=2.697, ppl=6.48, wps=5966.8, ups=0.09, wpb=64916, bsz=128, num_updates=6480, lr=9.99562e-05, gnorm=2.235, loss_scale=4, train_wall=10, gb_free=2.8, wall=74595 2021-06-19 15:22:12 | INFO | train_inner | epoch 003: 519 / 3002 loss=2.659, ppl=6.31, wps=5861.7, ups=0.09, wpb=64851, bsz=128, num_updates=6481, lr=9.99561e-05, gnorm=2.098, loss_scale=4, train_wall=11, gb_free=2.8, wall=74606 2021-06-19 15:22:23 | INFO | train_inner | epoch 003: 520 / 3002 loss=2.527, ppl=5.76, wps=5862, ups=0.09, wpb=64727, bsz=128, num_updates=6482, lr=9.99561e-05, gnorm=2.232, loss_scale=4, train_wall=11, gb_free=2.8, wall=74617 2021-06-19 15:22:34 | INFO | train_inner | epoch 003: 521 / 3002 loss=2.618, ppl=6.14, wps=5911.1, ups=0.09, wpb=64821, bsz=128, num_updates=6483, lr=9.99561e-05, gnorm=2.256, loss_scale=4, train_wall=11, gb_free=2.8, wall=74628 2021-06-19 15:22:45 | INFO | train_inner | epoch 003: 522 / 3002 loss=2.813, ppl=7.03, wps=5913, ups=0.09, wpb=64790, bsz=128, num_updates=6484, lr=9.99561e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=74639 2021-06-19 15:22:56 | INFO | train_inner | epoch 003: 523 / 3002 loss=2.705, ppl=6.52, wps=5829.1, ups=0.09, wpb=64833, bsz=128, num_updates=6485, lr=9.99561e-05, gnorm=2.215, loss_scale=4, train_wall=11, gb_free=2.8, wall=74650 2021-06-19 15:23:07 | INFO | train_inner | epoch 003: 524 / 3002 loss=2.664, ppl=6.34, wps=5861.6, ups=0.09, wpb=64830, bsz=128, num_updates=6486, lr=9.99561e-05, gnorm=2.198, loss_scale=4, train_wall=11, gb_free=2.8, wall=74661 2021-06-19 15:23:18 | INFO | train_inner | epoch 003: 525 / 3002 loss=2.687, ppl=6.44, wps=5862.3, ups=0.09, wpb=64755, bsz=128, num_updates=6487, lr=9.99561e-05, gnorm=2.3, loss_scale=4, train_wall=11, gb_free=2.8, wall=74672 2021-06-19 15:23:29 | INFO | train_inner | epoch 003: 526 / 3002 loss=2.746, ppl=6.71, wps=5920.8, ups=0.09, wpb=64763, bsz=128, num_updates=6488, lr=9.99561e-05, gnorm=2.249, loss_scale=4, train_wall=11, gb_free=2.8, wall=74683 2021-06-19 15:23:40 | INFO | train_inner | epoch 003: 527 / 3002 loss=2.544, ppl=5.83, wps=5924, ups=0.09, wpb=64793, bsz=128, num_updates=6489, lr=9.99561e-05, gnorm=2.142, loss_scale=4, train_wall=11, gb_free=2.8, wall=74694 2021-06-19 15:23:51 | INFO | train_inner | epoch 003: 528 / 3002 loss=2.609, ppl=6.1, wps=5870, ups=0.09, wpb=64853, bsz=128, num_updates=6490, lr=9.99561e-05, gnorm=2.275, loss_scale=4, train_wall=11, gb_free=2.8, wall=74705 2021-06-19 15:24:02 | INFO | train_inner | epoch 003: 529 / 3002 loss=2.65, ppl=6.28, wps=5807.8, ups=0.09, wpb=64904, bsz=128, num_updates=6491, lr=9.99561e-05, gnorm=2.148, loss_scale=4, train_wall=11, gb_free=2.8, wall=74716 2021-06-19 15:24:13 | INFO | train_inner | epoch 003: 530 / 3002 loss=2.554, ppl=5.87, wps=5743, ups=0.09, wpb=64822, bsz=128, num_updates=6492, lr=9.99561e-05, gnorm=2.14, loss_scale=4, train_wall=11, gb_free=2.8, wall=74727 2021-06-19 15:24:24 | INFO | train_inner | epoch 003: 531 / 3002 loss=2.654, ppl=6.29, wps=5752.5, ups=0.09, wpb=64891, bsz=128, num_updates=6493, lr=9.99561e-05, gnorm=2.233, loss_scale=4, train_wall=11, gb_free=2.8, wall=74739 2021-06-19 15:24:36 | INFO | train_inner | epoch 003: 532 / 3002 loss=2.78, ppl=6.87, wps=5831.1, ups=0.09, wpb=64850, bsz=128, num_updates=6494, lr=9.9956e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=74750 2021-06-19 15:24:47 | INFO | train_inner | epoch 003: 533 / 3002 loss=2.627, ppl=6.18, wps=5748.7, ups=0.09, wpb=64766, bsz=128, num_updates=6495, lr=9.9956e-05, gnorm=2.137, loss_scale=4, train_wall=11, gb_free=2.8, wall=74761 2021-06-19 15:24:58 | INFO | train_inner | epoch 003: 534 / 3002 loss=2.58, ppl=5.98, wps=5829.1, ups=0.09, wpb=64844, bsz=128, num_updates=6496, lr=9.9956e-05, gnorm=2.145, loss_scale=4, train_wall=11, gb_free=2.8, wall=74772 2021-06-19 15:25:09 | INFO | train_inner | epoch 003: 535 / 3002 loss=2.594, ppl=6.04, wps=5823.2, ups=0.09, wpb=64862, bsz=128, num_updates=6497, lr=9.9956e-05, gnorm=2.271, loss_scale=4, train_wall=11, gb_free=2.8, wall=74783 2021-06-19 15:25:20 | INFO | train_inner | epoch 003: 536 / 3002 loss=2.63, ppl=6.19, wps=5872.6, ups=0.09, wpb=64831, bsz=128, num_updates=6498, lr=9.9956e-05, gnorm=2.312, loss_scale=4, train_wall=11, gb_free=2.8, wall=74794 2021-06-19 15:25:31 | INFO | train_inner | epoch 003: 537 / 3002 loss=2.692, ppl=6.46, wps=5860.2, ups=0.09, wpb=64853, bsz=128, num_updates=6499, lr=9.9956e-05, gnorm=2.327, loss_scale=4, train_wall=11, gb_free=2.8, wall=74806 2021-06-19 15:25:42 | INFO | train_inner | epoch 003: 538 / 3002 loss=2.739, ppl=6.68, wps=5925, ups=0.09, wpb=64785, bsz=128, num_updates=6500, lr=9.9956e-05, gnorm=2.239, loss_scale=4, train_wall=10, gb_free=2.8, wall=74816 2021-06-19 15:25:53 | INFO | train_inner | epoch 003: 539 / 3002 loss=2.546, ppl=5.84, wps=5928.3, ups=0.09, wpb=64784, bsz=128, num_updates=6501, lr=9.9956e-05, gnorm=2.25, loss_scale=4, train_wall=10, gb_free=2.8, wall=74827 2021-06-19 15:26:04 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-19 15:26:15 | INFO | train_inner | epoch 003: 541 / 3002 loss=2.589, ppl=6.01, wps=2952.9, ups=0.05, wpb=64843, bsz=128, num_updates=6502, lr=9.9956e-05, gnorm=2.274, loss_scale=2, train_wall=21, gb_free=2.8, wall=74849 2021-06-19 15:26:26 | INFO | train_inner | epoch 003: 542 / 3002 loss=2.698, ppl=6.49, wps=5840.9, ups=0.09, wpb=64740, bsz=128, num_updates=6503, lr=9.9956e-05, gnorm=2.174, loss_scale=2, train_wall=11, gb_free=2.8, wall=74860 2021-06-19 15:26:37 | INFO | train_inner | epoch 003: 543 / 3002 loss=2.498, ppl=5.65, wps=5891.4, ups=0.09, wpb=64875, bsz=128, num_updates=6504, lr=9.9956e-05, gnorm=2.101, loss_scale=2, train_wall=11, gb_free=2.8, wall=74871 2021-06-19 15:26:48 | INFO | train_inner | epoch 003: 544 / 3002 loss=2.628, ppl=6.18, wps=5701.9, ups=0.09, wpb=64868, bsz=128, num_updates=6505, lr=9.9956e-05, gnorm=2.177, loss_scale=2, train_wall=11, gb_free=2.8, wall=74883 2021-06-19 15:27:00 | INFO | train_inner | epoch 003: 545 / 3002 loss=2.58, ppl=5.98, wps=5732.9, ups=0.09, wpb=64820, bsz=128, num_updates=6506, lr=9.99559e-05, gnorm=2.257, loss_scale=2, train_wall=11, gb_free=2.8, wall=74894 2021-06-19 15:27:11 | INFO | train_inner | epoch 003: 546 / 3002 loss=2.613, ppl=6.12, wps=5804.1, ups=0.09, wpb=64772, bsz=128, num_updates=6507, lr=9.99559e-05, gnorm=2.141, loss_scale=2, train_wall=11, gb_free=2.8, wall=74905 2021-06-19 15:27:22 | INFO | train_inner | epoch 003: 547 / 3002 loss=2.701, ppl=6.5, wps=5924.1, ups=0.09, wpb=64860, bsz=128, num_updates=6508, lr=9.99559e-05, gnorm=2.254, loss_scale=2, train_wall=10, gb_free=2.8, wall=74916 2021-06-19 15:27:33 | INFO | train_inner | epoch 003: 548 / 3002 loss=2.775, ppl=6.84, wps=5817.7, ups=0.09, wpb=64794, bsz=128, num_updates=6509, lr=9.99559e-05, gnorm=2.739, loss_scale=2, train_wall=11, gb_free=2.8, wall=74927 2021-06-19 15:27:44 | INFO | train_inner | epoch 003: 549 / 3002 loss=2.591, ppl=6.03, wps=5764, ups=0.09, wpb=64878, bsz=128, num_updates=6510, lr=9.99559e-05, gnorm=2.826, loss_scale=2, train_wall=11, gb_free=2.8, wall=74939 2021-06-19 15:27:55 | INFO | train_inner | epoch 003: 550 / 3002 loss=2.529, ppl=5.77, wps=5880.6, ups=0.09, wpb=64898, bsz=128, num_updates=6511, lr=9.99559e-05, gnorm=2.162, loss_scale=2, train_wall=11, gb_free=2.8, wall=74950 2021-06-19 15:28:07 | INFO | train_inner | epoch 003: 551 / 3002 loss=2.592, ppl=6.03, wps=5774.9, ups=0.09, wpb=64832, bsz=128, num_updates=6512, lr=9.99559e-05, gnorm=2.194, loss_scale=2, train_wall=11, gb_free=2.8, wall=74961 2021-06-19 15:28:18 | INFO | train_inner | epoch 003: 552 / 3002 loss=2.748, ppl=6.72, wps=5917, ups=0.09, wpb=64849, bsz=128, num_updates=6513, lr=9.99559e-05, gnorm=2.232, loss_scale=2, train_wall=11, gb_free=2.8, wall=74972 2021-06-19 15:28:28 | INFO | train_inner | epoch 003: 553 / 3002 loss=2.726, ppl=6.61, wps=5999.9, ups=0.09, wpb=64858, bsz=128, num_updates=6514, lr=9.99559e-05, gnorm=2.095, loss_scale=2, train_wall=10, gb_free=2.8, wall=74983 2021-06-19 15:28:40 | INFO | train_inner | epoch 003: 554 / 3002 loss=2.632, ppl=6.2, wps=5786.2, ups=0.09, wpb=64847, bsz=128, num_updates=6515, lr=9.99559e-05, gnorm=2.206, loss_scale=2, train_wall=11, gb_free=2.8, wall=74994 2021-06-19 15:28:51 | INFO | train_inner | epoch 003: 555 / 3002 loss=2.759, ppl=6.77, wps=5863.3, ups=0.09, wpb=64863, bsz=128, num_updates=6516, lr=9.99559e-05, gnorm=2.194, loss_scale=2, train_wall=11, gb_free=2.8, wall=75005 2021-06-19 15:29:02 | INFO | train_inner | epoch 003: 556 / 3002 loss=2.733, ppl=6.65, wps=5873.4, ups=0.09, wpb=64823, bsz=128, num_updates=6517, lr=9.99559e-05, gnorm=2.192, loss_scale=2, train_wall=11, gb_free=2.8, wall=75016 2021-06-19 15:29:13 | INFO | train_inner | epoch 003: 557 / 3002 loss=2.454, ppl=5.48, wps=5830.5, ups=0.09, wpb=64887, bsz=128, num_updates=6518, lr=9.99559e-05, gnorm=2.076, loss_scale=2, train_wall=11, gb_free=2.8, wall=75027 2021-06-19 15:29:24 | INFO | train_inner | epoch 003: 558 / 3002 loss=2.653, ppl=6.29, wps=5715.7, ups=0.09, wpb=64883, bsz=128, num_updates=6519, lr=9.99558e-05, gnorm=2.168, loss_scale=2, train_wall=11, gb_free=2.8, wall=75038 2021-06-19 15:29:35 | INFO | train_inner | epoch 003: 559 / 3002 loss=2.657, ppl=6.31, wps=5801.6, ups=0.09, wpb=64823, bsz=128, num_updates=6520, lr=9.99558e-05, gnorm=2.186, loss_scale=2, train_wall=11, gb_free=2.8, wall=75050 2021-06-19 15:29:47 | INFO | train_inner | epoch 003: 560 / 3002 loss=2.831, ppl=7.12, wps=5722.1, ups=0.09, wpb=64807, bsz=128, num_updates=6521, lr=9.99558e-05, gnorm=2.122, loss_scale=2, train_wall=11, gb_free=2.8, wall=75061 2021-06-19 15:29:57 | INFO | train_inner | epoch 003: 561 / 3002 loss=2.546, ppl=5.84, wps=5986.8, ups=0.09, wpb=64842, bsz=128, num_updates=6522, lr=9.99558e-05, gnorm=2.194, loss_scale=2, train_wall=10, gb_free=2.8, wall=75072 2021-06-19 15:30:09 | INFO | train_inner | epoch 003: 562 / 3002 loss=2.68, ppl=6.41, wps=5848.7, ups=0.09, wpb=64919, bsz=128, num_updates=6523, lr=9.99558e-05, gnorm=2.212, loss_scale=2, train_wall=11, gb_free=2.8, wall=75083 2021-06-19 15:30:20 | INFO | train_inner | epoch 003: 563 / 3002 loss=2.727, ppl=6.62, wps=5787.5, ups=0.09, wpb=64781, bsz=128, num_updates=6524, lr=9.99558e-05, gnorm=2.158, loss_scale=2, train_wall=11, gb_free=2.8, wall=75094 2021-06-19 15:30:31 | INFO | train_inner | epoch 003: 564 / 3002 loss=2.602, ppl=6.07, wps=5817.9, ups=0.09, wpb=64821, bsz=128, num_updates=6525, lr=9.99558e-05, gnorm=2.268, loss_scale=2, train_wall=11, gb_free=2.8, wall=75105 2021-06-19 15:30:42 | INFO | train_inner | epoch 003: 565 / 3002 loss=2.588, ppl=6.01, wps=5822.9, ups=0.09, wpb=64866, bsz=128, num_updates=6526, lr=9.99558e-05, gnorm=2.209, loss_scale=2, train_wall=11, gb_free=2.8, wall=75116 2021-06-19 15:30:53 | INFO | train_inner | epoch 003: 566 / 3002 loss=2.796, ppl=6.94, wps=5954.5, ups=0.09, wpb=64911, bsz=128, num_updates=6527, lr=9.99558e-05, gnorm=2.262, loss_scale=2, train_wall=10, gb_free=2.8, wall=75127 2021-06-19 15:31:04 | INFO | train_inner | epoch 003: 567 / 3002 loss=2.743, ppl=6.69, wps=5773.9, ups=0.09, wpb=64778, bsz=128, num_updates=6528, lr=9.99558e-05, gnorm=2.233, loss_scale=2, train_wall=11, gb_free=2.8, wall=75138 2021-06-19 15:31:15 | INFO | train_inner | epoch 003: 568 / 3002 loss=2.674, ppl=6.38, wps=5912.8, ups=0.09, wpb=64801, bsz=128, num_updates=6529, lr=9.99558e-05, gnorm=2.652, loss_scale=2, train_wall=10, gb_free=2.8, wall=75149 2021-06-19 15:31:26 | INFO | train_inner | epoch 003: 569 / 3002 loss=2.655, ppl=6.3, wps=5751.4, ups=0.09, wpb=64875, bsz=128, num_updates=6530, lr=9.99558e-05, gnorm=2.151, loss_scale=2, train_wall=11, gb_free=2.8, wall=75161 2021-06-19 15:31:38 | INFO | train_inner | epoch 003: 570 / 3002 loss=2.785, ppl=6.89, wps=5805.8, ups=0.09, wpb=64806, bsz=128, num_updates=6531, lr=9.99557e-05, gnorm=2.167, loss_scale=2, train_wall=11, gb_free=2.8, wall=75172 2021-06-19 15:31:48 | INFO | train_inner | epoch 003: 571 / 3002 loss=2.602, ppl=6.07, wps=5966.1, ups=0.09, wpb=64872, bsz=128, num_updates=6532, lr=9.99557e-05, gnorm=2.149, loss_scale=2, train_wall=10, gb_free=2.8, wall=75183 2021-06-19 15:32:00 | INFO | train_inner | epoch 003: 572 / 3002 loss=2.782, ppl=6.88, wps=5752.3, ups=0.09, wpb=64836, bsz=128, num_updates=6533, lr=9.99557e-05, gnorm=2.319, loss_scale=2, train_wall=11, gb_free=2.8, wall=75194 2021-06-19 15:32:11 | INFO | train_inner | epoch 003: 573 / 3002 loss=2.826, ppl=7.09, wps=5887, ups=0.09, wpb=64749, bsz=128, num_updates=6534, lr=9.99557e-05, gnorm=2.144, loss_scale=2, train_wall=11, gb_free=2.8, wall=75205 2021-06-19 15:32:22 | INFO | train_inner | epoch 003: 574 / 3002 loss=2.678, ppl=6.4, wps=5780, ups=0.09, wpb=64755, bsz=128, num_updates=6535, lr=9.99557e-05, gnorm=2.542, loss_scale=2, train_wall=11, gb_free=2.8, wall=75216 2021-06-19 15:32:33 | INFO | train_inner | epoch 003: 575 / 3002 loss=2.581, ppl=5.98, wps=5899.1, ups=0.09, wpb=64820, bsz=128, num_updates=6536, lr=9.99557e-05, gnorm=2.116, loss_scale=2, train_wall=11, gb_free=2.8, wall=75227 2021-06-19 15:32:44 | INFO | train_inner | epoch 003: 576 / 3002 loss=2.622, ppl=6.15, wps=5787.6, ups=0.09, wpb=64839, bsz=128, num_updates=6537, lr=9.99557e-05, gnorm=2.24, loss_scale=2, train_wall=11, gb_free=2.8, wall=75238 2021-06-19 15:32:55 | INFO | train_inner | epoch 003: 577 / 3002 loss=2.571, ppl=5.94, wps=5845.4, ups=0.09, wpb=64960, bsz=128, num_updates=6538, lr=9.99557e-05, gnorm=2.223, loss_scale=2, train_wall=11, gb_free=2.8, wall=75250 2021-06-19 15:33:06 | INFO | train_inner | epoch 003: 578 / 3002 loss=2.818, ppl=7.05, wps=5913.9, ups=0.09, wpb=64914, bsz=128, num_updates=6539, lr=9.99557e-05, gnorm=2.194, loss_scale=2, train_wall=11, gb_free=2.8, wall=75261 2021-06-19 15:33:17 | INFO | train_inner | epoch 003: 579 / 3002 loss=2.509, ppl=5.69, wps=5973.2, ups=0.09, wpb=64819, bsz=128, num_updates=6540, lr=9.99557e-05, gnorm=3.497, loss_scale=2, train_wall=10, gb_free=2.8, wall=75271 2021-06-19 15:33:28 | INFO | train_inner | epoch 003: 580 / 3002 loss=2.868, ppl=7.3, wps=5915, ups=0.09, wpb=64840, bsz=128, num_updates=6541, lr=9.99557e-05, gnorm=2.249, loss_scale=2, train_wall=11, gb_free=2.8, wall=75282 2021-06-19 15:33:39 | INFO | train_inner | epoch 003: 581 / 3002 loss=2.603, ppl=6.07, wps=5759.2, ups=0.09, wpb=64865, bsz=128, num_updates=6542, lr=9.99557e-05, gnorm=5.38, loss_scale=2, train_wall=11, gb_free=2.8, wall=75294 2021-06-19 15:33:50 | INFO | train_inner | epoch 003: 582 / 3002 loss=2.611, ppl=6.11, wps=5915.8, ups=0.09, wpb=64756, bsz=128, num_updates=6543, lr=9.99557e-05, gnorm=2.123, loss_scale=2, train_wall=11, gb_free=2.8, wall=75305 2021-06-19 15:34:01 | INFO | train_inner | epoch 003: 583 / 3002 loss=2.519, ppl=5.73, wps=5984.2, ups=0.09, wpb=64915, bsz=128, num_updates=6544, lr=9.99556e-05, gnorm=2.122, loss_scale=2, train_wall=10, gb_free=2.8, wall=75315 2021-06-19 15:34:12 | INFO | train_inner | epoch 003: 584 / 3002 loss=2.836, ppl=7.14, wps=5973.4, ups=0.09, wpb=64818, bsz=128, num_updates=6545, lr=9.99556e-05, gnorm=2.107, loss_scale=2, train_wall=10, gb_free=2.8, wall=75326 2021-06-19 15:34:23 | INFO | train_inner | epoch 003: 585 / 3002 loss=2.649, ppl=6.27, wps=5811.5, ups=0.09, wpb=64816, bsz=128, num_updates=6546, lr=9.99556e-05, gnorm=3.762, loss_scale=2, train_wall=11, gb_free=2.8, wall=75337 2021-06-19 15:34:34 | INFO | train_inner | epoch 003: 586 / 3002 loss=2.649, ppl=6.27, wps=5821.8, ups=0.09, wpb=64814, bsz=128, num_updates=6547, lr=9.99556e-05, gnorm=2.176, loss_scale=2, train_wall=11, gb_free=2.8, wall=75349 2021-06-19 15:34:45 | INFO | train_inner | epoch 003: 587 / 3002 loss=2.698, ppl=6.49, wps=5912.6, ups=0.09, wpb=64779, bsz=128, num_updates=6548, lr=9.99556e-05, gnorm=2.228, loss_scale=2, train_wall=10, gb_free=2.8, wall=75359 2021-06-19 15:34:56 | INFO | train_inner | epoch 003: 588 / 3002 loss=2.778, ppl=6.86, wps=5880.4, ups=0.09, wpb=64817, bsz=128, num_updates=6549, lr=9.99556e-05, gnorm=2.185, loss_scale=2, train_wall=11, gb_free=2.8, wall=75371 2021-06-19 15:35:07 | INFO | train_inner | epoch 003: 589 / 3002 loss=2.672, ppl=6.37, wps=5858, ups=0.09, wpb=64754, bsz=128, num_updates=6550, lr=9.99556e-05, gnorm=2.159, loss_scale=2, train_wall=11, gb_free=2.8, wall=75382 2021-06-19 15:35:18 | INFO | train_inner | epoch 003: 590 / 3002 loss=2.739, ppl=6.68, wps=5810.3, ups=0.09, wpb=64804, bsz=128, num_updates=6551, lr=9.99556e-05, gnorm=2.193, loss_scale=2, train_wall=11, gb_free=2.8, wall=75393 2021-06-19 15:35:29 | INFO | train_inner | epoch 003: 591 / 3002 loss=2.564, ppl=5.91, wps=5846.5, ups=0.09, wpb=64910, bsz=128, num_updates=6552, lr=9.99556e-05, gnorm=2.175, loss_scale=2, train_wall=11, gb_free=2.8, wall=75404 2021-06-19 15:35:40 | INFO | train_inner | epoch 003: 592 / 3002 loss=2.669, ppl=6.36, wps=5909.4, ups=0.09, wpb=64834, bsz=128, num_updates=6553, lr=9.99556e-05, gnorm=2.1, loss_scale=2, train_wall=11, gb_free=2.8, wall=75415 2021-06-19 15:35:52 | INFO | train_inner | epoch 003: 593 / 3002 loss=2.596, ppl=6.05, wps=5827.2, ups=0.09, wpb=64842, bsz=128, num_updates=6554, lr=9.99556e-05, gnorm=2.12, loss_scale=2, train_wall=11, gb_free=2.8, wall=75426 2021-06-19 15:36:03 | INFO | train_inner | epoch 003: 594 / 3002 loss=2.45, ppl=5.46, wps=5916.2, ups=0.09, wpb=64853, bsz=128, num_updates=6555, lr=9.99556e-05, gnorm=2.199, loss_scale=2, train_wall=11, gb_free=2.8, wall=75437 2021-06-19 15:36:14 | INFO | train_inner | epoch 003: 595 / 3002 loss=2.575, ppl=5.96, wps=5857.5, ups=0.09, wpb=64867, bsz=128, num_updates=6556, lr=9.99555e-05, gnorm=2.212, loss_scale=2, train_wall=11, gb_free=2.8, wall=75448 2021-06-19 15:36:25 | INFO | train_inner | epoch 003: 596 / 3002 loss=2.554, ppl=5.87, wps=5748.3, ups=0.09, wpb=64821, bsz=128, num_updates=6557, lr=9.99555e-05, gnorm=2.255, loss_scale=2, train_wall=11, gb_free=2.8, wall=75459 2021-06-19 15:36:36 | INFO | train_inner | epoch 003: 597 / 3002 loss=2.478, ppl=5.57, wps=5870.6, ups=0.09, wpb=64822, bsz=128, num_updates=6558, lr=9.99555e-05, gnorm=2.223, loss_scale=2, train_wall=11, gb_free=2.8, wall=75470 2021-06-19 15:36:47 | INFO | train_inner | epoch 003: 598 / 3002 loss=2.696, ppl=6.48, wps=5969.4, ups=0.09, wpb=64922, bsz=128, num_updates=6559, lr=9.99555e-05, gnorm=2.224, loss_scale=2, train_wall=10, gb_free=2.8, wall=75481 2021-06-19 15:36:58 | INFO | train_inner | epoch 003: 599 / 3002 loss=2.7, ppl=6.5, wps=5942.2, ups=0.09, wpb=64826, bsz=128, num_updates=6560, lr=9.99555e-05, gnorm=2.083, loss_scale=2, train_wall=10, gb_free=2.8, wall=75492 2021-06-19 15:37:09 | INFO | train_inner | epoch 003: 600 / 3002 loss=2.772, ppl=6.83, wps=5831.4, ups=0.09, wpb=64774, bsz=128, num_updates=6561, lr=9.99555e-05, gnorm=2.155, loss_scale=2, train_wall=11, gb_free=2.8, wall=75503 2021-06-19 15:37:20 | INFO | train_inner | epoch 003: 601 / 3002 loss=2.751, ppl=6.73, wps=5723.3, ups=0.09, wpb=64791, bsz=128, num_updates=6562, lr=9.99555e-05, gnorm=2.241, loss_scale=2, train_wall=11, gb_free=2.8, wall=75514 2021-06-19 15:37:31 | INFO | train_inner | epoch 003: 602 / 3002 loss=2.782, ppl=6.88, wps=5853, ups=0.09, wpb=64869, bsz=128, num_updates=6563, lr=9.99555e-05, gnorm=2.206, loss_scale=2, train_wall=11, gb_free=2.8, wall=75526 2021-06-19 15:37:42 | INFO | train_inner | epoch 003: 603 / 3002 loss=2.643, ppl=6.24, wps=5818.7, ups=0.09, wpb=64772, bsz=128, num_updates=6564, lr=9.99555e-05, gnorm=2.141, loss_scale=2, train_wall=11, gb_free=2.8, wall=75537 2021-06-19 15:37:53 | INFO | train_inner | epoch 003: 604 / 3002 loss=2.573, ppl=5.95, wps=5856.2, ups=0.09, wpb=64917, bsz=128, num_updates=6565, lr=9.99555e-05, gnorm=2.162, loss_scale=2, train_wall=11, gb_free=2.8, wall=75548 2021-06-19 15:38:04 | INFO | train_inner | epoch 003: 605 / 3002 loss=2.701, ppl=6.5, wps=5988.2, ups=0.09, wpb=64921, bsz=128, num_updates=6566, lr=9.99555e-05, gnorm=2.225, loss_scale=2, train_wall=10, gb_free=2.8, wall=75559 2021-06-19 15:38:15 | INFO | train_inner | epoch 003: 606 / 3002 loss=2.594, ppl=6.04, wps=5904.1, ups=0.09, wpb=64955, bsz=128, num_updates=6567, lr=9.99555e-05, gnorm=2.151, loss_scale=2, train_wall=11, gb_free=2.8, wall=75570 2021-06-19 15:38:26 | INFO | train_inner | epoch 003: 607 / 3002 loss=2.663, ppl=6.33, wps=5978.3, ups=0.09, wpb=64834, bsz=128, num_updates=6568, lr=9.99555e-05, gnorm=2.109, loss_scale=2, train_wall=10, gb_free=2.8, wall=75580 2021-06-19 15:38:37 | INFO | train_inner | epoch 003: 608 / 3002 loss=2.738, ppl=6.67, wps=5855.1, ups=0.09, wpb=64904, bsz=128, num_updates=6569, lr=9.99554e-05, gnorm=2.25, loss_scale=2, train_wall=11, gb_free=2.8, wall=75592 2021-06-19 15:38:48 | INFO | train_inner | epoch 003: 609 / 3002 loss=2.726, ppl=6.62, wps=5858.3, ups=0.09, wpb=64884, bsz=128, num_updates=6570, lr=9.99554e-05, gnorm=2.781, loss_scale=2, train_wall=11, gb_free=2.8, wall=75603 2021-06-19 15:38:59 | INFO | train_inner | epoch 003: 610 / 3002 loss=2.542, ppl=5.82, wps=5904.3, ups=0.09, wpb=64875, bsz=128, num_updates=6571, lr=9.99554e-05, gnorm=2.176, loss_scale=2, train_wall=11, gb_free=2.8, wall=75614 2021-06-19 15:39:10 | INFO | train_inner | epoch 003: 611 / 3002 loss=2.84, ppl=7.16, wps=5822, ups=0.09, wpb=64802, bsz=128, num_updates=6572, lr=9.99554e-05, gnorm=2.237, loss_scale=2, train_wall=11, gb_free=2.8, wall=75625 2021-06-19 15:39:21 | INFO | train_inner | epoch 003: 612 / 3002 loss=2.664, ppl=6.34, wps=5868.4, ups=0.09, wpb=64817, bsz=128, num_updates=6573, lr=9.99554e-05, gnorm=2.176, loss_scale=2, train_wall=11, gb_free=2.8, wall=75636 2021-06-19 15:39:33 | INFO | train_inner | epoch 003: 613 / 3002 loss=2.723, ppl=6.6, wps=5858.5, ups=0.09, wpb=64835, bsz=128, num_updates=6574, lr=9.99554e-05, gnorm=2.178, loss_scale=2, train_wall=11, gb_free=2.8, wall=75647 2021-06-19 15:39:43 | INFO | train_inner | epoch 003: 614 / 3002 loss=2.535, ppl=5.79, wps=5949.4, ups=0.09, wpb=64821, bsz=128, num_updates=6575, lr=9.99554e-05, gnorm=3.817, loss_scale=2, train_wall=10, gb_free=2.8, wall=75658 2021-06-19 15:39:55 | INFO | train_inner | epoch 003: 615 / 3002 loss=2.568, ppl=5.93, wps=5838.5, ups=0.09, wpb=64860, bsz=128, num_updates=6576, lr=9.99554e-05, gnorm=2.231, loss_scale=2, train_wall=11, gb_free=2.8, wall=75669 2021-06-19 15:40:06 | INFO | train_inner | epoch 003: 616 / 3002 loss=2.604, ppl=6.08, wps=5751.7, ups=0.09, wpb=64771, bsz=128, num_updates=6577, lr=9.99554e-05, gnorm=2.275, loss_scale=2, train_wall=11, gb_free=2.8, wall=75680 2021-06-19 15:40:17 | INFO | train_inner | epoch 003: 617 / 3002 loss=2.79, ppl=6.92, wps=5763.9, ups=0.09, wpb=64806, bsz=128, num_updates=6578, lr=9.99554e-05, gnorm=2.206, loss_scale=2, train_wall=11, gb_free=2.8, wall=75691 2021-06-19 15:40:28 | INFO | train_inner | epoch 003: 618 / 3002 loss=2.727, ppl=6.62, wps=5832.3, ups=0.09, wpb=64814, bsz=128, num_updates=6579, lr=9.99554e-05, gnorm=2.252, loss_scale=2, train_wall=11, gb_free=2.8, wall=75702 2021-06-19 15:40:39 | INFO | train_inner | epoch 003: 619 / 3002 loss=2.708, ppl=6.53, wps=5825.4, ups=0.09, wpb=64890, bsz=128, num_updates=6580, lr=9.99554e-05, gnorm=2.17, loss_scale=2, train_wall=11, gb_free=2.8, wall=75714 2021-06-19 15:40:50 | INFO | train_inner | epoch 003: 620 / 3002 loss=2.73, ppl=6.64, wps=5792.6, ups=0.09, wpb=64771, bsz=128, num_updates=6581, lr=9.99553e-05, gnorm=2.07, loss_scale=2, train_wall=11, gb_free=2.8, wall=75725 2021-06-19 15:41:02 | INFO | train_inner | epoch 003: 621 / 3002 loss=2.573, ppl=5.95, wps=5818.8, ups=0.09, wpb=64789, bsz=128, num_updates=6582, lr=9.99553e-05, gnorm=2.142, loss_scale=2, train_wall=11, gb_free=2.8, wall=75736 2021-06-19 15:41:13 | INFO | train_inner | epoch 003: 622 / 3002 loss=2.701, ppl=6.5, wps=5900.6, ups=0.09, wpb=64825, bsz=128, num_updates=6583, lr=9.99553e-05, gnorm=2.213, loss_scale=2, train_wall=11, gb_free=2.8, wall=75747 2021-06-19 15:41:24 | INFO | train_inner | epoch 003: 623 / 3002 loss=2.803, ppl=6.98, wps=5832.1, ups=0.09, wpb=64881, bsz=128, num_updates=6584, lr=9.99553e-05, gnorm=2.633, loss_scale=2, train_wall=11, gb_free=2.8, wall=75758 2021-06-19 15:41:35 | INFO | train_inner | epoch 003: 624 / 3002 loss=2.69, ppl=6.45, wps=5925.6, ups=0.09, wpb=64822, bsz=128, num_updates=6585, lr=9.99553e-05, gnorm=12.447, loss_scale=2, train_wall=10, gb_free=2.8, wall=75769 2021-06-19 15:41:46 | INFO | train_inner | epoch 003: 625 / 3002 loss=2.768, ppl=6.81, wps=5921, ups=0.09, wpb=64781, bsz=128, num_updates=6586, lr=9.99553e-05, gnorm=2.321, loss_scale=2, train_wall=10, gb_free=2.8, wall=75780 2021-06-19 15:41:57 | INFO | train_inner | epoch 003: 626 / 3002 loss=2.708, ppl=6.54, wps=5872.3, ups=0.09, wpb=64854, bsz=128, num_updates=6587, lr=9.99553e-05, gnorm=2.28, loss_scale=2, train_wall=11, gb_free=2.8, wall=75791 2021-06-19 15:42:08 | INFO | train_inner | epoch 003: 627 / 3002 loss=2.762, ppl=6.78, wps=5808.9, ups=0.09, wpb=64857, bsz=128, num_updates=6588, lr=9.99553e-05, gnorm=2.201, loss_scale=2, train_wall=11, gb_free=2.8, wall=75802 2021-06-19 15:42:19 | INFO | train_inner | epoch 003: 628 / 3002 loss=2.782, ppl=6.88, wps=5861.3, ups=0.09, wpb=64734, bsz=128, num_updates=6589, lr=9.99553e-05, gnorm=2.25, loss_scale=2, train_wall=11, gb_free=2.8, wall=75813 2021-06-19 15:42:30 | INFO | train_inner | epoch 003: 629 / 3002 loss=2.681, ppl=6.42, wps=5836, ups=0.09, wpb=64857, bsz=128, num_updates=6590, lr=9.99553e-05, gnorm=2.182, loss_scale=2, train_wall=11, gb_free=2.8, wall=75824 2021-06-19 15:42:41 | INFO | train_inner | epoch 003: 630 / 3002 loss=2.588, ppl=6.01, wps=5947.7, ups=0.09, wpb=64845, bsz=128, num_updates=6591, lr=9.99553e-05, gnorm=2.142, loss_scale=2, train_wall=10, gb_free=2.8, wall=75835 2021-06-19 15:42:52 | INFO | train_inner | epoch 003: 631 / 3002 loss=2.683, ppl=6.42, wps=5947.1, ups=0.09, wpb=64785, bsz=128, num_updates=6592, lr=9.99553e-05, gnorm=3.211, loss_scale=2, train_wall=10, gb_free=2.8, wall=75846 2021-06-19 15:43:03 | INFO | train_inner | epoch 003: 632 / 3002 loss=2.619, ppl=6.14, wps=5925.8, ups=0.09, wpb=64845, bsz=128, num_updates=6593, lr=9.99553e-05, gnorm=2.224, loss_scale=2, train_wall=11, gb_free=2.8, wall=75857 2021-06-19 15:43:14 | INFO | train_inner | epoch 003: 633 / 3002 loss=2.703, ppl=6.51, wps=5902.6, ups=0.09, wpb=64934, bsz=128, num_updates=6594, lr=9.99552e-05, gnorm=2.182, loss_scale=2, train_wall=11, gb_free=2.8, wall=75868 2021-06-19 15:43:25 | INFO | train_inner | epoch 003: 634 / 3002 loss=2.708, ppl=6.53, wps=5966.3, ups=0.09, wpb=64802, bsz=128, num_updates=6595, lr=9.99552e-05, gnorm=2.196, loss_scale=2, train_wall=10, gb_free=2.8, wall=75879 2021-06-19 15:43:36 | INFO | train_inner | epoch 003: 635 / 3002 loss=2.775, ppl=6.85, wps=5850.5, ups=0.09, wpb=64863, bsz=128, num_updates=6596, lr=9.99552e-05, gnorm=2.191, loss_scale=2, train_wall=11, gb_free=2.8, wall=75890 2021-06-19 15:43:47 | INFO | train_inner | epoch 003: 636 / 3002 loss=2.666, ppl=6.35, wps=5795.7, ups=0.09, wpb=64825, bsz=128, num_updates=6597, lr=9.99552e-05, gnorm=2.189, loss_scale=2, train_wall=11, gb_free=2.8, wall=75901 2021-06-19 15:43:58 | INFO | train_inner | epoch 003: 637 / 3002 loss=2.596, ppl=6.05, wps=5886.8, ups=0.09, wpb=64866, bsz=128, num_updates=6598, lr=9.99552e-05, gnorm=2.305, loss_scale=2, train_wall=11, gb_free=2.8, wall=75912 2021-06-19 15:44:09 | INFO | train_inner | epoch 003: 638 / 3002 loss=2.558, ppl=5.89, wps=5747, ups=0.09, wpb=64832, bsz=128, num_updates=6599, lr=9.99552e-05, gnorm=2.194, loss_scale=2, train_wall=11, gb_free=2.8, wall=75923 2021-06-19 15:44:20 | INFO | train_inner | epoch 003: 639 / 3002 loss=2.826, ppl=7.09, wps=5810.3, ups=0.09, wpb=64847, bsz=128, num_updates=6600, lr=9.99552e-05, gnorm=2.52, loss_scale=2, train_wall=11, gb_free=2.8, wall=75935 2021-06-19 15:44:31 | INFO | train_inner | epoch 003: 640 / 3002 loss=2.751, ppl=6.73, wps=5876.5, ups=0.09, wpb=64883, bsz=128, num_updates=6601, lr=9.99552e-05, gnorm=2.165, loss_scale=2, train_wall=11, gb_free=2.8, wall=75946 2021-06-19 15:44:42 | INFO | train_inner | epoch 003: 641 / 3002 loss=2.507, ppl=5.69, wps=5844.1, ups=0.09, wpb=64909, bsz=128, num_updates=6602, lr=9.99552e-05, gnorm=2.113, loss_scale=2, train_wall=11, gb_free=2.8, wall=75957 2021-06-19 15:44:54 | INFO | train_inner | epoch 003: 642 / 3002 loss=2.688, ppl=6.44, wps=5755.5, ups=0.09, wpb=64805, bsz=128, num_updates=6603, lr=9.99552e-05, gnorm=2.225, loss_scale=2, train_wall=11, gb_free=2.8, wall=75968 2021-06-19 15:45:05 | INFO | train_inner | epoch 003: 643 / 3002 loss=2.63, ppl=6.19, wps=5825.4, ups=0.09, wpb=64867, bsz=128, num_updates=6604, lr=9.99552e-05, gnorm=2.209, loss_scale=2, train_wall=11, gb_free=2.8, wall=75979 2021-06-19 15:45:16 | INFO | train_inner | epoch 003: 644 / 3002 loss=2.731, ppl=6.64, wps=5787.4, ups=0.09, wpb=64791, bsz=128, num_updates=6605, lr=9.99552e-05, gnorm=2.213, loss_scale=2, train_wall=11, gb_free=2.8, wall=75990 2021-06-19 15:45:27 | INFO | train_inner | epoch 003: 645 / 3002 loss=2.576, ppl=5.96, wps=5866, ups=0.09, wpb=64863, bsz=128, num_updates=6606, lr=9.99551e-05, gnorm=2.569, loss_scale=2, train_wall=11, gb_free=2.8, wall=76001 2021-06-19 15:45:38 | INFO | train_inner | epoch 003: 646 / 3002 loss=2.62, ppl=6.15, wps=5934.1, ups=0.09, wpb=64910, bsz=128, num_updates=6607, lr=9.99551e-05, gnorm=2.265, loss_scale=2, train_wall=10, gb_free=2.8, wall=76012 2021-06-19 15:45:49 | INFO | train_inner | epoch 003: 647 / 3002 loss=2.652, ppl=6.28, wps=5912.3, ups=0.09, wpb=64719, bsz=128, num_updates=6608, lr=9.99551e-05, gnorm=2.086, loss_scale=2, train_wall=11, gb_free=2.8, wall=76023 2021-06-19 15:46:00 | INFO | train_inner | epoch 003: 648 / 3002 loss=2.625, ppl=6.17, wps=5750, ups=0.09, wpb=64778, bsz=128, num_updates=6609, lr=9.99551e-05, gnorm=2.049, loss_scale=2, train_wall=11, gb_free=2.8, wall=76035 2021-06-19 15:46:11 | INFO | train_inner | epoch 003: 649 / 3002 loss=2.749, ppl=6.72, wps=5825.1, ups=0.09, wpb=64916, bsz=128, num_updates=6610, lr=9.99551e-05, gnorm=2.739, loss_scale=2, train_wall=11, gb_free=2.8, wall=76046 2021-06-19 15:46:22 | INFO | train_inner | epoch 003: 650 / 3002 loss=2.809, ppl=7.01, wps=5944, ups=0.09, wpb=64865, bsz=128, num_updates=6611, lr=9.99551e-05, gnorm=2.148, loss_scale=2, train_wall=10, gb_free=2.8, wall=76057 2021-06-19 15:46:33 | INFO | train_inner | epoch 003: 651 / 3002 loss=2.542, ppl=5.82, wps=5900.9, ups=0.09, wpb=64819, bsz=128, num_updates=6612, lr=9.99551e-05, gnorm=2.216, loss_scale=2, train_wall=11, gb_free=2.8, wall=76068 2021-06-19 15:46:45 | INFO | train_inner | epoch 003: 652 / 3002 loss=2.576, ppl=5.96, wps=5712.5, ups=0.09, wpb=64801, bsz=128, num_updates=6613, lr=9.99551e-05, gnorm=2.101, loss_scale=2, train_wall=11, gb_free=2.8, wall=76079 2021-06-19 15:46:56 | INFO | train_inner | epoch 003: 653 / 3002 loss=2.725, ppl=6.61, wps=5855.6, ups=0.09, wpb=64855, bsz=128, num_updates=6614, lr=9.99551e-05, gnorm=2.116, loss_scale=2, train_wall=11, gb_free=2.8, wall=76090 2021-06-19 15:47:07 | INFO | train_inner | epoch 003: 654 / 3002 loss=2.697, ppl=6.48, wps=5858.4, ups=0.09, wpb=64698, bsz=128, num_updates=6615, lr=9.99551e-05, gnorm=2.119, loss_scale=2, train_wall=11, gb_free=2.8, wall=76101 2021-06-19 15:47:18 | INFO | train_inner | epoch 003: 655 / 3002 loss=2.789, ppl=6.91, wps=5749.7, ups=0.09, wpb=64743, bsz=128, num_updates=6616, lr=9.99551e-05, gnorm=2.231, loss_scale=2, train_wall=11, gb_free=2.8, wall=76112 2021-06-19 15:47:29 | INFO | train_inner | epoch 003: 656 / 3002 loss=2.721, ppl=6.59, wps=5764.2, ups=0.09, wpb=64715, bsz=128, num_updates=6617, lr=9.99551e-05, gnorm=8.004, loss_scale=2, train_wall=11, gb_free=2.8, wall=76124 2021-06-19 15:47:40 | INFO | train_inner | epoch 003: 657 / 3002 loss=2.665, ppl=6.34, wps=5898.6, ups=0.09, wpb=64864, bsz=128, num_updates=6618, lr=9.99551e-05, gnorm=2.24, loss_scale=2, train_wall=11, gb_free=2.8, wall=76135 2021-06-19 15:47:51 | INFO | train_inner | epoch 003: 658 / 3002 loss=2.62, ppl=6.15, wps=5873.4, ups=0.09, wpb=64812, bsz=128, num_updates=6619, lr=9.9955e-05, gnorm=2.177, loss_scale=2, train_wall=11, gb_free=2.8, wall=76146 2021-06-19 15:48:02 | INFO | train_inner | epoch 003: 659 / 3002 loss=2.764, ppl=6.79, wps=5942.7, ups=0.09, wpb=64857, bsz=128, num_updates=6620, lr=9.9955e-05, gnorm=2.269, loss_scale=2, train_wall=10, gb_free=2.8, wall=76157 2021-06-19 15:48:13 | INFO | train_inner | epoch 003: 660 / 3002 loss=2.557, ppl=5.89, wps=5820.9, ups=0.09, wpb=64819, bsz=128, num_updates=6621, lr=9.9955e-05, gnorm=2.253, loss_scale=2, train_wall=11, gb_free=2.8, wall=76168 2021-06-19 15:48:25 | INFO | train_inner | epoch 003: 661 / 3002 loss=2.756, ppl=6.75, wps=5753.9, ups=0.09, wpb=64760, bsz=128, num_updates=6622, lr=9.9955e-05, gnorm=2.689, loss_scale=2, train_wall=11, gb_free=2.8, wall=76179 2021-06-19 15:48:36 | INFO | train_inner | epoch 003: 662 / 3002 loss=2.677, ppl=6.4, wps=5933, ups=0.09, wpb=64881, bsz=128, num_updates=6623, lr=9.9955e-05, gnorm=2.324, loss_scale=2, train_wall=11, gb_free=2.8, wall=76190 2021-06-19 15:48:47 | INFO | train_inner | epoch 003: 663 / 3002 loss=2.693, ppl=6.47, wps=5682.6, ups=0.09, wpb=64771, bsz=128, num_updates=6624, lr=9.9955e-05, gnorm=2.554, loss_scale=2, train_wall=11, gb_free=2.8, wall=76201 2021-06-19 15:48:58 | INFO | train_inner | epoch 003: 664 / 3002 loss=2.717, ppl=6.57, wps=5877.2, ups=0.09, wpb=64844, bsz=128, num_updates=6625, lr=9.9955e-05, gnorm=2.756, loss_scale=2, train_wall=11, gb_free=2.8, wall=76212 2021-06-19 15:49:09 | INFO | train_inner | epoch 003: 665 / 3002 loss=2.718, ppl=6.58, wps=5849.6, ups=0.09, wpb=64807, bsz=128, num_updates=6626, lr=9.9955e-05, gnorm=2.182, loss_scale=2, train_wall=11, gb_free=2.8, wall=76223 2021-06-19 15:49:20 | INFO | train_inner | epoch 003: 666 / 3002 loss=2.667, ppl=6.35, wps=5917.3, ups=0.09, wpb=64860, bsz=128, num_updates=6627, lr=9.9955e-05, gnorm=2.368, loss_scale=2, train_wall=10, gb_free=2.8, wall=76234 2021-06-19 15:49:31 | INFO | train_inner | epoch 003: 667 / 3002 loss=2.66, ppl=6.32, wps=5771.2, ups=0.09, wpb=64837, bsz=128, num_updates=6628, lr=9.9955e-05, gnorm=2.245, loss_scale=2, train_wall=11, gb_free=2.8, wall=76246 2021-06-19 15:49:42 | INFO | train_inner | epoch 003: 668 / 3002 loss=2.659, ppl=6.32, wps=6027.7, ups=0.09, wpb=64918, bsz=128, num_updates=6629, lr=9.9955e-05, gnorm=2.184, loss_scale=4, train_wall=10, gb_free=2.8, wall=76256 2021-06-19 15:49:53 | INFO | train_inner | epoch 003: 669 / 3002 loss=2.593, ppl=6.03, wps=5780.2, ups=0.09, wpb=64755, bsz=128, num_updates=6630, lr=9.9955e-05, gnorm=2.54, loss_scale=4, train_wall=11, gb_free=2.8, wall=76268 2021-06-19 15:50:04 | INFO | train_inner | epoch 003: 670 / 3002 loss=2.62, ppl=6.15, wps=5751.3, ups=0.09, wpb=64744, bsz=128, num_updates=6631, lr=9.99549e-05, gnorm=2.207, loss_scale=4, train_wall=11, gb_free=2.8, wall=76279 2021-06-19 15:50:16 | INFO | train_inner | epoch 003: 671 / 3002 loss=2.636, ppl=6.22, wps=5845.6, ups=0.09, wpb=64834, bsz=128, num_updates=6632, lr=9.99549e-05, gnorm=2.367, loss_scale=4, train_wall=11, gb_free=2.8, wall=76290 2021-06-19 15:50:27 | INFO | train_inner | epoch 003: 672 / 3002 loss=2.771, ppl=6.83, wps=5771.3, ups=0.09, wpb=64828, bsz=128, num_updates=6633, lr=9.99549e-05, gnorm=2.308, loss_scale=4, train_wall=11, gb_free=2.8, wall=76301 2021-06-19 15:50:38 | INFO | train_inner | epoch 003: 673 / 3002 loss=2.708, ppl=6.53, wps=5674.5, ups=0.09, wpb=64764, bsz=128, num_updates=6634, lr=9.99549e-05, gnorm=2.3, loss_scale=4, train_wall=11, gb_free=2.8, wall=76313 2021-06-19 15:50:49 | INFO | train_inner | epoch 003: 674 / 3002 loss=2.681, ppl=6.41, wps=5745.7, ups=0.09, wpb=64844, bsz=128, num_updates=6635, lr=9.99549e-05, gnorm=2.084, loss_scale=4, train_wall=11, gb_free=2.8, wall=76324 2021-06-19 15:51:00 | INFO | train_inner | epoch 003: 675 / 3002 loss=2.75, ppl=6.73, wps=5886.6, ups=0.09, wpb=64768, bsz=128, num_updates=6636, lr=9.99549e-05, gnorm=2.273, loss_scale=4, train_wall=11, gb_free=2.8, wall=76335 2021-06-19 15:51:12 | INFO | train_inner | epoch 003: 676 / 3002 loss=2.744, ppl=6.7, wps=5763.6, ups=0.09, wpb=64829, bsz=128, num_updates=6637, lr=9.99549e-05, gnorm=2.23, loss_scale=4, train_wall=11, gb_free=2.8, wall=76346 2021-06-19 15:51:23 | INFO | train_inner | epoch 003: 677 / 3002 loss=2.721, ppl=6.59, wps=5811, ups=0.09, wpb=64842, bsz=128, num_updates=6638, lr=9.99549e-05, gnorm=2.226, loss_scale=4, train_wall=11, gb_free=2.8, wall=76357 2021-06-19 15:51:34 | INFO | train_inner | epoch 003: 678 / 3002 loss=2.632, ppl=6.2, wps=5841.4, ups=0.09, wpb=64850, bsz=128, num_updates=6639, lr=9.99549e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=76368 2021-06-19 15:51:45 | INFO | train_inner | epoch 003: 679 / 3002 loss=2.502, ppl=5.66, wps=5823.8, ups=0.09, wpb=64831, bsz=128, num_updates=6640, lr=9.99549e-05, gnorm=2.152, loss_scale=4, train_wall=11, gb_free=2.8, wall=76379 2021-06-19 15:51:56 | INFO | train_inner | epoch 003: 680 / 3002 loss=2.767, ppl=6.81, wps=5864.3, ups=0.09, wpb=64810, bsz=128, num_updates=6641, lr=9.99549e-05, gnorm=2.462, loss_scale=4, train_wall=11, gb_free=2.8, wall=76391 2021-06-19 15:52:07 | INFO | train_inner | epoch 003: 681 / 3002 loss=2.721, ppl=6.59, wps=5803.6, ups=0.09, wpb=64848, bsz=128, num_updates=6642, lr=9.99549e-05, gnorm=2.291, loss_scale=4, train_wall=11, gb_free=2.8, wall=76402 2021-06-19 15:52:18 | INFO | train_inner | epoch 003: 682 / 3002 loss=2.593, ppl=6.03, wps=5912.7, ups=0.09, wpb=64789, bsz=128, num_updates=6643, lr=9.99549e-05, gnorm=2.696, loss_scale=4, train_wall=11, gb_free=2.8, wall=76413 2021-06-19 15:52:29 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-19 15:52:40 | INFO | train_inner | epoch 003: 684 / 3002 loss=2.667, ppl=6.35, wps=2952.5, ups=0.05, wpb=64775, bsz=128, num_updates=6644, lr=9.99548e-05, gnorm=2.201, loss_scale=2, train_wall=21, gb_free=2.8, wall=76435 2021-06-19 15:52:52 | INFO | train_inner | epoch 003: 685 / 3002 loss=2.625, ppl=6.17, wps=5767.7, ups=0.09, wpb=64864, bsz=128, num_updates=6645, lr=9.99548e-05, gnorm=2.195, loss_scale=2, train_wall=11, gb_free=2.8, wall=76446 2021-06-19 15:53:02 | INFO | train_inner | epoch 003: 686 / 3002 loss=2.631, ppl=6.19, wps=5960.4, ups=0.09, wpb=64836, bsz=128, num_updates=6646, lr=9.99548e-05, gnorm=2.201, loss_scale=2, train_wall=10, gb_free=2.8, wall=76457 2021-06-19 15:53:14 | INFO | train_inner | epoch 003: 687 / 3002 loss=2.655, ppl=6.3, wps=5746.2, ups=0.09, wpb=64797, bsz=128, num_updates=6647, lr=9.99548e-05, gnorm=2.237, loss_scale=2, train_wall=11, gb_free=2.8, wall=76468 2021-06-19 15:53:25 | INFO | train_inner | epoch 003: 688 / 3002 loss=2.723, ppl=6.6, wps=5764.9, ups=0.09, wpb=64758, bsz=128, num_updates=6648, lr=9.99548e-05, gnorm=2.177, loss_scale=2, train_wall=11, gb_free=2.8, wall=76479 2021-06-19 15:53:36 | INFO | train_inner | epoch 003: 689 / 3002 loss=2.571, ppl=5.94, wps=5789.8, ups=0.09, wpb=64817, bsz=128, num_updates=6649, lr=9.99548e-05, gnorm=2.265, loss_scale=2, train_wall=11, gb_free=2.8, wall=76490 2021-06-19 15:53:47 | INFO | train_inner | epoch 003: 690 / 3002 loss=2.611, ppl=6.11, wps=5721.3, ups=0.09, wpb=64836, bsz=128, num_updates=6650, lr=9.99548e-05, gnorm=2.797, loss_scale=2, train_wall=11, gb_free=2.8, wall=76502 2021-06-19 15:53:59 | INFO | train_inner | epoch 003: 691 / 3002 loss=2.74, ppl=6.68, wps=5780.8, ups=0.09, wpb=64833, bsz=128, num_updates=6651, lr=9.99548e-05, gnorm=2.233, loss_scale=2, train_wall=11, gb_free=2.8, wall=76513 2021-06-19 15:54:10 | INFO | train_inner | epoch 003: 692 / 3002 loss=2.675, ppl=6.39, wps=5946.1, ups=0.09, wpb=64867, bsz=128, num_updates=6652, lr=9.99548e-05, gnorm=2.2, loss_scale=2, train_wall=10, gb_free=2.8, wall=76524 2021-06-19 15:54:20 | INFO | train_inner | epoch 003: 693 / 3002 loss=2.632, ppl=6.2, wps=5926, ups=0.09, wpb=64888, bsz=128, num_updates=6653, lr=9.99548e-05, gnorm=4.985, loss_scale=2, train_wall=10, gb_free=2.8, wall=76535 2021-06-19 15:54:31 | INFO | train_inner | epoch 003: 694 / 3002 loss=2.57, ppl=5.94, wps=5912.5, ups=0.09, wpb=64758, bsz=128, num_updates=6654, lr=9.99548e-05, gnorm=3.179, loss_scale=2, train_wall=11, gb_free=2.8, wall=76546 2021-06-19 15:54:42 | INFO | train_inner | epoch 003: 695 / 3002 loss=2.79, ppl=6.92, wps=5870.8, ups=0.09, wpb=64886, bsz=128, num_updates=6655, lr=9.99548e-05, gnorm=2.153, loss_scale=2, train_wall=11, gb_free=2.8, wall=76557 2021-06-19 15:54:54 | INFO | train_inner | epoch 003: 696 / 3002 loss=2.59, ppl=6.02, wps=5760.7, ups=0.09, wpb=64846, bsz=128, num_updates=6656, lr=9.99547e-05, gnorm=2.259, loss_scale=2, train_wall=11, gb_free=2.8, wall=76568 2021-06-19 15:55:05 | INFO | train_inner | epoch 003: 697 / 3002 loss=2.568, ppl=5.93, wps=5777.7, ups=0.09, wpb=64825, bsz=128, num_updates=6657, lr=9.99547e-05, gnorm=2.282, loss_scale=2, train_wall=11, gb_free=2.8, wall=76579 2021-06-19 15:55:16 | INFO | train_inner | epoch 003: 698 / 3002 loss=2.598, ppl=6.05, wps=5849.9, ups=0.09, wpb=64881, bsz=128, num_updates=6658, lr=9.99547e-05, gnorm=2.23, loss_scale=2, train_wall=11, gb_free=2.8, wall=76590 2021-06-19 15:55:27 | INFO | train_inner | epoch 003: 699 / 3002 loss=2.631, ppl=6.2, wps=5734.9, ups=0.09, wpb=64781, bsz=128, num_updates=6659, lr=9.99547e-05, gnorm=2.273, loss_scale=2, train_wall=11, gb_free=2.8, wall=76602 2021-06-19 15:55:38 | INFO | train_inner | epoch 003: 700 / 3002 loss=2.488, ppl=5.61, wps=5872.1, ups=0.09, wpb=64847, bsz=128, num_updates=6660, lr=9.99547e-05, gnorm=2.239, loss_scale=2, train_wall=11, gb_free=2.8, wall=76613 2021-06-19 15:55:50 | INFO | train_inner | epoch 003: 701 / 3002 loss=2.666, ppl=6.35, wps=5789.9, ups=0.09, wpb=64846, bsz=128, num_updates=6661, lr=9.99547e-05, gnorm=2.363, loss_scale=2, train_wall=11, gb_free=2.8, wall=76624 2021-06-19 15:56:01 | INFO | train_inner | epoch 003: 702 / 3002 loss=2.715, ppl=6.57, wps=5775.1, ups=0.09, wpb=64692, bsz=128, num_updates=6662, lr=9.99547e-05, gnorm=2.2, loss_scale=2, train_wall=11, gb_free=2.8, wall=76635 2021-06-19 15:56:12 | INFO | train_inner | epoch 003: 703 / 3002 loss=2.709, ppl=6.54, wps=5778.7, ups=0.09, wpb=64876, bsz=128, num_updates=6663, lr=9.99547e-05, gnorm=2.231, loss_scale=2, train_wall=11, gb_free=2.8, wall=76646 2021-06-19 15:56:23 | INFO | train_inner | epoch 003: 704 / 3002 loss=2.742, ppl=6.69, wps=5812.6, ups=0.09, wpb=64816, bsz=128, num_updates=6664, lr=9.99547e-05, gnorm=2.225, loss_scale=2, train_wall=11, gb_free=2.8, wall=76658 2021-06-19 15:56:34 | INFO | train_inner | epoch 003: 705 / 3002 loss=2.58, ppl=5.98, wps=5832.7, ups=0.09, wpb=64907, bsz=128, num_updates=6665, lr=9.99547e-05, gnorm=2.131, loss_scale=2, train_wall=11, gb_free=2.8, wall=76669 2021-06-19 15:56:45 | INFO | train_inner | epoch 003: 706 / 3002 loss=2.829, ppl=7.11, wps=5867.9, ups=0.09, wpb=64811, bsz=128, num_updates=6666, lr=9.99547e-05, gnorm=2.323, loss_scale=2, train_wall=11, gb_free=2.8, wall=76680 2021-06-19 15:56:56 | INFO | train_inner | epoch 003: 707 / 3002 loss=2.502, ppl=5.66, wps=5835.8, ups=0.09, wpb=64887, bsz=128, num_updates=6667, lr=9.99547e-05, gnorm=2.051, loss_scale=2, train_wall=11, gb_free=2.8, wall=76691 2021-06-19 15:57:07 | INFO | train_inner | epoch 003: 708 / 3002 loss=2.689, ppl=6.45, wps=5934, ups=0.09, wpb=64776, bsz=128, num_updates=6668, lr=9.99547e-05, gnorm=2.211, loss_scale=2, train_wall=10, gb_free=2.8, wall=76702 2021-06-19 15:57:19 | INFO | train_inner | epoch 003: 709 / 3002 loss=2.559, ppl=5.89, wps=5830.1, ups=0.09, wpb=64889, bsz=128, num_updates=6669, lr=9.99546e-05, gnorm=2.113, loss_scale=2, train_wall=11, gb_free=2.8, wall=76713 2021-06-19 15:57:30 | INFO | train_inner | epoch 003: 710 / 3002 loss=2.742, ppl=6.69, wps=5888.8, ups=0.09, wpb=64849, bsz=128, num_updates=6670, lr=9.99546e-05, gnorm=2.275, loss_scale=2, train_wall=11, gb_free=2.8, wall=76724 2021-06-19 15:57:41 | INFO | train_inner | epoch 003: 711 / 3002 loss=2.774, ppl=6.84, wps=5825.5, ups=0.09, wpb=64794, bsz=128, num_updates=6671, lr=9.99546e-05, gnorm=2.369, loss_scale=2, train_wall=11, gb_free=2.8, wall=76735 2021-06-19 15:57:52 | INFO | train_inner | epoch 003: 712 / 3002 loss=2.642, ppl=6.24, wps=5887.6, ups=0.09, wpb=64828, bsz=128, num_updates=6672, lr=9.99546e-05, gnorm=2.174, loss_scale=2, train_wall=11, gb_free=2.8, wall=76746 2021-06-19 15:58:02 | INFO | train_inner | epoch 003: 713 / 3002 loss=2.665, ppl=6.34, wps=6020.6, ups=0.09, wpb=64920, bsz=128, num_updates=6673, lr=9.99546e-05, gnorm=2.1, loss_scale=2, train_wall=10, gb_free=2.8, wall=76757 2021-06-19 15:58:13 | INFO | train_inner | epoch 003: 714 / 3002 loss=2.684, ppl=6.43, wps=5894.4, ups=0.09, wpb=64866, bsz=128, num_updates=6674, lr=9.99546e-05, gnorm=2.327, loss_scale=2, train_wall=11, gb_free=2.8, wall=76768 2021-06-19 15:58:24 | INFO | train_inner | epoch 003: 715 / 3002 loss=2.71, ppl=6.55, wps=5964.2, ups=0.09, wpb=64865, bsz=128, num_updates=6675, lr=9.99546e-05, gnorm=2.076, loss_scale=2, train_wall=10, gb_free=2.8, wall=76779 2021-06-19 15:58:36 | INFO | train_inner | epoch 003: 716 / 3002 loss=2.552, ppl=5.86, wps=5801.7, ups=0.09, wpb=64769, bsz=128, num_updates=6676, lr=9.99546e-05, gnorm=2.066, loss_scale=2, train_wall=11, gb_free=2.8, wall=76790 2021-06-19 15:58:46 | INFO | train_inner | epoch 003: 717 / 3002 loss=2.534, ppl=5.79, wps=5976.8, ups=0.09, wpb=64779, bsz=128, num_updates=6677, lr=9.99546e-05, gnorm=2.078, loss_scale=2, train_wall=10, gb_free=2.8, wall=76801 2021-06-19 15:58:58 | INFO | train_inner | epoch 003: 718 / 3002 loss=2.683, ppl=6.42, wps=5750.6, ups=0.09, wpb=64790, bsz=128, num_updates=6678, lr=9.99546e-05, gnorm=2.246, loss_scale=2, train_wall=11, gb_free=2.8, wall=76812 2021-06-19 15:59:09 | INFO | train_inner | epoch 003: 719 / 3002 loss=2.612, ppl=6.12, wps=5832, ups=0.09, wpb=64886, bsz=128, num_updates=6679, lr=9.99546e-05, gnorm=2.154, loss_scale=2, train_wall=11, gb_free=2.8, wall=76823 2021-06-19 15:59:20 | INFO | train_inner | epoch 003: 720 / 3002 loss=2.892, ppl=7.42, wps=5986.6, ups=0.09, wpb=64803, bsz=128, num_updates=6680, lr=9.99546e-05, gnorm=2.167, loss_scale=2, train_wall=10, gb_free=2.8, wall=76834 2021-06-19 15:59:31 | INFO | train_inner | epoch 003: 721 / 3002 loss=2.655, ppl=6.3, wps=5802, ups=0.09, wpb=64790, bsz=128, num_updates=6681, lr=9.99545e-05, gnorm=2.05, loss_scale=2, train_wall=11, gb_free=2.8, wall=76845 2021-06-19 15:59:42 | INFO | train_inner | epoch 003: 722 / 3002 loss=2.692, ppl=6.46, wps=6014, ups=0.09, wpb=64930, bsz=128, num_updates=6682, lr=9.99545e-05, gnorm=2.252, loss_scale=2, train_wall=10, gb_free=2.8, wall=76856 2021-06-19 15:59:52 | INFO | train_inner | epoch 003: 723 / 3002 loss=2.812, ppl=7.02, wps=5996.3, ups=0.09, wpb=64927, bsz=128, num_updates=6683, lr=9.99545e-05, gnorm=2.197, loss_scale=2, train_wall=10, gb_free=2.8, wall=76867 2021-06-19 16:00:03 | INFO | train_inner | epoch 003: 724 / 3002 loss=2.587, ppl=6.01, wps=5816.9, ups=0.09, wpb=64785, bsz=128, num_updates=6684, lr=9.99545e-05, gnorm=2.093, loss_scale=2, train_wall=11, gb_free=2.8, wall=76878 2021-06-19 16:00:15 | INFO | train_inner | epoch 003: 725 / 3002 loss=2.628, ppl=6.18, wps=5849.8, ups=0.09, wpb=64822, bsz=128, num_updates=6685, lr=9.99545e-05, gnorm=2.173, loss_scale=2, train_wall=11, gb_free=2.8, wall=76889 2021-06-19 16:00:26 | INFO | train_inner | epoch 003: 726 / 3002 loss=2.572, ppl=5.95, wps=5836.1, ups=0.09, wpb=64810, bsz=128, num_updates=6686, lr=9.99545e-05, gnorm=2.267, loss_scale=2, train_wall=11, gb_free=2.8, wall=76900 2021-06-19 16:00:37 | INFO | train_inner | epoch 003: 727 / 3002 loss=2.59, ppl=6.02, wps=5766.3, ups=0.09, wpb=64866, bsz=128, num_updates=6687, lr=9.99545e-05, gnorm=2.136, loss_scale=2, train_wall=11, gb_free=2.8, wall=76911 2021-06-19 16:00:48 | INFO | train_inner | epoch 003: 728 / 3002 loss=2.641, ppl=6.24, wps=5834.3, ups=0.09, wpb=64854, bsz=128, num_updates=6688, lr=9.99545e-05, gnorm=2.154, loss_scale=2, train_wall=11, gb_free=2.8, wall=76922 2021-06-19 16:00:59 | INFO | train_inner | epoch 003: 729 / 3002 loss=2.626, ppl=6.17, wps=5860.1, ups=0.09, wpb=64879, bsz=128, num_updates=6689, lr=9.99545e-05, gnorm=2.206, loss_scale=2, train_wall=11, gb_free=2.8, wall=76933 2021-06-19 16:01:10 | INFO | train_inner | epoch 003: 730 / 3002 loss=2.708, ppl=6.54, wps=5870.4, ups=0.09, wpb=64879, bsz=128, num_updates=6690, lr=9.99545e-05, gnorm=2.13, loss_scale=2, train_wall=11, gb_free=2.8, wall=76944 2021-06-19 16:01:21 | INFO | train_inner | epoch 003: 731 / 3002 loss=2.574, ppl=5.95, wps=5923.9, ups=0.09, wpb=64832, bsz=128, num_updates=6691, lr=9.99545e-05, gnorm=2.999, loss_scale=2, train_wall=11, gb_free=2.8, wall=76955 2021-06-19 16:01:32 | INFO | train_inner | epoch 003: 732 / 3002 loss=2.566, ppl=5.92, wps=5975, ups=0.09, wpb=64786, bsz=128, num_updates=6692, lr=9.99545e-05, gnorm=2.185, loss_scale=2, train_wall=10, gb_free=2.8, wall=76966 2021-06-19 16:01:43 | INFO | train_inner | epoch 003: 733 / 3002 loss=2.781, ppl=6.87, wps=5874.5, ups=0.09, wpb=64845, bsz=128, num_updates=6693, lr=9.99545e-05, gnorm=2.144, loss_scale=2, train_wall=11, gb_free=2.8, wall=76977 2021-06-19 16:01:54 | INFO | train_inner | epoch 003: 734 / 3002 loss=2.591, ppl=6.03, wps=5804.9, ups=0.09, wpb=64879, bsz=128, num_updates=6694, lr=9.99544e-05, gnorm=2.125, loss_scale=2, train_wall=11, gb_free=2.8, wall=76989 2021-06-19 16:02:05 | INFO | train_inner | epoch 003: 735 / 3002 loss=2.547, ppl=5.84, wps=5901.8, ups=0.09, wpb=64865, bsz=128, num_updates=6695, lr=9.99544e-05, gnorm=2.167, loss_scale=2, train_wall=11, gb_free=2.8, wall=76999 2021-06-19 16:02:16 | INFO | train_inner | epoch 003: 736 / 3002 loss=2.515, ppl=5.72, wps=5864.9, ups=0.09, wpb=64817, bsz=128, num_updates=6696, lr=9.99544e-05, gnorm=2.248, loss_scale=2, train_wall=11, gb_free=2.8, wall=77011 2021-06-19 16:02:27 | INFO | train_inner | epoch 003: 737 / 3002 loss=2.759, ppl=6.77, wps=5829.4, ups=0.09, wpb=64737, bsz=128, num_updates=6697, lr=9.99544e-05, gnorm=2.184, loss_scale=2, train_wall=11, gb_free=2.8, wall=77022 2021-06-19 16:02:38 | INFO | train_inner | epoch 003: 738 / 3002 loss=2.702, ppl=6.51, wps=5837.8, ups=0.09, wpb=64881, bsz=128, num_updates=6698, lr=9.99544e-05, gnorm=2.165, loss_scale=2, train_wall=11, gb_free=2.8, wall=77033 2021-06-19 16:02:50 | INFO | train_inner | epoch 003: 739 / 3002 loss=2.83, ppl=7.11, wps=5851.8, ups=0.09, wpb=64836, bsz=128, num_updates=6699, lr=9.99544e-05, gnorm=2.16, loss_scale=2, train_wall=11, gb_free=2.8, wall=77044 2021-06-19 16:03:01 | INFO | train_inner | epoch 003: 740 / 3002 loss=2.673, ppl=6.38, wps=5773, ups=0.09, wpb=64878, bsz=128, num_updates=6700, lr=9.99544e-05, gnorm=2.161, loss_scale=2, train_wall=11, gb_free=2.8, wall=77055 2021-06-19 16:03:12 | INFO | train_inner | epoch 003: 741 / 3002 loss=2.694, ppl=6.47, wps=5855.9, ups=0.09, wpb=64816, bsz=128, num_updates=6701, lr=9.99544e-05, gnorm=2.185, loss_scale=2, train_wall=11, gb_free=2.8, wall=77066 2021-06-19 16:03:23 | INFO | train_inner | epoch 003: 742 / 3002 loss=2.637, ppl=6.22, wps=5744.9, ups=0.09, wpb=64894, bsz=128, num_updates=6702, lr=9.99544e-05, gnorm=2.198, loss_scale=2, train_wall=11, gb_free=2.8, wall=77077 2021-06-19 16:03:34 | INFO | train_inner | epoch 003: 743 / 3002 loss=2.704, ppl=6.51, wps=5849.3, ups=0.09, wpb=64810, bsz=128, num_updates=6703, lr=9.99544e-05, gnorm=2.205, loss_scale=2, train_wall=11, gb_free=2.8, wall=77089 2021-06-19 16:03:45 | INFO | train_inner | epoch 003: 744 / 3002 loss=2.663, ppl=6.33, wps=5835.5, ups=0.09, wpb=64916, bsz=128, num_updates=6704, lr=9.99544e-05, gnorm=2.095, loss_scale=2, train_wall=11, gb_free=2.8, wall=77100 2021-06-19 16:03:56 | INFO | train_inner | epoch 003: 745 / 3002 loss=2.579, ppl=5.97, wps=5983.2, ups=0.09, wpb=64846, bsz=128, num_updates=6705, lr=9.99544e-05, gnorm=2.152, loss_scale=2, train_wall=10, gb_free=2.8, wall=77110 2021-06-19 16:04:07 | INFO | train_inner | epoch 003: 746 / 3002 loss=2.662, ppl=6.33, wps=5782.1, ups=0.09, wpb=64827, bsz=128, num_updates=6706, lr=9.99543e-05, gnorm=5.654, loss_scale=2, train_wall=11, gb_free=2.8, wall=77122 2021-06-19 16:04:19 | INFO | train_inner | epoch 003: 747 / 3002 loss=2.632, ppl=6.2, wps=5740.8, ups=0.09, wpb=64927, bsz=128, num_updates=6707, lr=9.99543e-05, gnorm=6.008, loss_scale=2, train_wall=11, gb_free=2.8, wall=77133 2021-06-19 16:04:30 | INFO | train_inner | epoch 003: 748 / 3002 loss=2.706, ppl=6.52, wps=5843.4, ups=0.09, wpb=64805, bsz=128, num_updates=6708, lr=9.99543e-05, gnorm=2.074, loss_scale=2, train_wall=11, gb_free=2.8, wall=77144 2021-06-19 16:04:41 | INFO | train_inner | epoch 003: 749 / 3002 loss=2.636, ppl=6.21, wps=5766.3, ups=0.09, wpb=64798, bsz=128, num_updates=6709, lr=9.99543e-05, gnorm=2.093, loss_scale=2, train_wall=11, gb_free=2.8, wall=77155 2021-06-19 16:04:52 | INFO | train_inner | epoch 003: 750 / 3002 loss=2.642, ppl=6.24, wps=5773.1, ups=0.09, wpb=64813, bsz=128, num_updates=6710, lr=9.99543e-05, gnorm=2.141, loss_scale=2, train_wall=11, gb_free=2.8, wall=77167 2021-06-19 16:05:04 | INFO | train_inner | epoch 003: 751 / 3002 loss=2.497, ppl=5.64, wps=5743.4, ups=0.09, wpb=64776, bsz=128, num_updates=6711, lr=9.99543e-05, gnorm=2.264, loss_scale=2, train_wall=11, gb_free=2.8, wall=77178 2021-06-19 16:05:15 | INFO | train_inner | epoch 003: 752 / 3002 loss=2.656, ppl=6.3, wps=5877, ups=0.09, wpb=64804, bsz=128, num_updates=6712, lr=9.99543e-05, gnorm=3.183, loss_scale=2, train_wall=11, gb_free=2.8, wall=77189 2021-06-19 16:05:26 | INFO | train_inner | epoch 003: 753 / 3002 loss=2.689, ppl=6.45, wps=5859.1, ups=0.09, wpb=64911, bsz=128, num_updates=6713, lr=9.99543e-05, gnorm=2.409, loss_scale=2, train_wall=11, gb_free=2.8, wall=77200 2021-06-19 16:05:37 | INFO | train_inner | epoch 003: 754 / 3002 loss=2.89, ppl=7.41, wps=5758.8, ups=0.09, wpb=64833, bsz=128, num_updates=6714, lr=9.99543e-05, gnorm=2.182, loss_scale=2, train_wall=11, gb_free=2.8, wall=77211 2021-06-19 16:05:48 | INFO | train_inner | epoch 003: 755 / 3002 loss=2.781, ppl=6.88, wps=5752.3, ups=0.09, wpb=64812, bsz=128, num_updates=6715, lr=9.99543e-05, gnorm=2.235, loss_scale=2, train_wall=11, gb_free=2.8, wall=77222 2021-06-19 16:05:59 | INFO | train_inner | epoch 003: 756 / 3002 loss=2.651, ppl=6.28, wps=5795.8, ups=0.09, wpb=64862, bsz=128, num_updates=6716, lr=9.99543e-05, gnorm=2.313, loss_scale=2, train_wall=11, gb_free=2.8, wall=77234 2021-06-19 16:06:10 | INFO | train_inner | epoch 003: 757 / 3002 loss=2.584, ppl=6, wps=5878.8, ups=0.09, wpb=64832, bsz=128, num_updates=6717, lr=9.99543e-05, gnorm=2.135, loss_scale=2, train_wall=11, gb_free=2.8, wall=77245 2021-06-19 16:06:22 | INFO | train_inner | epoch 003: 758 / 3002 loss=2.709, ppl=6.54, wps=5820.6, ups=0.09, wpb=64865, bsz=128, num_updates=6718, lr=9.99543e-05, gnorm=2.162, loss_scale=2, train_wall=11, gb_free=2.8, wall=77256 2021-06-19 16:06:32 | INFO | train_inner | epoch 003: 759 / 3002 loss=2.723, ppl=6.6, wps=5908.3, ups=0.09, wpb=64873, bsz=128, num_updates=6719, lr=9.99542e-05, gnorm=2.625, loss_scale=2, train_wall=11, gb_free=2.8, wall=77267 2021-06-19 16:06:44 | INFO | train_inner | epoch 003: 760 / 3002 loss=2.697, ppl=6.49, wps=5843.4, ups=0.09, wpb=64790, bsz=128, num_updates=6720, lr=9.99542e-05, gnorm=2.358, loss_scale=2, train_wall=11, gb_free=2.8, wall=77278 2021-06-19 16:06:54 | INFO | train_inner | epoch 003: 761 / 3002 loss=2.611, ppl=6.11, wps=5960.9, ups=0.09, wpb=64840, bsz=128, num_updates=6721, lr=9.99542e-05, gnorm=2.151, loss_scale=2, train_wall=10, gb_free=2.8, wall=77289 2021-06-19 16:07:06 | INFO | train_inner | epoch 003: 762 / 3002 loss=2.654, ppl=6.29, wps=5821.7, ups=0.09, wpb=64787, bsz=128, num_updates=6722, lr=9.99542e-05, gnorm=2.242, loss_scale=2, train_wall=11, gb_free=2.8, wall=77300 2021-06-19 16:07:17 | INFO | train_inner | epoch 003: 763 / 3002 loss=2.679, ppl=6.4, wps=5862.6, ups=0.09, wpb=64757, bsz=128, num_updates=6723, lr=9.99542e-05, gnorm=2.234, loss_scale=2, train_wall=11, gb_free=2.8, wall=77311 2021-06-19 16:07:28 | INFO | train_inner | epoch 003: 764 / 3002 loss=2.667, ppl=6.35, wps=5928.6, ups=0.09, wpb=64760, bsz=128, num_updates=6724, lr=9.99542e-05, gnorm=2.131, loss_scale=2, train_wall=10, gb_free=2.8, wall=77322 2021-06-19 16:07:39 | INFO | train_inner | epoch 003: 765 / 3002 loss=2.665, ppl=6.34, wps=5796.8, ups=0.09, wpb=64924, bsz=128, num_updates=6725, lr=9.99542e-05, gnorm=3.499, loss_scale=2, train_wall=11, gb_free=2.8, wall=77333 2021-06-19 16:07:50 | INFO | train_inner | epoch 003: 766 / 3002 loss=2.705, ppl=6.52, wps=5826.3, ups=0.09, wpb=64830, bsz=128, num_updates=6726, lr=9.99542e-05, gnorm=2.154, loss_scale=2, train_wall=11, gb_free=2.8, wall=77344 2021-06-19 16:08:01 | INFO | train_inner | epoch 003: 767 / 3002 loss=2.528, ppl=5.77, wps=5876.4, ups=0.09, wpb=64878, bsz=128, num_updates=6727, lr=9.99542e-05, gnorm=2.503, loss_scale=2, train_wall=11, gb_free=2.8, wall=77355 2021-06-19 16:08:12 | INFO | train_inner | epoch 003: 768 / 3002 loss=2.742, ppl=6.69, wps=5814.1, ups=0.09, wpb=64785, bsz=128, num_updates=6728, lr=9.99542e-05, gnorm=2.334, loss_scale=2, train_wall=11, gb_free=2.8, wall=77366 2021-06-19 16:08:23 | INFO | train_inner | epoch 003: 769 / 3002 loss=2.624, ppl=6.16, wps=5846.5, ups=0.09, wpb=64851, bsz=128, num_updates=6729, lr=9.99542e-05, gnorm=2.204, loss_scale=2, train_wall=11, gb_free=2.8, wall=77377 2021-06-19 16:08:34 | INFO | train_inner | epoch 003: 770 / 3002 loss=2.739, ppl=6.68, wps=5827.9, ups=0.09, wpb=64894, bsz=128, num_updates=6730, lr=9.99542e-05, gnorm=7.585, loss_scale=2, train_wall=11, gb_free=2.8, wall=77389 2021-06-19 16:08:45 | INFO | train_inner | epoch 003: 771 / 3002 loss=2.557, ppl=5.89, wps=5915.2, ups=0.09, wpb=64864, bsz=128, num_updates=6731, lr=9.99541e-05, gnorm=2.055, loss_scale=2, train_wall=11, gb_free=2.8, wall=77400 2021-06-19 16:08:56 | INFO | train_inner | epoch 003: 772 / 3002 loss=2.632, ppl=6.2, wps=5983, ups=0.09, wpb=64854, bsz=128, num_updates=6732, lr=9.99541e-05, gnorm=2.297, loss_scale=2, train_wall=10, gb_free=2.8, wall=77410 2021-06-19 16:09:07 | INFO | train_inner | epoch 003: 773 / 3002 loss=2.637, ppl=6.22, wps=5801, ups=0.09, wpb=64844, bsz=128, num_updates=6733, lr=9.99541e-05, gnorm=2.431, loss_scale=2, train_wall=11, gb_free=2.8, wall=77422 2021-06-19 16:09:18 | INFO | train_inner | epoch 003: 774 / 3002 loss=2.628, ppl=6.18, wps=5872, ups=0.09, wpb=64782, bsz=128, num_updates=6734, lr=9.99541e-05, gnorm=2.365, loss_scale=2, train_wall=11, gb_free=2.8, wall=77433 2021-06-19 16:09:29 | INFO | train_inner | epoch 003: 775 / 3002 loss=2.585, ppl=6, wps=5807.8, ups=0.09, wpb=64809, bsz=128, num_updates=6735, lr=9.99541e-05, gnorm=2.745, loss_scale=2, train_wall=11, gb_free=2.8, wall=77444 2021-06-19 16:09:40 | INFO | train_inner | epoch 003: 776 / 3002 loss=2.68, ppl=6.41, wps=5902.7, ups=0.09, wpb=64761, bsz=128, num_updates=6736, lr=9.99541e-05, gnorm=2.238, loss_scale=2, train_wall=11, gb_free=2.8, wall=77455 2021-06-19 16:09:52 | INFO | train_inner | epoch 003: 777 / 3002 loss=2.624, ppl=6.17, wps=5855.9, ups=0.09, wpb=64876, bsz=128, num_updates=6737, lr=9.99541e-05, gnorm=2.523, loss_scale=2, train_wall=11, gb_free=2.8, wall=77466 2021-06-19 16:10:03 | INFO | train_inner | epoch 003: 778 / 3002 loss=2.701, ppl=6.5, wps=5862.8, ups=0.09, wpb=64813, bsz=128, num_updates=6738, lr=9.99541e-05, gnorm=2.354, loss_scale=2, train_wall=11, gb_free=2.8, wall=77477 2021-06-19 16:10:14 | INFO | train_inner | epoch 003: 779 / 3002 loss=2.714, ppl=6.56, wps=5792.6, ups=0.09, wpb=64853, bsz=128, num_updates=6739, lr=9.99541e-05, gnorm=2.073, loss_scale=2, train_wall=11, gb_free=2.8, wall=77488 2021-06-19 16:10:25 | INFO | train_inner | epoch 003: 780 / 3002 loss=2.677, ppl=6.39, wps=5749.1, ups=0.09, wpb=64764, bsz=128, num_updates=6740, lr=9.99541e-05, gnorm=4.097, loss_scale=2, train_wall=11, gb_free=2.8, wall=77499 2021-06-19 16:10:36 | INFO | train_inner | epoch 003: 781 / 3002 loss=2.844, ppl=7.18, wps=5926.5, ups=0.09, wpb=64808, bsz=128, num_updates=6741, lr=9.99541e-05, gnorm=2.213, loss_scale=2, train_wall=10, gb_free=2.8, wall=77510 2021-06-19 16:10:47 | INFO | train_inner | epoch 003: 782 / 3002 loss=2.593, ppl=6.03, wps=5987.9, ups=0.09, wpb=64866, bsz=128, num_updates=6742, lr=9.99541e-05, gnorm=2.08, loss_scale=2, train_wall=10, gb_free=2.8, wall=77521 2021-06-19 16:10:58 | INFO | train_inner | epoch 003: 783 / 3002 loss=2.511, ppl=5.7, wps=5787.7, ups=0.09, wpb=64913, bsz=128, num_updates=6743, lr=9.99541e-05, gnorm=2.182, loss_scale=2, train_wall=11, gb_free=2.8, wall=77532 2021-06-19 16:11:09 | INFO | train_inner | epoch 003: 784 / 3002 loss=2.71, ppl=6.54, wps=5905.9, ups=0.09, wpb=64888, bsz=128, num_updates=6744, lr=9.9954e-05, gnorm=2.212, loss_scale=2, train_wall=11, gb_free=2.8, wall=77543 2021-06-19 16:11:20 | INFO | train_inner | epoch 003: 785 / 3002 loss=2.691, ppl=6.46, wps=5828, ups=0.09, wpb=64826, bsz=128, num_updates=6745, lr=9.9954e-05, gnorm=4.029, loss_scale=2, train_wall=11, gb_free=2.8, wall=77554 2021-06-19 16:11:31 | INFO | train_inner | epoch 003: 786 / 3002 loss=2.658, ppl=6.31, wps=5908.6, ups=0.09, wpb=64778, bsz=128, num_updates=6746, lr=9.9954e-05, gnorm=2.463, loss_scale=2, train_wall=10, gb_free=2.8, wall=77565 2021-06-19 16:11:42 | INFO | train_inner | epoch 003: 787 / 3002 loss=2.73, ppl=6.63, wps=5784.7, ups=0.09, wpb=64772, bsz=128, num_updates=6747, lr=9.9954e-05, gnorm=2.196, loss_scale=2, train_wall=11, gb_free=2.8, wall=77577 2021-06-19 16:11:54 | INFO | train_inner | epoch 003: 788 / 3002 loss=2.572, ppl=5.95, wps=5768.2, ups=0.09, wpb=64879, bsz=128, num_updates=6748, lr=9.9954e-05, gnorm=2.16, loss_scale=2, train_wall=11, gb_free=2.8, wall=77588 2021-06-19 16:12:05 | INFO | train_inner | epoch 003: 789 / 3002 loss=2.608, ppl=6.1, wps=5825.3, ups=0.09, wpb=64724, bsz=128, num_updates=6749, lr=9.9954e-05, gnorm=2.123, loss_scale=2, train_wall=11, gb_free=2.8, wall=77599 2021-06-19 16:12:16 | INFO | train_inner | epoch 003: 790 / 3002 loss=2.774, ppl=6.84, wps=5766.7, ups=0.09, wpb=64880, bsz=128, num_updates=6750, lr=9.9954e-05, gnorm=2.139, loss_scale=2, train_wall=11, gb_free=2.8, wall=77610 2021-06-19 16:12:27 | INFO | train_inner | epoch 003: 791 / 3002 loss=2.616, ppl=6.13, wps=5886.1, ups=0.09, wpb=64819, bsz=128, num_updates=6751, lr=9.9954e-05, gnorm=2.245, loss_scale=2, train_wall=11, gb_free=2.8, wall=77621 2021-06-19 16:12:38 | INFO | train_inner | epoch 003: 792 / 3002 loss=2.587, ppl=6.01, wps=5852.5, ups=0.09, wpb=64717, bsz=128, num_updates=6752, lr=9.9954e-05, gnorm=2.109, loss_scale=2, train_wall=11, gb_free=2.8, wall=77632 2021-06-19 16:12:49 | INFO | train_inner | epoch 003: 793 / 3002 loss=2.806, ppl=6.99, wps=5852.8, ups=0.09, wpb=64890, bsz=128, num_updates=6753, lr=9.9954e-05, gnorm=3.43, loss_scale=2, train_wall=11, gb_free=2.8, wall=77643 2021-06-19 16:13:00 | INFO | train_inner | epoch 003: 794 / 3002 loss=2.682, ppl=6.42, wps=5864.8, ups=0.09, wpb=64861, bsz=128, num_updates=6754, lr=9.9954e-05, gnorm=7.251, loss_scale=2, train_wall=11, gb_free=2.8, wall=77654 2021-06-19 16:13:11 | INFO | train_inner | epoch 003: 795 / 3002 loss=2.546, ppl=5.84, wps=5785.3, ups=0.09, wpb=64805, bsz=128, num_updates=6755, lr=9.9954e-05, gnorm=2.174, loss_scale=2, train_wall=11, gb_free=2.8, wall=77666 2021-06-19 16:13:23 | INFO | train_inner | epoch 003: 796 / 3002 loss=2.647, ppl=6.26, wps=5726.7, ups=0.09, wpb=64724, bsz=128, num_updates=6756, lr=9.99539e-05, gnorm=2.378, loss_scale=2, train_wall=11, gb_free=2.8, wall=77677 2021-06-19 16:13:34 | INFO | train_inner | epoch 003: 797 / 3002 loss=2.727, ppl=6.62, wps=5768, ups=0.09, wpb=64816, bsz=128, num_updates=6757, lr=9.99539e-05, gnorm=2.231, loss_scale=2, train_wall=11, gb_free=2.8, wall=77688 2021-06-19 16:13:45 | INFO | train_inner | epoch 003: 798 / 3002 loss=2.704, ppl=6.51, wps=5813.7, ups=0.09, wpb=64753, bsz=128, num_updates=6758, lr=9.99539e-05, gnorm=2.179, loss_scale=2, train_wall=11, gb_free=2.8, wall=77699 2021-06-19 16:13:56 | INFO | train_inner | epoch 003: 799 / 3002 loss=2.736, ppl=6.66, wps=5876.3, ups=0.09, wpb=64901, bsz=128, num_updates=6759, lr=9.99539e-05, gnorm=2.252, loss_scale=2, train_wall=11, gb_free=2.8, wall=77710 2021-06-19 16:14:07 | INFO | train_inner | epoch 003: 800 / 3002 loss=2.581, ppl=5.98, wps=5848, ups=0.09, wpb=64849, bsz=128, num_updates=6760, lr=9.99539e-05, gnorm=2.091, loss_scale=2, train_wall=11, gb_free=2.8, wall=77721 2021-06-19 16:14:18 | INFO | train_inner | epoch 003: 801 / 3002 loss=2.588, ppl=6.01, wps=5883.9, ups=0.09, wpb=64863, bsz=128, num_updates=6761, lr=9.99539e-05, gnorm=2.129, loss_scale=2, train_wall=11, gb_free=2.8, wall=77732 2021-06-19 16:14:29 | INFO | train_inner | epoch 003: 802 / 3002 loss=2.659, ppl=6.32, wps=5821.3, ups=0.09, wpb=64741, bsz=128, num_updates=6762, lr=9.99539e-05, gnorm=2.193, loss_scale=2, train_wall=11, gb_free=2.8, wall=77744 2021-06-19 16:14:40 | INFO | train_inner | epoch 003: 803 / 3002 loss=2.652, ppl=6.28, wps=5855.1, ups=0.09, wpb=64777, bsz=128, num_updates=6763, lr=9.99539e-05, gnorm=2.109, loss_scale=2, train_wall=11, gb_free=2.8, wall=77755 2021-06-19 16:14:51 | INFO | train_inner | epoch 003: 804 / 3002 loss=2.719, ppl=6.59, wps=5843, ups=0.09, wpb=64883, bsz=128, num_updates=6764, lr=9.99539e-05, gnorm=2.209, loss_scale=2, train_wall=11, gb_free=2.8, wall=77766 2021-06-19 16:15:02 | INFO | train_inner | epoch 003: 805 / 3002 loss=2.728, ppl=6.63, wps=5930.3, ups=0.09, wpb=64788, bsz=128, num_updates=6765, lr=9.99539e-05, gnorm=2.128, loss_scale=2, train_wall=10, gb_free=2.8, wall=77777 2021-06-19 16:15:14 | INFO | train_inner | epoch 003: 806 / 3002 loss=2.612, ppl=6.11, wps=5823.1, ups=0.09, wpb=64865, bsz=128, num_updates=6766, lr=9.99539e-05, gnorm=2.164, loss_scale=2, train_wall=11, gb_free=2.8, wall=77788 2021-06-19 16:15:25 | INFO | train_inner | epoch 003: 807 / 3002 loss=2.471, ppl=5.54, wps=5736, ups=0.09, wpb=64759, bsz=128, num_updates=6767, lr=9.99539e-05, gnorm=2.239, loss_scale=2, train_wall=11, gb_free=2.8, wall=77799 2021-06-19 16:15:36 | INFO | train_inner | epoch 003: 808 / 3002 loss=2.527, ppl=5.76, wps=5892.2, ups=0.09, wpb=64902, bsz=128, num_updates=6768, lr=9.99539e-05, gnorm=2.569, loss_scale=2, train_wall=11, gb_free=2.8, wall=77810 2021-06-19 16:15:47 | INFO | train_inner | epoch 003: 809 / 3002 loss=2.6, ppl=6.06, wps=5791.9, ups=0.09, wpb=64823, bsz=128, num_updates=6769, lr=9.99538e-05, gnorm=2.221, loss_scale=2, train_wall=11, gb_free=2.8, wall=77821 2021-06-19 16:15:58 | INFO | train_inner | epoch 003: 810 / 3002 loss=2.584, ppl=6, wps=5921.7, ups=0.09, wpb=64907, bsz=128, num_updates=6770, lr=9.99538e-05, gnorm=2.21, loss_scale=2, train_wall=10, gb_free=2.8, wall=77832 2021-06-19 16:16:09 | INFO | train_inner | epoch 003: 811 / 3002 loss=2.669, ppl=6.36, wps=5775.3, ups=0.09, wpb=64787, bsz=128, num_updates=6771, lr=9.99538e-05, gnorm=2.456, loss_scale=4, train_wall=11, gb_free=2.8, wall=77844 2021-06-19 16:16:20 | INFO | train_inner | epoch 003: 812 / 3002 loss=2.697, ppl=6.48, wps=5784.1, ups=0.09, wpb=64743, bsz=128, num_updates=6772, lr=9.99538e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=77855 2021-06-19 16:16:32 | INFO | train_inner | epoch 003: 813 / 3002 loss=2.751, ppl=6.73, wps=5696, ups=0.09, wpb=64699, bsz=128, num_updates=6773, lr=9.99538e-05, gnorm=2.135, loss_scale=4, train_wall=11, gb_free=2.8, wall=77866 2021-06-19 16:16:43 | INFO | train_inner | epoch 003: 814 / 3002 loss=2.802, ppl=6.97, wps=5874, ups=0.09, wpb=64946, bsz=128, num_updates=6774, lr=9.99538e-05, gnorm=2.209, loss_scale=4, train_wall=11, gb_free=2.8, wall=77877 2021-06-19 16:16:54 | INFO | train_inner | epoch 003: 815 / 3002 loss=2.514, ppl=5.71, wps=5839.1, ups=0.09, wpb=64840, bsz=128, num_updates=6775, lr=9.99538e-05, gnorm=2.075, loss_scale=4, train_wall=11, gb_free=2.8, wall=77888 2021-06-19 16:17:05 | INFO | train_inner | epoch 003: 816 / 3002 loss=2.68, ppl=6.41, wps=5854.1, ups=0.09, wpb=64867, bsz=128, num_updates=6776, lr=9.99538e-05, gnorm=13.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=77899 2021-06-19 16:17:16 | INFO | train_inner | epoch 003: 817 / 3002 loss=2.873, ppl=7.32, wps=5897.9, ups=0.09, wpb=64847, bsz=128, num_updates=6777, lr=9.99538e-05, gnorm=2.162, loss_scale=4, train_wall=11, gb_free=2.8, wall=77910 2021-06-19 16:17:27 | INFO | train_inner | epoch 003: 818 / 3002 loss=2.572, ppl=5.95, wps=5807.2, ups=0.09, wpb=64816, bsz=128, num_updates=6778, lr=9.99538e-05, gnorm=2.232, loss_scale=4, train_wall=11, gb_free=2.8, wall=77921 2021-06-19 16:17:38 | INFO | train_inner | epoch 003: 819 / 3002 loss=2.604, ppl=6.08, wps=5923.4, ups=0.09, wpb=64880, bsz=128, num_updates=6779, lr=9.99538e-05, gnorm=2.146, loss_scale=4, train_wall=10, gb_free=2.8, wall=77932 2021-06-19 16:17:49 | INFO | train_inner | epoch 003: 820 / 3002 loss=2.698, ppl=6.49, wps=5828.5, ups=0.09, wpb=64759, bsz=128, num_updates=6780, lr=9.99538e-05, gnorm=2.161, loss_scale=4, train_wall=11, gb_free=2.8, wall=77944 2021-06-19 16:18:00 | INFO | train_inner | epoch 003: 821 / 3002 loss=2.716, ppl=6.57, wps=5829.2, ups=0.09, wpb=64845, bsz=128, num_updates=6781, lr=9.99537e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=77955 2021-06-19 16:18:12 | INFO | train_inner | epoch 003: 822 / 3002 loss=2.582, ppl=5.99, wps=5744, ups=0.09, wpb=64837, bsz=128, num_updates=6782, lr=9.99537e-05, gnorm=2.22, loss_scale=4, train_wall=11, gb_free=2.8, wall=77966 2021-06-19 16:18:23 | INFO | train_inner | epoch 003: 823 / 3002 loss=2.678, ppl=6.4, wps=5802.4, ups=0.09, wpb=64842, bsz=128, num_updates=6783, lr=9.99537e-05, gnorm=2.248, loss_scale=4, train_wall=11, gb_free=2.8, wall=77977 2021-06-19 16:18:34 | INFO | train_inner | epoch 003: 824 / 3002 loss=2.702, ppl=6.51, wps=5864.6, ups=0.09, wpb=64894, bsz=128, num_updates=6784, lr=9.99537e-05, gnorm=2.247, loss_scale=4, train_wall=11, gb_free=2.8, wall=77988 2021-06-19 16:18:45 | INFO | train_inner | epoch 003: 825 / 3002 loss=2.548, ppl=5.85, wps=5854.9, ups=0.09, wpb=64862, bsz=128, num_updates=6785, lr=9.99537e-05, gnorm=2.664, loss_scale=4, train_wall=11, gb_free=2.8, wall=77999 2021-06-19 16:18:56 | INFO | train_inner | epoch 003: 826 / 3002 loss=2.653, ppl=6.29, wps=5886.2, ups=0.09, wpb=64765, bsz=128, num_updates=6786, lr=9.99537e-05, gnorm=2.11, loss_scale=4, train_wall=11, gb_free=2.8, wall=78010 2021-06-19 16:19:07 | INFO | train_inner | epoch 003: 827 / 3002 loss=2.765, ppl=6.8, wps=5877.3, ups=0.09, wpb=64793, bsz=128, num_updates=6787, lr=9.99537e-05, gnorm=2.362, loss_scale=4, train_wall=11, gb_free=2.8, wall=78021 2021-06-19 16:19:18 | INFO | train_inner | epoch 003: 828 / 3002 loss=2.615, ppl=6.13, wps=5795.1, ups=0.09, wpb=64828, bsz=128, num_updates=6788, lr=9.99537e-05, gnorm=2.301, loss_scale=4, train_wall=11, gb_free=2.8, wall=78032 2021-06-19 16:19:29 | INFO | train_inner | epoch 003: 829 / 3002 loss=2.817, ppl=7.05, wps=5753.8, ups=0.09, wpb=64751, bsz=128, num_updates=6789, lr=9.99537e-05, gnorm=2.441, loss_scale=4, train_wall=11, gb_free=2.8, wall=78044 2021-06-19 16:19:41 | INFO | train_inner | epoch 003: 830 / 3002 loss=2.957, ppl=7.77, wps=5790.8, ups=0.09, wpb=64686, bsz=128, num_updates=6790, lr=9.99537e-05, gnorm=2.275, loss_scale=4, train_wall=11, gb_free=2.8, wall=78055 2021-06-19 16:19:51 | INFO | train_inner | epoch 003: 831 / 3002 loss=2.511, ppl=5.7, wps=6007, ups=0.09, wpb=64824, bsz=128, num_updates=6791, lr=9.99537e-05, gnorm=2.327, loss_scale=4, train_wall=10, gb_free=2.8, wall=78066 2021-06-19 16:20:02 | INFO | train_inner | epoch 003: 832 / 3002 loss=2.574, ppl=5.96, wps=5874.1, ups=0.09, wpb=64841, bsz=128, num_updates=6792, lr=9.99537e-05, gnorm=2.306, loss_scale=4, train_wall=11, gb_free=2.8, wall=78077 2021-06-19 16:20:13 | INFO | train_inner | epoch 003: 833 / 3002 loss=2.705, ppl=6.52, wps=6005.5, ups=0.09, wpb=64750, bsz=128, num_updates=6793, lr=9.99537e-05, gnorm=2.195, loss_scale=4, train_wall=10, gb_free=2.8, wall=78088 2021-06-19 16:20:24 | INFO | train_inner | epoch 003: 834 / 3002 loss=2.743, ppl=6.7, wps=5802.9, ups=0.09, wpb=64845, bsz=128, num_updates=6794, lr=9.99536e-05, gnorm=2.593, loss_scale=4, train_wall=11, gb_free=2.8, wall=78099 2021-06-19 16:20:36 | INFO | train_inner | epoch 003: 835 / 3002 loss=2.619, ppl=6.14, wps=5742, ups=0.09, wpb=64793, bsz=128, num_updates=6795, lr=9.99536e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=78110 2021-06-19 16:20:47 | INFO | train_inner | epoch 003: 836 / 3002 loss=2.558, ppl=5.89, wps=5791.3, ups=0.09, wpb=64788, bsz=128, num_updates=6796, lr=9.99536e-05, gnorm=2.814, loss_scale=4, train_wall=11, gb_free=2.8, wall=78121 2021-06-19 16:20:58 | INFO | train_inner | epoch 003: 837 / 3002 loss=2.777, ppl=6.85, wps=5935.1, ups=0.09, wpb=64881, bsz=128, num_updates=6797, lr=9.99536e-05, gnorm=2.245, loss_scale=4, train_wall=10, gb_free=2.8, wall=78132 2021-06-19 16:21:09 | INFO | train_inner | epoch 003: 838 / 3002 loss=2.573, ppl=5.95, wps=5790.3, ups=0.09, wpb=64809, bsz=128, num_updates=6798, lr=9.99536e-05, gnorm=2.475, loss_scale=4, train_wall=11, gb_free=2.8, wall=78143 2021-06-19 16:21:20 | INFO | train_inner | epoch 003: 839 / 3002 loss=2.968, ppl=7.82, wps=5868, ups=0.09, wpb=64826, bsz=128, num_updates=6799, lr=9.99536e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=78154 2021-06-19 16:21:31 | INFO | train_inner | epoch 003: 840 / 3002 loss=2.688, ppl=6.45, wps=5852.8, ups=0.09, wpb=64829, bsz=128, num_updates=6800, lr=9.99536e-05, gnorm=2.14, loss_scale=4, train_wall=11, gb_free=2.8, wall=78165 2021-06-19 16:21:42 | INFO | train_inner | epoch 003: 841 / 3002 loss=2.537, ppl=5.8, wps=5898.1, ups=0.09, wpb=64841, bsz=128, num_updates=6801, lr=9.99536e-05, gnorm=2.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=78176 2021-06-19 16:21:53 | INFO | train_inner | epoch 003: 842 / 3002 loss=2.668, ppl=6.36, wps=5791.8, ups=0.09, wpb=64870, bsz=128, num_updates=6802, lr=9.99536e-05, gnorm=2.23, loss_scale=4, train_wall=11, gb_free=2.8, wall=78188 2021-06-19 16:22:04 | INFO | train_inner | epoch 003: 843 / 3002 loss=2.813, ppl=7.03, wps=5960.2, ups=0.09, wpb=64858, bsz=128, num_updates=6803, lr=9.99536e-05, gnorm=2.394, loss_scale=4, train_wall=10, gb_free=2.8, wall=78199 2021-06-19 16:22:15 | INFO | train_inner | epoch 003: 844 / 3002 loss=2.656, ppl=6.3, wps=5965.2, ups=0.09, wpb=64912, bsz=128, num_updates=6804, lr=9.99536e-05, gnorm=2.269, loss_scale=4, train_wall=10, gb_free=2.8, wall=78209 2021-06-19 16:22:26 | INFO | train_inner | epoch 003: 845 / 3002 loss=2.686, ppl=6.44, wps=5725.9, ups=0.09, wpb=64881, bsz=128, num_updates=6805, lr=9.99536e-05, gnorm=2.205, loss_scale=4, train_wall=11, gb_free=2.8, wall=78221 2021-06-19 16:22:37 | INFO | train_inner | epoch 003: 846 / 3002 loss=2.651, ppl=6.28, wps=5882.4, ups=0.09, wpb=64803, bsz=128, num_updates=6806, lr=9.99535e-05, gnorm=2.24, loss_scale=4, train_wall=11, gb_free=2.8, wall=78232 2021-06-19 16:22:49 | INFO | train_inner | epoch 003: 847 / 3002 loss=2.554, ppl=5.87, wps=5818.4, ups=0.09, wpb=64803, bsz=128, num_updates=6807, lr=9.99535e-05, gnorm=2.256, loss_scale=4, train_wall=11, gb_free=2.8, wall=78243 2021-06-19 16:23:00 | INFO | train_inner | epoch 003: 848 / 3002 loss=2.66, ppl=6.32, wps=5820.2, ups=0.09, wpb=64804, bsz=128, num_updates=6808, lr=9.99535e-05, gnorm=2.132, loss_scale=4, train_wall=11, gb_free=2.8, wall=78254 2021-06-19 16:23:11 | INFO | train_inner | epoch 003: 849 / 3002 loss=2.821, ppl=7.06, wps=5882.6, ups=0.09, wpb=64817, bsz=128, num_updates=6809, lr=9.99535e-05, gnorm=2.6, loss_scale=4, train_wall=11, gb_free=2.8, wall=78265 2021-06-19 16:23:22 | INFO | train_inner | epoch 003: 850 / 3002 loss=2.736, ppl=6.66, wps=5919.6, ups=0.09, wpb=64789, bsz=128, num_updates=6810, lr=9.99535e-05, gnorm=2.288, loss_scale=4, train_wall=11, gb_free=2.8, wall=78276 2021-06-19 16:23:33 | INFO | train_inner | epoch 003: 851 / 3002 loss=2.708, ppl=6.53, wps=5808, ups=0.09, wpb=64804, bsz=128, num_updates=6811, lr=9.99535e-05, gnorm=2.257, loss_scale=4, train_wall=11, gb_free=2.8, wall=78287 2021-06-19 16:23:44 | INFO | train_inner | epoch 003: 852 / 3002 loss=2.643, ppl=6.25, wps=5864.3, ups=0.09, wpb=64845, bsz=128, num_updates=6812, lr=9.99535e-05, gnorm=2.228, loss_scale=4, train_wall=11, gb_free=2.8, wall=78298 2021-06-19 16:23:55 | INFO | train_inner | epoch 003: 853 / 3002 loss=2.634, ppl=6.21, wps=5766.6, ups=0.09, wpb=64849, bsz=128, num_updates=6813, lr=9.99535e-05, gnorm=2.268, loss_scale=4, train_wall=11, gb_free=2.8, wall=78309 2021-06-19 16:24:06 | INFO | train_inner | epoch 003: 854 / 3002 loss=2.617, ppl=6.13, wps=5735.2, ups=0.09, wpb=64807, bsz=128, num_updates=6814, lr=9.99535e-05, gnorm=2.54, loss_scale=4, train_wall=11, gb_free=2.8, wall=78321 2021-06-19 16:24:18 | INFO | train_inner | epoch 003: 855 / 3002 loss=2.69, ppl=6.45, wps=5779.6, ups=0.09, wpb=64784, bsz=128, num_updates=6815, lr=9.99535e-05, gnorm=3.497, loss_scale=4, train_wall=11, gb_free=2.8, wall=78332 2021-06-19 16:24:29 | INFO | train_inner | epoch 003: 856 / 3002 loss=2.684, ppl=6.43, wps=5842.6, ups=0.09, wpb=64835, bsz=128, num_updates=6816, lr=9.99535e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=78343 2021-06-19 16:24:40 | INFO | train_inner | epoch 003: 857 / 3002 loss=2.579, ppl=5.97, wps=5833.1, ups=0.09, wpb=64812, bsz=128, num_updates=6817, lr=9.99535e-05, gnorm=2.506, loss_scale=4, train_wall=11, gb_free=2.8, wall=78354 2021-06-19 16:24:51 | INFO | train_inner | epoch 003: 858 / 3002 loss=2.865, ppl=7.29, wps=5784.3, ups=0.09, wpb=64848, bsz=128, num_updates=6818, lr=9.99535e-05, gnorm=2.206, loss_scale=4, train_wall=11, gb_free=2.8, wall=78365 2021-06-19 16:25:02 | INFO | train_inner | epoch 003: 859 / 3002 loss=2.688, ppl=6.44, wps=5861.5, ups=0.09, wpb=64855, bsz=128, num_updates=6819, lr=9.99534e-05, gnorm=2.145, loss_scale=4, train_wall=11, gb_free=2.8, wall=78376 2021-06-19 16:25:13 | INFO | train_inner | epoch 003: 860 / 3002 loss=2.583, ppl=5.99, wps=5844.5, ups=0.09, wpb=64850, bsz=128, num_updates=6820, lr=9.99534e-05, gnorm=2.192, loss_scale=4, train_wall=11, gb_free=2.8, wall=78388 2021-06-19 16:25:24 | INFO | train_inner | epoch 003: 861 / 3002 loss=2.649, ppl=6.27, wps=5771.4, ups=0.09, wpb=64802, bsz=128, num_updates=6821, lr=9.99534e-05, gnorm=4.626, loss_scale=4, train_wall=11, gb_free=2.8, wall=78399 2021-06-19 16:25:36 | INFO | train_inner | epoch 003: 862 / 3002 loss=2.631, ppl=6.2, wps=5804.5, ups=0.09, wpb=64792, bsz=128, num_updates=6822, lr=9.99534e-05, gnorm=2.41, loss_scale=4, train_wall=11, gb_free=2.8, wall=78410 2021-06-19 16:25:47 | INFO | train_inner | epoch 003: 863 / 3002 loss=2.635, ppl=6.21, wps=5853.2, ups=0.09, wpb=64807, bsz=128, num_updates=6823, lr=9.99534e-05, gnorm=2.22, loss_scale=4, train_wall=11, gb_free=2.8, wall=78421 2021-06-19 16:25:58 | INFO | train_inner | epoch 003: 864 / 3002 loss=2.41, ppl=5.31, wps=5866.5, ups=0.09, wpb=64920, bsz=128, num_updates=6824, lr=9.99534e-05, gnorm=2.157, loss_scale=4, train_wall=11, gb_free=2.8, wall=78432 2021-06-19 16:26:09 | INFO | train_inner | epoch 003: 865 / 3002 loss=2.667, ppl=6.35, wps=5917.4, ups=0.09, wpb=64867, bsz=128, num_updates=6825, lr=9.99534e-05, gnorm=10.985, loss_scale=4, train_wall=10, gb_free=2.8, wall=78443 2021-06-19 16:26:20 | INFO | train_inner | epoch 003: 866 / 3002 loss=2.48, ppl=5.58, wps=5714.2, ups=0.09, wpb=64804, bsz=128, num_updates=6826, lr=9.99534e-05, gnorm=2.081, loss_scale=4, train_wall=11, gb_free=2.8, wall=78454 2021-06-19 16:26:31 | INFO | train_inner | epoch 003: 867 / 3002 loss=2.685, ppl=6.43, wps=5927, ups=0.09, wpb=64805, bsz=128, num_updates=6827, lr=9.99534e-05, gnorm=2.256, loss_scale=4, train_wall=10, gb_free=2.8, wall=78465 2021-06-19 16:26:42 | INFO | train_inner | epoch 003: 868 / 3002 loss=2.476, ppl=5.57, wps=5864.2, ups=0.09, wpb=64756, bsz=128, num_updates=6828, lr=9.99534e-05, gnorm=2.433, loss_scale=4, train_wall=11, gb_free=2.8, wall=78476 2021-06-19 16:26:53 | INFO | train_inner | epoch 003: 869 / 3002 loss=2.653, ppl=6.29, wps=5782.2, ups=0.09, wpb=64836, bsz=128, num_updates=6829, lr=9.99534e-05, gnorm=2.106, loss_scale=4, train_wall=11, gb_free=2.8, wall=78488 2021-06-19 16:27:04 | INFO | train_inner | epoch 003: 870 / 3002 loss=2.641, ppl=6.24, wps=5833.8, ups=0.09, wpb=64813, bsz=128, num_updates=6830, lr=9.99534e-05, gnorm=2.068, loss_scale=4, train_wall=11, gb_free=2.8, wall=78499 2021-06-19 16:27:15 | INFO | train_inner | epoch 003: 871 / 3002 loss=2.665, ppl=6.34, wps=5807.4, ups=0.09, wpb=64833, bsz=128, num_updates=6831, lr=9.99533e-05, gnorm=2.074, loss_scale=4, train_wall=11, gb_free=2.8, wall=78510 2021-06-19 16:27:26 | INFO | train_inner | epoch 003: 872 / 3002 loss=2.684, ppl=6.43, wps=5912.3, ups=0.09, wpb=64815, bsz=128, num_updates=6832, lr=9.99533e-05, gnorm=2.194, loss_scale=4, train_wall=10, gb_free=2.8, wall=78521 2021-06-19 16:27:38 | INFO | train_inner | epoch 003: 873 / 3002 loss=2.602, ppl=6.07, wps=5736.2, ups=0.09, wpb=64857, bsz=128, num_updates=6833, lr=9.99533e-05, gnorm=2.164, loss_scale=4, train_wall=11, gb_free=2.8, wall=78532 2021-06-19 16:27:49 | INFO | train_inner | epoch 003: 874 / 3002 loss=2.556, ppl=5.88, wps=5745.5, ups=0.09, wpb=64833, bsz=128, num_updates=6834, lr=9.99533e-05, gnorm=2.215, loss_scale=4, train_wall=11, gb_free=2.8, wall=78543 2021-06-19 16:28:00 | INFO | train_inner | epoch 003: 875 / 3002 loss=2.609, ppl=6.1, wps=5875.1, ups=0.09, wpb=64846, bsz=128, num_updates=6835, lr=9.99533e-05, gnorm=2.187, loss_scale=4, train_wall=11, gb_free=2.8, wall=78554 2021-06-19 16:28:11 | INFO | train_inner | epoch 003: 876 / 3002 loss=2.444, ppl=5.44, wps=5842.4, ups=0.09, wpb=64860, bsz=128, num_updates=6836, lr=9.99533e-05, gnorm=2.262, loss_scale=4, train_wall=11, gb_free=2.8, wall=78566 2021-06-19 16:28:22 | INFO | train_inner | epoch 003: 877 / 3002 loss=2.672, ppl=6.37, wps=5885.4, ups=0.09, wpb=64883, bsz=128, num_updates=6837, lr=9.99533e-05, gnorm=2.358, loss_scale=4, train_wall=11, gb_free=2.8, wall=78577 2021-06-19 16:28:33 | INFO | train_inner | epoch 003: 878 / 3002 loss=2.648, ppl=6.27, wps=5783.4, ups=0.09, wpb=64723, bsz=128, num_updates=6838, lr=9.99533e-05, gnorm=3.299, loss_scale=4, train_wall=11, gb_free=2.8, wall=78588 2021-06-19 16:28:44 | INFO | train_inner | epoch 003: 879 / 3002 loss=2.701, ppl=6.5, wps=5913.2, ups=0.09, wpb=64858, bsz=128, num_updates=6839, lr=9.99533e-05, gnorm=2.15, loss_scale=4, train_wall=10, gb_free=2.8, wall=78599 2021-06-19 16:28:55 | INFO | train_inner | epoch 003: 880 / 3002 loss=2.504, ppl=5.67, wps=5829.1, ups=0.09, wpb=64731, bsz=128, num_updates=6840, lr=9.99533e-05, gnorm=2.145, loss_scale=4, train_wall=11, gb_free=2.8, wall=78610 2021-06-19 16:29:07 | INFO | train_inner | epoch 003: 881 / 3002 loss=2.76, ppl=6.78, wps=5835.2, ups=0.09, wpb=64855, bsz=128, num_updates=6841, lr=9.99533e-05, gnorm=2.156, loss_scale=4, train_wall=11, gb_free=2.8, wall=78621 2021-06-19 16:29:18 | INFO | train_inner | epoch 003: 882 / 3002 loss=2.665, ppl=6.34, wps=5853.3, ups=0.09, wpb=64837, bsz=128, num_updates=6842, lr=9.99533e-05, gnorm=3.33, loss_scale=4, train_wall=11, gb_free=2.8, wall=78632 2021-06-19 16:29:29 | INFO | train_inner | epoch 003: 883 / 3002 loss=2.838, ppl=7.15, wps=5786, ups=0.09, wpb=64867, bsz=128, num_updates=6843, lr=9.99533e-05, gnorm=2.276, loss_scale=4, train_wall=11, gb_free=2.8, wall=78643 2021-06-19 16:29:40 | INFO | train_inner | epoch 003: 884 / 3002 loss=2.671, ppl=6.37, wps=5641.3, ups=0.09, wpb=64805, bsz=128, num_updates=6844, lr=9.99532e-05, gnorm=3.05, loss_scale=4, train_wall=11, gb_free=2.8, wall=78655 2021-06-19 16:29:52 | INFO | train_inner | epoch 003: 885 / 3002 loss=2.62, ppl=6.15, wps=5797.2, ups=0.09, wpb=64856, bsz=128, num_updates=6845, lr=9.99532e-05, gnorm=2.689, loss_scale=4, train_wall=11, gb_free=2.8, wall=78666 2021-06-19 16:30:02 | INFO | train_inner | epoch 003: 886 / 3002 loss=2.651, ppl=6.28, wps=5995.9, ups=0.09, wpb=64830, bsz=128, num_updates=6846, lr=9.99532e-05, gnorm=2.131, loss_scale=4, train_wall=10, gb_free=2.8, wall=78677 2021-06-19 16:30:14 | INFO | train_inner | epoch 003: 887 / 3002 loss=2.573, ppl=5.95, wps=5772, ups=0.09, wpb=64875, bsz=128, num_updates=6847, lr=9.99532e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=78688 2021-06-19 16:30:25 | INFO | train_inner | epoch 003: 888 / 3002 loss=2.645, ppl=6.26, wps=5884.5, ups=0.09, wpb=64784, bsz=128, num_updates=6848, lr=9.99532e-05, gnorm=2.307, loss_scale=4, train_wall=11, gb_free=2.8, wall=78699 2021-06-19 16:30:36 | INFO | train_inner | epoch 003: 889 / 3002 loss=2.541, ppl=5.82, wps=5813.2, ups=0.09, wpb=64834, bsz=128, num_updates=6849, lr=9.99532e-05, gnorm=2.157, loss_scale=4, train_wall=11, gb_free=2.8, wall=78710 2021-06-19 16:30:47 | INFO | train_inner | epoch 003: 890 / 3002 loss=2.666, ppl=6.35, wps=5829, ups=0.09, wpb=64768, bsz=128, num_updates=6850, lr=9.99532e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=78721 2021-06-19 16:30:58 | INFO | train_inner | epoch 003: 891 / 3002 loss=2.775, ppl=6.84, wps=5770, ups=0.09, wpb=64774, bsz=128, num_updates=6851, lr=9.99532e-05, gnorm=3.213, loss_scale=4, train_wall=11, gb_free=2.8, wall=78732 2021-06-19 16:31:09 | INFO | train_inner | epoch 003: 892 / 3002 loss=2.645, ppl=6.26, wps=5741, ups=0.09, wpb=64790, bsz=128, num_updates=6852, lr=9.99532e-05, gnorm=2.077, loss_scale=4, train_wall=11, gb_free=2.8, wall=78744 2021-06-19 16:31:21 | INFO | train_inner | epoch 003: 893 / 3002 loss=2.642, ppl=6.24, wps=5786.3, ups=0.09, wpb=64791, bsz=128, num_updates=6853, lr=9.99532e-05, gnorm=2.22, loss_scale=4, train_wall=11, gb_free=2.8, wall=78755 2021-06-19 16:31:32 | INFO | train_inner | epoch 003: 894 / 3002 loss=2.756, ppl=6.75, wps=5753.4, ups=0.09, wpb=64805, bsz=128, num_updates=6854, lr=9.99532e-05, gnorm=2.196, loss_scale=4, train_wall=11, gb_free=2.8, wall=78766 2021-06-19 16:31:43 | INFO | train_inner | epoch 003: 895 / 3002 loss=2.552, ppl=5.86, wps=5863.6, ups=0.09, wpb=64832, bsz=128, num_updates=6855, lr=9.99532e-05, gnorm=2.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=78777 2021-06-19 16:31:54 | INFO | train_inner | epoch 003: 896 / 3002 loss=2.524, ppl=5.75, wps=5842.4, ups=0.09, wpb=64829, bsz=128, num_updates=6856, lr=9.99531e-05, gnorm=2.154, loss_scale=4, train_wall=11, gb_free=2.8, wall=78788 2021-06-19 16:32:05 | INFO | train_inner | epoch 003: 897 / 3002 loss=2.695, ppl=6.47, wps=5800, ups=0.09, wpb=64887, bsz=128, num_updates=6857, lr=9.99531e-05, gnorm=2.19, loss_scale=4, train_wall=11, gb_free=2.8, wall=78800 2021-06-19 16:32:16 | INFO | train_inner | epoch 003: 898 / 3002 loss=2.73, ppl=6.64, wps=5894.3, ups=0.09, wpb=64844, bsz=128, num_updates=6858, lr=9.99531e-05, gnorm=2.226, loss_scale=4, train_wall=11, gb_free=2.8, wall=78811 2021-06-19 16:32:27 | INFO | train_inner | epoch 003: 899 / 3002 loss=2.658, ppl=6.31, wps=5761.9, ups=0.09, wpb=64808, bsz=128, num_updates=6859, lr=9.99531e-05, gnorm=3.727, loss_scale=4, train_wall=11, gb_free=2.8, wall=78822 2021-06-19 16:32:39 | INFO | train_inner | epoch 003: 900 / 3002 loss=2.804, ppl=6.99, wps=5825.7, ups=0.09, wpb=64833, bsz=128, num_updates=6860, lr=9.99531e-05, gnorm=2.235, loss_scale=4, train_wall=11, gb_free=2.8, wall=78833 2021-06-19 16:32:50 | INFO | train_inner | epoch 003: 901 / 3002 loss=2.613, ppl=6.12, wps=5803.7, ups=0.09, wpb=64823, bsz=128, num_updates=6861, lr=9.99531e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=78844 2021-06-19 16:33:01 | INFO | train_inner | epoch 003: 902 / 3002 loss=2.61, ppl=6.11, wps=5872.1, ups=0.09, wpb=64775, bsz=128, num_updates=6862, lr=9.99531e-05, gnorm=4.644, loss_scale=4, train_wall=11, gb_free=2.8, wall=78855 2021-06-19 16:33:12 | INFO | train_inner | epoch 003: 903 / 3002 loss=2.713, ppl=6.56, wps=5882.1, ups=0.09, wpb=64818, bsz=128, num_updates=6863, lr=9.99531e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=78866 2021-06-19 16:33:23 | INFO | train_inner | epoch 003: 904 / 3002 loss=2.524, ppl=5.75, wps=5807.6, ups=0.09, wpb=64847, bsz=128, num_updates=6864, lr=9.99531e-05, gnorm=2.107, loss_scale=4, train_wall=11, gb_free=2.8, wall=78877 2021-06-19 16:33:34 | INFO | train_inner | epoch 003: 905 / 3002 loss=2.737, ppl=6.67, wps=5828.9, ups=0.09, wpb=64794, bsz=128, num_updates=6865, lr=9.99531e-05, gnorm=2.41, loss_scale=4, train_wall=11, gb_free=2.8, wall=78888 2021-06-19 16:33:45 | INFO | train_inner | epoch 003: 906 / 3002 loss=2.617, ppl=6.13, wps=5938.5, ups=0.09, wpb=64849, bsz=128, num_updates=6866, lr=9.99531e-05, gnorm=2.545, loss_scale=4, train_wall=10, gb_free=2.8, wall=78899 2021-06-19 16:33:56 | INFO | train_inner | epoch 003: 907 / 3002 loss=2.588, ppl=6.01, wps=5868.1, ups=0.09, wpb=64860, bsz=128, num_updates=6867, lr=9.99531e-05, gnorm=2.359, loss_scale=4, train_wall=11, gb_free=2.8, wall=78910 2021-06-19 16:34:07 | INFO | train_inner | epoch 003: 908 / 3002 loss=2.661, ppl=6.32, wps=5835, ups=0.09, wpb=64821, bsz=128, num_updates=6868, lr=9.99531e-05, gnorm=2.192, loss_scale=4, train_wall=11, gb_free=2.8, wall=78921 2021-06-19 16:34:18 | INFO | train_inner | epoch 003: 909 / 3002 loss=2.62, ppl=6.15, wps=5761.6, ups=0.09, wpb=64845, bsz=128, num_updates=6869, lr=9.9953e-05, gnorm=2.151, loss_scale=4, train_wall=11, gb_free=2.8, wall=78933 2021-06-19 16:34:29 | INFO | train_inner | epoch 003: 910 / 3002 loss=2.521, ppl=5.74, wps=5884.3, ups=0.09, wpb=64922, bsz=128, num_updates=6870, lr=9.9953e-05, gnorm=2.179, loss_scale=4, train_wall=11, gb_free=2.8, wall=78944 2021-06-19 16:34:41 | INFO | train_inner | epoch 003: 911 / 3002 loss=2.647, ppl=6.26, wps=5845.9, ups=0.09, wpb=64943, bsz=128, num_updates=6871, lr=9.9953e-05, gnorm=2.249, loss_scale=4, train_wall=11, gb_free=2.8, wall=78955 2021-06-19 16:34:52 | INFO | train_inner | epoch 003: 912 / 3002 loss=2.506, ppl=5.68, wps=5863.6, ups=0.09, wpb=64836, bsz=128, num_updates=6872, lr=9.9953e-05, gnorm=2.153, loss_scale=4, train_wall=11, gb_free=2.8, wall=78966 2021-06-19 16:35:03 | INFO | train_inner | epoch 003: 913 / 3002 loss=2.512, ppl=5.71, wps=5833.8, ups=0.09, wpb=64749, bsz=128, num_updates=6873, lr=9.9953e-05, gnorm=2.231, loss_scale=4, train_wall=11, gb_free=2.8, wall=78977 2021-06-19 16:35:14 | INFO | train_inner | epoch 003: 914 / 3002 loss=2.706, ppl=6.52, wps=5850.1, ups=0.09, wpb=64854, bsz=128, num_updates=6874, lr=9.9953e-05, gnorm=2.526, loss_scale=4, train_wall=11, gb_free=2.8, wall=78988 2021-06-19 16:35:25 | INFO | train_inner | epoch 003: 915 / 3002 loss=2.584, ppl=6, wps=5903.2, ups=0.09, wpb=64862, bsz=128, num_updates=6875, lr=9.9953e-05, gnorm=2.152, loss_scale=4, train_wall=11, gb_free=2.8, wall=78999 2021-06-19 16:35:36 | INFO | train_inner | epoch 003: 916 / 3002 loss=2.589, ppl=6.02, wps=5918.1, ups=0.09, wpb=64795, bsz=128, num_updates=6876, lr=9.9953e-05, gnorm=2.174, loss_scale=4, train_wall=10, gb_free=2.8, wall=79010 2021-06-19 16:35:47 | INFO | train_inner | epoch 003: 917 / 3002 loss=2.674, ppl=6.38, wps=5902.1, ups=0.09, wpb=64741, bsz=128, num_updates=6877, lr=9.9953e-05, gnorm=2.32, loss_scale=4, train_wall=11, gb_free=2.8, wall=79021 2021-06-19 16:35:58 | INFO | train_inner | epoch 003: 918 / 3002 loss=2.766, ppl=6.8, wps=5792.4, ups=0.09, wpb=64811, bsz=128, num_updates=6878, lr=9.9953e-05, gnorm=2.209, loss_scale=4, train_wall=11, gb_free=2.8, wall=79032 2021-06-19 16:36:09 | INFO | train_inner | epoch 003: 919 / 3002 loss=2.728, ppl=6.63, wps=5763, ups=0.09, wpb=64902, bsz=128, num_updates=6879, lr=9.9953e-05, gnorm=2.144, loss_scale=4, train_wall=11, gb_free=2.8, wall=79043 2021-06-19 16:36:20 | INFO | train_inner | epoch 003: 920 / 3002 loss=2.502, ppl=5.67, wps=5836.7, ups=0.09, wpb=64885, bsz=128, num_updates=6880, lr=9.9953e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=79055 2021-06-19 16:36:31 | INFO | train_inner | epoch 003: 921 / 3002 loss=2.613, ppl=6.12, wps=5781, ups=0.09, wpb=64771, bsz=128, num_updates=6881, lr=9.99529e-05, gnorm=2.378, loss_scale=4, train_wall=11, gb_free=2.8, wall=79066 2021-06-19 16:36:43 | INFO | train_inner | epoch 003: 922 / 3002 loss=2.648, ppl=6.27, wps=5724.1, ups=0.09, wpb=64822, bsz=128, num_updates=6882, lr=9.99529e-05, gnorm=2.179, loss_scale=4, train_wall=11, gb_free=2.8, wall=79077 2021-06-19 16:36:54 | INFO | train_inner | epoch 003: 923 / 3002 loss=2.67, ppl=6.36, wps=5916.1, ups=0.09, wpb=64897, bsz=128, num_updates=6883, lr=9.99529e-05, gnorm=2.215, loss_scale=4, train_wall=10, gb_free=2.8, wall=79088 2021-06-19 16:37:05 | INFO | train_inner | epoch 003: 924 / 3002 loss=2.586, ppl=6.01, wps=5808.4, ups=0.09, wpb=64880, bsz=128, num_updates=6884, lr=9.99529e-05, gnorm=2.248, loss_scale=4, train_wall=11, gb_free=2.8, wall=79099 2021-06-19 16:37:16 | INFO | train_inner | epoch 003: 925 / 3002 loss=2.649, ppl=6.27, wps=5853, ups=0.09, wpb=64859, bsz=128, num_updates=6885, lr=9.99529e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=79110 2021-06-19 16:37:27 | INFO | train_inner | epoch 003: 926 / 3002 loss=2.679, ppl=6.4, wps=5798.1, ups=0.09, wpb=64940, bsz=128, num_updates=6886, lr=9.99529e-05, gnorm=2.103, loss_scale=4, train_wall=11, gb_free=2.8, wall=79122 2021-06-19 16:37:38 | INFO | train_inner | epoch 003: 927 / 3002 loss=2.707, ppl=6.53, wps=5825.6, ups=0.09, wpb=64867, bsz=128, num_updates=6887, lr=9.99529e-05, gnorm=2.189, loss_scale=4, train_wall=11, gb_free=2.8, wall=79133 2021-06-19 16:37:49 | INFO | train_inner | epoch 003: 928 / 3002 loss=2.626, ppl=6.17, wps=5913.9, ups=0.09, wpb=64828, bsz=128, num_updates=6888, lr=9.99529e-05, gnorm=2.205, loss_scale=4, train_wall=11, gb_free=2.8, wall=79144 2021-06-19 16:38:00 | INFO | train_inner | epoch 003: 929 / 3002 loss=2.536, ppl=5.8, wps=5907.6, ups=0.09, wpb=64862, bsz=128, num_updates=6889, lr=9.99529e-05, gnorm=2.305, loss_scale=4, train_wall=11, gb_free=2.8, wall=79155 2021-06-19 16:38:11 | INFO | train_inner | epoch 003: 930 / 3002 loss=2.558, ppl=5.89, wps=5852, ups=0.09, wpb=64763, bsz=128, num_updates=6890, lr=9.99529e-05, gnorm=2.225, loss_scale=4, train_wall=11, gb_free=2.8, wall=79166 2021-06-19 16:38:22 | INFO | train_inner | epoch 003: 931 / 3002 loss=2.714, ppl=6.56, wps=5853.3, ups=0.09, wpb=64884, bsz=128, num_updates=6891, lr=9.99529e-05, gnorm=2.165, loss_scale=4, train_wall=11, gb_free=2.8, wall=79177 2021-06-19 16:38:34 | INFO | train_inner | epoch 003: 932 / 3002 loss=2.604, ppl=6.08, wps=5808.5, ups=0.09, wpb=64894, bsz=128, num_updates=6892, lr=9.99529e-05, gnorm=2.078, loss_scale=4, train_wall=11, gb_free=2.8, wall=79188 2021-06-19 16:38:45 | INFO | train_inner | epoch 003: 933 / 3002 loss=2.633, ppl=6.2, wps=5758.1, ups=0.09, wpb=64802, bsz=128, num_updates=6893, lr=9.99529e-05, gnorm=2.151, loss_scale=4, train_wall=11, gb_free=2.8, wall=79199 2021-06-19 16:38:56 | INFO | train_inner | epoch 003: 934 / 3002 loss=2.7, ppl=6.5, wps=5772.5, ups=0.09, wpb=64809, bsz=128, num_updates=6894, lr=9.99528e-05, gnorm=14.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=79210 2021-06-19 16:39:07 | INFO | train_inner | epoch 003: 935 / 3002 loss=2.729, ppl=6.63, wps=5880.5, ups=0.09, wpb=64806, bsz=128, num_updates=6895, lr=9.99528e-05, gnorm=2.222, loss_scale=4, train_wall=11, gb_free=2.8, wall=79221 2021-06-19 16:39:18 | INFO | train_inner | epoch 003: 936 / 3002 loss=2.574, ppl=5.95, wps=5968.1, ups=0.09, wpb=64889, bsz=128, num_updates=6896, lr=9.99528e-05, gnorm=2.192, loss_scale=4, train_wall=10, gb_free=2.8, wall=79232 2021-06-19 16:39:29 | INFO | train_inner | epoch 003: 937 / 3002 loss=2.929, ppl=7.62, wps=5807.3, ups=0.09, wpb=64801, bsz=128, num_updates=6897, lr=9.99528e-05, gnorm=2.841, loss_scale=4, train_wall=11, gb_free=2.8, wall=79243 2021-06-19 16:39:40 | INFO | train_inner | epoch 003: 938 / 3002 loss=2.764, ppl=6.79, wps=5798.7, ups=0.09, wpb=64793, bsz=128, num_updates=6898, lr=9.99528e-05, gnorm=2.239, loss_scale=4, train_wall=11, gb_free=2.8, wall=79255 2021-06-19 16:39:51 | INFO | train_inner | epoch 003: 939 / 3002 loss=2.449, ppl=5.46, wps=5875.4, ups=0.09, wpb=64909, bsz=128, num_updates=6899, lr=9.99528e-05, gnorm=2.659, loss_scale=8, train_wall=11, gb_free=2.8, wall=79266 2021-06-19 16:40:02 | INFO | train_inner | epoch 003: 940 / 3002 loss=2.566, ppl=5.92, wps=5899.8, ups=0.09, wpb=64770, bsz=128, num_updates=6900, lr=9.99528e-05, gnorm=2.236, loss_scale=8, train_wall=11, gb_free=2.8, wall=79277 2021-06-19 16:40:13 | INFO | train_inner | epoch 003: 941 / 3002 loss=2.67, ppl=6.36, wps=5839.3, ups=0.09, wpb=64877, bsz=128, num_updates=6901, lr=9.99528e-05, gnorm=3.249, loss_scale=8, train_wall=11, gb_free=2.8, wall=79288 2021-06-19 16:40:25 | INFO | train_inner | epoch 003: 942 / 3002 loss=2.687, ppl=6.44, wps=5878.6, ups=0.09, wpb=64947, bsz=128, num_updates=6902, lr=9.99528e-05, gnorm=2.614, loss_scale=8, train_wall=11, gb_free=2.8, wall=79299 2021-06-19 16:40:36 | INFO | train_inner | epoch 003: 943 / 3002 loss=2.701, ppl=6.5, wps=5899.3, ups=0.09, wpb=64881, bsz=128, num_updates=6903, lr=9.99528e-05, gnorm=3.279, loss_scale=8, train_wall=11, gb_free=2.8, wall=79310 2021-06-19 16:40:47 | INFO | train_inner | epoch 003: 944 / 3002 loss=2.628, ppl=6.18, wps=5793.1, ups=0.09, wpb=64815, bsz=128, num_updates=6904, lr=9.99528e-05, gnorm=2.359, loss_scale=8, train_wall=11, gb_free=2.8, wall=79321 2021-06-19 16:40:58 | INFO | train_inner | epoch 003: 945 / 3002 loss=2.784, ppl=6.89, wps=5924.6, ups=0.09, wpb=64849, bsz=128, num_updates=6905, lr=9.99528e-05, gnorm=2.254, loss_scale=8, train_wall=10, gb_free=2.8, wall=79332 2021-06-19 16:41:09 | INFO | train_inner | epoch 003: 946 / 3002 loss=2.75, ppl=6.73, wps=5767.8, ups=0.09, wpb=64869, bsz=128, num_updates=6906, lr=9.99527e-05, gnorm=2.212, loss_scale=8, train_wall=11, gb_free=2.8, wall=79343 2021-06-19 16:41:20 | INFO | train_inner | epoch 003: 947 / 3002 loss=2.703, ppl=6.51, wps=5936.5, ups=0.09, wpb=64864, bsz=128, num_updates=6907, lr=9.99527e-05, gnorm=2.144, loss_scale=8, train_wall=10, gb_free=2.8, wall=79354 2021-06-19 16:41:31 | INFO | train_inner | epoch 003: 948 / 3002 loss=2.932, ppl=7.63, wps=5791.1, ups=0.09, wpb=64811, bsz=128, num_updates=6908, lr=9.99527e-05, gnorm=2.443, loss_scale=8, train_wall=11, gb_free=2.8, wall=79365 2021-06-19 16:41:42 | INFO | train_inner | epoch 003: 949 / 3002 loss=2.659, ppl=6.31, wps=5706.4, ups=0.09, wpb=64862, bsz=128, num_updates=6909, lr=9.99527e-05, gnorm=2.747, loss_scale=8, train_wall=11, gb_free=2.8, wall=79377 2021-06-19 16:41:54 | INFO | train_inner | epoch 003: 950 / 3002 loss=2.716, ppl=6.57, wps=5776.6, ups=0.09, wpb=64794, bsz=128, num_updates=6910, lr=9.99527e-05, gnorm=2.196, loss_scale=8, train_wall=11, gb_free=2.8, wall=79388 2021-06-19 16:42:05 | INFO | train_inner | epoch 003: 951 / 3002 loss=2.686, ppl=6.44, wps=5828.5, ups=0.09, wpb=64741, bsz=128, num_updates=6911, lr=9.99527e-05, gnorm=6.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=79399 2021-06-19 16:42:16 | INFO | train_inner | epoch 003: 952 / 3002 loss=2.688, ppl=6.44, wps=5810.5, ups=0.09, wpb=64820, bsz=128, num_updates=6912, lr=9.99527e-05, gnorm=2.14, loss_scale=8, train_wall=11, gb_free=2.8, wall=79410 2021-06-19 16:42:27 | INFO | train_inner | epoch 003: 953 / 3002 loss=2.599, ppl=6.06, wps=5923.8, ups=0.09, wpb=64813, bsz=128, num_updates=6913, lr=9.99527e-05, gnorm=2.088, loss_scale=8, train_wall=10, gb_free=2.8, wall=79421 2021-06-19 16:42:38 | INFO | train_inner | epoch 003: 954 / 3002 loss=2.666, ppl=6.35, wps=5828.9, ups=0.09, wpb=64804, bsz=128, num_updates=6914, lr=9.99527e-05, gnorm=2.127, loss_scale=8, train_wall=11, gb_free=2.8, wall=79432 2021-06-19 16:42:49 | INFO | train_inner | epoch 003: 955 / 3002 loss=2.684, ppl=6.43, wps=5758, ups=0.09, wpb=64774, bsz=128, num_updates=6915, lr=9.99527e-05, gnorm=2.196, loss_scale=8, train_wall=11, gb_free=2.8, wall=79444 2021-06-19 16:43:00 | INFO | train_inner | epoch 003: 956 / 3002 loss=2.701, ppl=6.5, wps=5793.3, ups=0.09, wpb=64812, bsz=128, num_updates=6916, lr=9.99527e-05, gnorm=2.235, loss_scale=8, train_wall=11, gb_free=2.8, wall=79455 2021-06-19 16:43:11 | INFO | train_inner | epoch 003: 957 / 3002 loss=2.585, ppl=6, wps=5910.2, ups=0.09, wpb=64855, bsz=128, num_updates=6917, lr=9.99527e-05, gnorm=2.187, loss_scale=8, train_wall=10, gb_free=2.8, wall=79466 2021-06-19 16:43:22 | INFO | train_inner | epoch 003: 958 / 3002 loss=2.672, ppl=6.37, wps=5831.4, ups=0.09, wpb=64778, bsz=128, num_updates=6918, lr=9.99527e-05, gnorm=2.103, loss_scale=8, train_wall=11, gb_free=2.8, wall=79477 2021-06-19 16:43:33 | INFO | train_inner | epoch 003: 959 / 3002 loss=2.586, ppl=6, wps=5872.9, ups=0.09, wpb=64815, bsz=128, num_updates=6919, lr=9.99526e-05, gnorm=2.092, loss_scale=8, train_wall=11, gb_free=2.8, wall=79488 2021-06-19 16:43:44 | INFO | train_inner | epoch 003: 960 / 3002 loss=2.881, ppl=7.36, wps=5892.1, ups=0.09, wpb=64859, bsz=128, num_updates=6920, lr=9.99526e-05, gnorm=2.372, loss_scale=8, train_wall=11, gb_free=2.8, wall=79499 2021-06-19 16:43:56 | INFO | train_inner | epoch 003: 961 / 3002 loss=2.604, ppl=6.08, wps=5817, ups=0.09, wpb=64787, bsz=128, num_updates=6921, lr=9.99526e-05, gnorm=2.095, loss_scale=8, train_wall=11, gb_free=2.8, wall=79510 2021-06-19 16:44:07 | INFO | train_inner | epoch 003: 962 / 3002 loss=2.579, ppl=5.97, wps=5808.3, ups=0.09, wpb=64778, bsz=128, num_updates=6922, lr=9.99526e-05, gnorm=2.327, loss_scale=8, train_wall=11, gb_free=2.8, wall=79521 2021-06-19 16:44:18 | INFO | train_inner | epoch 003: 963 / 3002 loss=2.606, ppl=6.09, wps=5838.6, ups=0.09, wpb=64823, bsz=128, num_updates=6923, lr=9.99526e-05, gnorm=2.454, loss_scale=8, train_wall=11, gb_free=2.8, wall=79532 2021-06-19 16:44:29 | INFO | train_inner | epoch 003: 964 / 3002 loss=2.593, ppl=6.03, wps=5835.1, ups=0.09, wpb=64741, bsz=128, num_updates=6924, lr=9.99526e-05, gnorm=2.178, loss_scale=8, train_wall=11, gb_free=2.8, wall=79543 2021-06-19 16:44:40 | INFO | train_inner | epoch 003: 965 / 3002 loss=2.644, ppl=6.25, wps=5735, ups=0.09, wpb=64901, bsz=128, num_updates=6925, lr=9.99526e-05, gnorm=2.217, loss_scale=8, train_wall=11, gb_free=2.8, wall=79555 2021-06-19 16:44:51 | INFO | train_inner | epoch 003: 966 / 3002 loss=2.737, ppl=6.67, wps=5806.8, ups=0.09, wpb=64759, bsz=128, num_updates=6926, lr=9.99526e-05, gnorm=2.448, loss_scale=8, train_wall=11, gb_free=2.8, wall=79566 2021-06-19 16:45:02 | INFO | train_inner | epoch 003: 967 / 3002 loss=2.733, ppl=6.65, wps=5877.4, ups=0.09, wpb=64759, bsz=128, num_updates=6927, lr=9.99526e-05, gnorm=2.217, loss_scale=8, train_wall=11, gb_free=2.8, wall=79577 2021-06-19 16:45:14 | INFO | train_inner | epoch 003: 968 / 3002 loss=2.735, ppl=6.66, wps=5747, ups=0.09, wpb=64853, bsz=128, num_updates=6928, lr=9.99526e-05, gnorm=2.116, loss_scale=8, train_wall=11, gb_free=2.8, wall=79588 2021-06-19 16:45:25 | INFO | train_inner | epoch 003: 969 / 3002 loss=2.436, ppl=5.41, wps=5971.3, ups=0.09, wpb=64929, bsz=128, num_updates=6929, lr=9.99526e-05, gnorm=2.047, loss_scale=8, train_wall=10, gb_free=2.8, wall=79599 2021-06-19 16:45:36 | INFO | train_inner | epoch 003: 970 / 3002 loss=2.759, ppl=6.77, wps=5821.9, ups=0.09, wpb=64749, bsz=128, num_updates=6930, lr=9.99526e-05, gnorm=2.15, loss_scale=8, train_wall=11, gb_free=2.8, wall=79610 2021-06-19 16:45:47 | INFO | train_inner | epoch 003: 971 / 3002 loss=2.772, ppl=6.83, wps=5812.9, ups=0.09, wpb=64813, bsz=128, num_updates=6931, lr=9.99525e-05, gnorm=2.194, loss_scale=8, train_wall=11, gb_free=2.8, wall=79621 2021-06-19 16:45:58 | INFO | train_inner | epoch 003: 972 / 3002 loss=2.6, ppl=6.06, wps=5844.2, ups=0.09, wpb=64905, bsz=128, num_updates=6932, lr=9.99525e-05, gnorm=2.119, loss_scale=8, train_wall=11, gb_free=2.8, wall=79632 2021-06-19 16:46:09 | INFO | train_inner | epoch 003: 973 / 3002 loss=2.835, ppl=7.13, wps=5827.2, ups=0.09, wpb=64781, bsz=128, num_updates=6933, lr=9.99525e-05, gnorm=2.111, loss_scale=8, train_wall=11, gb_free=2.8, wall=79643 2021-06-19 16:46:20 | INFO | train_inner | epoch 003: 974 / 3002 loss=2.648, ppl=6.27, wps=5742.9, ups=0.09, wpb=64781, bsz=128, num_updates=6934, lr=9.99525e-05, gnorm=10.646, loss_scale=8, train_wall=11, gb_free=2.8, wall=79655 2021-06-19 16:46:32 | INFO | train_inner | epoch 003: 975 / 3002 loss=2.571, ppl=5.94, wps=5787.5, ups=0.09, wpb=64893, bsz=128, num_updates=6935, lr=9.99525e-05, gnorm=2.205, loss_scale=8, train_wall=11, gb_free=2.8, wall=79666 2021-06-19 16:46:43 | INFO | train_inner | epoch 003: 976 / 3002 loss=2.481, ppl=5.58, wps=5875.9, ups=0.09, wpb=64866, bsz=128, num_updates=6936, lr=9.99525e-05, gnorm=2.417, loss_scale=8, train_wall=11, gb_free=2.8, wall=79677 2021-06-19 16:46:54 | INFO | train_inner | epoch 003: 977 / 3002 loss=2.609, ppl=6.1, wps=5855.1, ups=0.09, wpb=64812, bsz=128, num_updates=6937, lr=9.99525e-05, gnorm=2.21, loss_scale=8, train_wall=11, gb_free=2.8, wall=79688 2021-06-19 16:47:05 | INFO | train_inner | epoch 003: 978 / 3002 loss=2.71, ppl=6.54, wps=5846.1, ups=0.09, wpb=64802, bsz=128, num_updates=6938, lr=9.99525e-05, gnorm=2.296, loss_scale=8, train_wall=11, gb_free=2.8, wall=79699 2021-06-19 16:47:16 | INFO | train_inner | epoch 003: 979 / 3002 loss=2.722, ppl=6.6, wps=5831.9, ups=0.09, wpb=64823, bsz=128, num_updates=6939, lr=9.99525e-05, gnorm=2.119, loss_scale=8, train_wall=11, gb_free=2.8, wall=79710 2021-06-19 16:47:27 | INFO | train_inner | epoch 003: 980 / 3002 loss=2.614, ppl=6.12, wps=5851.1, ups=0.09, wpb=64817, bsz=128, num_updates=6940, lr=9.99525e-05, gnorm=2.203, loss_scale=8, train_wall=11, gb_free=2.8, wall=79721 2021-06-19 16:47:38 | INFO | train_inner | epoch 003: 981 / 3002 loss=2.669, ppl=6.36, wps=5930.9, ups=0.09, wpb=64883, bsz=128, num_updates=6941, lr=9.99525e-05, gnorm=2.44, loss_scale=8, train_wall=10, gb_free=2.8, wall=79732 2021-06-19 16:47:49 | INFO | train_inner | epoch 003: 982 / 3002 loss=2.656, ppl=6.3, wps=5881.7, ups=0.09, wpb=64859, bsz=128, num_updates=6942, lr=9.99525e-05, gnorm=2.141, loss_scale=8, train_wall=11, gb_free=2.8, wall=79743 2021-06-19 16:48:00 | INFO | train_inner | epoch 003: 983 / 3002 loss=2.735, ppl=6.66, wps=5794, ups=0.09, wpb=64773, bsz=128, num_updates=6943, lr=9.99525e-05, gnorm=2.219, loss_scale=8, train_wall=11, gb_free=2.8, wall=79754 2021-06-19 16:48:11 | INFO | train_inner | epoch 003: 984 / 3002 loss=2.637, ppl=6.22, wps=5827.8, ups=0.09, wpb=64877, bsz=128, num_updates=6944, lr=9.99524e-05, gnorm=2.172, loss_scale=8, train_wall=11, gb_free=2.8, wall=79766 2021-06-19 16:48:22 | INFO | train_inner | epoch 003: 985 / 3002 loss=2.454, ppl=5.48, wps=5853.9, ups=0.09, wpb=64763, bsz=128, num_updates=6945, lr=9.99524e-05, gnorm=2.18, loss_scale=8, train_wall=11, gb_free=2.8, wall=79777 2021-06-19 16:48:34 | INFO | train_inner | epoch 003: 986 / 3002 loss=2.562, ppl=5.91, wps=5784.2, ups=0.09, wpb=64850, bsz=128, num_updates=6946, lr=9.99524e-05, gnorm=2.126, loss_scale=8, train_wall=11, gb_free=2.8, wall=79788 2021-06-19 16:48:45 | INFO | train_inner | epoch 003: 987 / 3002 loss=2.62, ppl=6.15, wps=5915, ups=0.09, wpb=64872, bsz=128, num_updates=6947, lr=9.99524e-05, gnorm=2.165, loss_scale=8, train_wall=11, gb_free=2.8, wall=79799 2021-06-19 16:48:56 | INFO | train_inner | epoch 003: 988 / 3002 loss=2.797, ppl=6.95, wps=5815.5, ups=0.09, wpb=64814, bsz=128, num_updates=6948, lr=9.99524e-05, gnorm=2.276, loss_scale=8, train_wall=11, gb_free=2.8, wall=79810 2021-06-19 16:49:07 | INFO | train_inner | epoch 003: 989 / 3002 loss=2.511, ppl=5.7, wps=5887, ups=0.09, wpb=64893, bsz=128, num_updates=6949, lr=9.99524e-05, gnorm=2.071, loss_scale=8, train_wall=11, gb_free=2.8, wall=79821 2021-06-19 16:49:18 | INFO | train_inner | epoch 003: 990 / 3002 loss=2.648, ppl=6.27, wps=5791.1, ups=0.09, wpb=64789, bsz=128, num_updates=6950, lr=9.99524e-05, gnorm=2.168, loss_scale=8, train_wall=11, gb_free=2.8, wall=79832 2021-06-19 16:49:29 | INFO | train_inner | epoch 003: 991 / 3002 loss=2.605, ppl=6.08, wps=5898.4, ups=0.09, wpb=64870, bsz=128, num_updates=6951, lr=9.99524e-05, gnorm=2.088, loss_scale=8, train_wall=11, gb_free=2.8, wall=79843 2021-06-19 16:49:40 | INFO | train_inner | epoch 003: 992 / 3002 loss=2.699, ppl=6.49, wps=5697.9, ups=0.09, wpb=64797, bsz=128, num_updates=6952, lr=9.99524e-05, gnorm=2.188, loss_scale=8, train_wall=11, gb_free=2.8, wall=79855 2021-06-19 16:49:51 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 2021-06-19 16:50:02 | INFO | train_inner | epoch 003: 994 / 3002 loss=2.725, ppl=6.61, wps=2920.3, ups=0.05, wpb=64847, bsz=128, num_updates=6953, lr=9.99524e-05, gnorm=2.16, loss_scale=4, train_wall=21, gb_free=2.8, wall=79877 2021-06-19 16:50:14 | INFO | train_inner | epoch 003: 995 / 3002 loss=2.513, ppl=5.71, wps=5857.1, ups=0.09, wpb=64900, bsz=128, num_updates=6954, lr=9.99524e-05, gnorm=2.137, loss_scale=4, train_wall=11, gb_free=2.8, wall=79888 2021-06-19 16:50:25 | INFO | train_inner | epoch 003: 996 / 3002 loss=2.669, ppl=6.36, wps=5832.9, ups=0.09, wpb=64831, bsz=128, num_updates=6955, lr=9.99524e-05, gnorm=2.092, loss_scale=4, train_wall=11, gb_free=2.8, wall=79899 2021-06-19 16:50:36 | INFO | train_inner | epoch 003: 997 / 3002 loss=2.652, ppl=6.29, wps=5865.7, ups=0.09, wpb=64824, bsz=128, num_updates=6956, lr=9.99523e-05, gnorm=2.413, loss_scale=4, train_wall=11, gb_free=2.8, wall=79910 2021-06-19 16:50:47 | INFO | train_inner | epoch 003: 998 / 3002 loss=2.661, ppl=6.33, wps=5846.7, ups=0.09, wpb=64906, bsz=128, num_updates=6957, lr=9.99523e-05, gnorm=2.211, loss_scale=4, train_wall=11, gb_free=2.8, wall=79921 2021-06-19 16:50:58 | INFO | train_inner | epoch 003: 999 / 3002 loss=2.571, ppl=5.94, wps=6004.6, ups=0.09, wpb=64805, bsz=128, num_updates=6958, lr=9.99523e-05, gnorm=2.404, loss_scale=4, train_wall=10, gb_free=2.8, wall=79932 2021-06-19 16:51:09 | INFO | train_inner | epoch 003: 1000 / 3002 loss=2.646, ppl=6.26, wps=5722.7, ups=0.09, wpb=64859, bsz=128, num_updates=6959, lr=9.99523e-05, gnorm=2.243, loss_scale=4, train_wall=11, gb_free=2.8, wall=79943 2021-06-19 16:51:20 | INFO | train_inner | epoch 003: 1001 / 3002 loss=2.653, ppl=6.29, wps=5715.7, ups=0.09, wpb=64794, bsz=128, num_updates=6960, lr=9.99523e-05, gnorm=2.193, loss_scale=4, train_wall=11, gb_free=2.8, wall=79955 2021-06-19 16:51:31 | INFO | train_inner | epoch 003: 1002 / 3002 loss=2.54, ppl=5.81, wps=5837.3, ups=0.09, wpb=64888, bsz=128, num_updates=6961, lr=9.99523e-05, gnorm=4.778, loss_scale=4, train_wall=11, gb_free=2.8, wall=79966 2021-06-19 16:51:43 | INFO | train_inner | epoch 003: 1003 / 3002 loss=2.605, ppl=6.08, wps=5783, ups=0.09, wpb=64753, bsz=128, num_updates=6962, lr=9.99523e-05, gnorm=2.764, loss_scale=4, train_wall=11, gb_free=2.8, wall=79977 2021-06-19 16:51:54 | INFO | train_inner | epoch 003: 1004 / 3002 loss=2.631, ppl=6.19, wps=5822.5, ups=0.09, wpb=64756, bsz=128, num_updates=6963, lr=9.99523e-05, gnorm=2.252, loss_scale=4, train_wall=11, gb_free=2.8, wall=79988 2021-06-19 16:52:05 | INFO | train_inner | epoch 003: 1005 / 3002 loss=2.597, ppl=6.05, wps=5814.9, ups=0.09, wpb=64781, bsz=128, num_updates=6964, lr=9.99523e-05, gnorm=2.259, loss_scale=4, train_wall=11, gb_free=2.8, wall=79999 2021-06-19 16:52:16 | INFO | train_inner | epoch 003: 1006 / 3002 loss=2.569, ppl=5.94, wps=5804.1, ups=0.09, wpb=64796, bsz=128, num_updates=6965, lr=9.99523e-05, gnorm=2.177, loss_scale=4, train_wall=11, gb_free=2.8, wall=80010 2021-06-19 16:52:27 | INFO | train_inner | epoch 003: 1007 / 3002 loss=2.654, ppl=6.3, wps=5814.9, ups=0.09, wpb=64804, bsz=128, num_updates=6966, lr=9.99523e-05, gnorm=2.639, loss_scale=4, train_wall=11, gb_free=2.8, wall=80021 2021-06-19 16:52:38 | INFO | train_inner | epoch 003: 1008 / 3002 loss=2.566, ppl=5.92, wps=5874.1, ups=0.09, wpb=64836, bsz=128, num_updates=6967, lr=9.99523e-05, gnorm=2.266, loss_scale=4, train_wall=11, gb_free=2.8, wall=80033 2021-06-19 16:52:49 | INFO | train_inner | epoch 003: 1009 / 3002 loss=2.831, ppl=7.11, wps=5896.9, ups=0.09, wpb=64795, bsz=128, num_updates=6968, lr=9.99523e-05, gnorm=2.23, loss_scale=4, train_wall=11, gb_free=2.8, wall=80044 2021-06-19 16:53:00 | INFO | train_inner | epoch 003: 1010 / 3002 loss=2.668, ppl=6.36, wps=5853.3, ups=0.09, wpb=64874, bsz=128, num_updates=6969, lr=9.99522e-05, gnorm=2.197, loss_scale=4, train_wall=11, gb_free=2.8, wall=80055 2021-06-19 16:53:11 | INFO | train_inner | epoch 003: 1011 / 3002 loss=2.554, ppl=5.87, wps=6084.2, ups=0.09, wpb=64917, bsz=128, num_updates=6970, lr=9.99522e-05, gnorm=2.152, loss_scale=4, train_wall=10, gb_free=2.8, wall=80065 2021-06-19 16:53:22 | INFO | train_inner | epoch 003: 1012 / 3002 loss=2.539, ppl=5.81, wps=5806.6, ups=0.09, wpb=64861, bsz=128, num_updates=6971, lr=9.99522e-05, gnorm=2.086, loss_scale=4, train_wall=11, gb_free=2.8, wall=80076 2021-06-19 16:53:33 | INFO | train_inner | epoch 003: 1013 / 3002 loss=2.686, ppl=6.43, wps=5854.5, ups=0.09, wpb=64786, bsz=128, num_updates=6972, lr=9.99522e-05, gnorm=2.241, loss_scale=4, train_wall=11, gb_free=2.8, wall=80088 2021-06-19 16:53:44 | INFO | train_inner | epoch 003: 1014 / 3002 loss=2.882, ppl=7.37, wps=5898.3, ups=0.09, wpb=64721, bsz=128, num_updates=6973, lr=9.99522e-05, gnorm=2.079, loss_scale=4, train_wall=11, gb_free=2.8, wall=80098 2021-06-19 16:53:55 | INFO | train_inner | epoch 003: 1015 / 3002 loss=2.611, ppl=6.11, wps=5975.9, ups=0.09, wpb=64929, bsz=128, num_updates=6974, lr=9.99522e-05, gnorm=2.138, loss_scale=4, train_wall=10, gb_free=2.8, wall=80109 2021-06-19 16:54:06 | INFO | train_inner | epoch 003: 1016 / 3002 loss=2.752, ppl=6.74, wps=5830.4, ups=0.09, wpb=64788, bsz=128, num_updates=6975, lr=9.99522e-05, gnorm=2.277, loss_scale=4, train_wall=11, gb_free=2.8, wall=80120 2021-06-19 16:54:17 | INFO | train_inner | epoch 003: 1017 / 3002 loss=2.599, ppl=6.06, wps=5806, ups=0.09, wpb=64849, bsz=128, num_updates=6976, lr=9.99522e-05, gnorm=2.214, loss_scale=4, train_wall=11, gb_free=2.8, wall=80132 2021-06-19 16:54:28 | INFO | train_inner | epoch 003: 1018 / 3002 loss=2.649, ppl=6.27, wps=5902.5, ups=0.09, wpb=64739, bsz=128, num_updates=6977, lr=9.99522e-05, gnorm=2.403, loss_scale=4, train_wall=10, gb_free=2.8, wall=80143 2021-06-19 16:54:39 | INFO | train_inner | epoch 003: 1019 / 3002 loss=2.861, ppl=7.26, wps=5829, ups=0.09, wpb=64746, bsz=128, num_updates=6978, lr=9.99522e-05, gnorm=2.179, loss_scale=4, train_wall=11, gb_free=2.8, wall=80154 2021-06-19 16:54:51 | INFO | train_inner | epoch 003: 1020 / 3002 loss=2.696, ppl=6.48, wps=5810.4, ups=0.09, wpb=64732, bsz=128, num_updates=6979, lr=9.99522e-05, gnorm=2.223, loss_scale=4, train_wall=11, gb_free=2.8, wall=80165 2021-06-19 16:55:02 | INFO | train_inner | epoch 003: 1021 / 3002 loss=2.597, ppl=6.05, wps=5843.5, ups=0.09, wpb=64858, bsz=128, num_updates=6980, lr=9.99522e-05, gnorm=2.273, loss_scale=4, train_wall=11, gb_free=2.8, wall=80176 2021-06-19 16:55:13 | INFO | train_inner | epoch 003: 1022 / 3002 loss=2.638, ppl=6.22, wps=5852.8, ups=0.09, wpb=64827, bsz=128, num_updates=6981, lr=9.99521e-05, gnorm=2.094, loss_scale=4, train_wall=11, gb_free=2.8, wall=80187 2021-06-19 16:55:24 | INFO | train_inner | epoch 003: 1023 / 3002 loss=2.618, ppl=6.14, wps=5893.1, ups=0.09, wpb=64899, bsz=128, num_updates=6982, lr=9.99521e-05, gnorm=2.47, loss_scale=4, train_wall=11, gb_free=2.8, wall=80198 2021-06-19 16:55:35 | INFO | train_inner | epoch 003: 1024 / 3002 loss=2.666, ppl=6.35, wps=5871.1, ups=0.09, wpb=64856, bsz=128, num_updates=6983, lr=9.99521e-05, gnorm=2.14, loss_scale=4, train_wall=11, gb_free=2.8, wall=80209 2021-06-19 16:55:46 | INFO | train_inner | epoch 003: 1025 / 3002 loss=2.598, ppl=6.05, wps=5727.1, ups=0.09, wpb=64828, bsz=128, num_updates=6984, lr=9.99521e-05, gnorm=3.5, loss_scale=4, train_wall=11, gb_free=2.8, wall=80220 2021-06-19 16:55:57 | INFO | train_inner | epoch 003: 1026 / 3002 loss=2.755, ppl=6.75, wps=5903.6, ups=0.09, wpb=64912, bsz=128, num_updates=6985, lr=9.99521e-05, gnorm=7.131, loss_scale=4, train_wall=11, gb_free=2.8, wall=80231 2021-06-19 16:56:08 | INFO | train_inner | epoch 003: 1027 / 3002 loss=2.489, ppl=5.61, wps=5775.9, ups=0.09, wpb=64755, bsz=128, num_updates=6986, lr=9.99521e-05, gnorm=2.153, loss_scale=4, train_wall=11, gb_free=2.8, wall=80243 2021-06-19 16:56:19 | INFO | train_inner | epoch 003: 1028 / 3002 loss=2.778, ppl=6.86, wps=5934.9, ups=0.09, wpb=64869, bsz=128, num_updates=6987, lr=9.99521e-05, gnorm=2.221, loss_scale=4, train_wall=10, gb_free=2.8, wall=80254 2021-06-19 16:56:30 | INFO | train_inner | epoch 003: 1029 / 3002 loss=2.592, ppl=6.03, wps=5937.9, ups=0.09, wpb=64763, bsz=128, num_updates=6988, lr=9.99521e-05, gnorm=2.092, loss_scale=4, train_wall=10, gb_free=2.8, wall=80264 2021-06-19 16:56:41 | INFO | train_inner | epoch 003: 1030 / 3002 loss=2.59, ppl=6.02, wps=5758, ups=0.09, wpb=64853, bsz=128, num_updates=6989, lr=9.99521e-05, gnorm=4.44, loss_scale=4, train_wall=11, gb_free=2.8, wall=80276 2021-06-19 16:56:52 | INFO | train_inner | epoch 003: 1031 / 3002 loss=2.655, ppl=6.3, wps=5844.2, ups=0.09, wpb=64718, bsz=128, num_updates=6990, lr=9.99521e-05, gnorm=2.152, loss_scale=4, train_wall=11, gb_free=2.8, wall=80287 2021-06-19 16:57:03 | INFO | train_inner | epoch 003: 1032 / 3002 loss=2.722, ppl=6.6, wps=5879.1, ups=0.09, wpb=64905, bsz=128, num_updates=6991, lr=9.99521e-05, gnorm=2.145, loss_scale=4, train_wall=11, gb_free=2.8, wall=80298 2021-06-19 16:57:15 | INFO | train_inner | epoch 003: 1033 / 3002 loss=2.73, ppl=6.63, wps=5726.6, ups=0.09, wpb=64894, bsz=128, num_updates=6992, lr=9.99521e-05, gnorm=2.242, loss_scale=4, train_wall=11, gb_free=2.8, wall=80309 2021-06-19 16:57:26 | INFO | train_inner | epoch 003: 1034 / 3002 loss=2.796, ppl=6.94, wps=5843.4, ups=0.09, wpb=64869, bsz=128, num_updates=6993, lr=9.99521e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=80320 2021-06-19 16:57:37 | INFO | train_inner | epoch 003: 1035 / 3002 loss=2.771, ppl=6.83, wps=5777.4, ups=0.09, wpb=64803, bsz=128, num_updates=6994, lr=9.9952e-05, gnorm=3.828, loss_scale=4, train_wall=11, gb_free=2.8, wall=80331 2021-06-19 16:57:48 | INFO | train_inner | epoch 003: 1036 / 3002 loss=2.78, ppl=6.87, wps=5871.6, ups=0.09, wpb=64907, bsz=128, num_updates=6995, lr=9.9952e-05, gnorm=2.152, loss_scale=4, train_wall=11, gb_free=2.8, wall=80343 2021-06-19 16:57:59 | INFO | train_inner | epoch 003: 1037 / 3002 loss=2.653, ppl=6.29, wps=5847.4, ups=0.09, wpb=64838, bsz=128, num_updates=6996, lr=9.9952e-05, gnorm=2.172, loss_scale=4, train_wall=11, gb_free=2.8, wall=80354 2021-06-19 16:58:10 | INFO | train_inner | epoch 003: 1038 / 3002 loss=2.618, ppl=6.14, wps=5850, ups=0.09, wpb=64852, bsz=128, num_updates=6997, lr=9.9952e-05, gnorm=2.129, loss_scale=4, train_wall=11, gb_free=2.8, wall=80365 2021-06-19 16:58:21 | INFO | train_inner | epoch 003: 1039 / 3002 loss=2.797, ppl=6.95, wps=5894, ups=0.09, wpb=64879, bsz=128, num_updates=6998, lr=9.9952e-05, gnorm=2.688, loss_scale=4, train_wall=11, gb_free=2.8, wall=80376 2021-06-19 16:58:33 | INFO | train_inner | epoch 003: 1040 / 3002 loss=2.578, ppl=5.97, wps=5770.9, ups=0.09, wpb=64801, bsz=128, num_updates=6999, lr=9.9952e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=80387 2021-06-19 16:58:44 | INFO | train_inner | epoch 003: 1041 / 3002 loss=2.66, ppl=6.32, wps=5752.2, ups=0.09, wpb=64824, bsz=128, num_updates=7000, lr=9.9952e-05, gnorm=2.902, loss_scale=4, train_wall=11, gb_free=2.8, wall=80398 2021-06-19 16:58:55 | INFO | train_inner | epoch 003: 1042 / 3002 loss=2.606, ppl=6.09, wps=5882.9, ups=0.09, wpb=64887, bsz=128, num_updates=7001, lr=9.9952e-05, gnorm=2.314, loss_scale=4, train_wall=11, gb_free=2.8, wall=80409 2021-06-19 16:59:06 | INFO | train_inner | epoch 003: 1043 / 3002 loss=2.613, ppl=6.12, wps=5883.9, ups=0.09, wpb=64802, bsz=128, num_updates=7002, lr=9.9952e-05, gnorm=2.193, loss_scale=4, train_wall=11, gb_free=2.8, wall=80420 2021-06-19 16:59:17 | INFO | train_inner | epoch 003: 1044 / 3002 loss=2.496, ppl=5.64, wps=5858, ups=0.09, wpb=64814, bsz=128, num_updates=7003, lr=9.9952e-05, gnorm=2.208, loss_scale=4, train_wall=11, gb_free=2.8, wall=80431 2021-06-19 16:59:28 | INFO | train_inner | epoch 003: 1045 / 3002 loss=2.717, ppl=6.57, wps=5891.1, ups=0.09, wpb=64863, bsz=128, num_updates=7004, lr=9.9952e-05, gnorm=2.146, loss_scale=4, train_wall=11, gb_free=2.8, wall=80442 2021-06-19 16:59:39 | INFO | train_inner | epoch 003: 1046 / 3002 loss=2.668, ppl=6.36, wps=5898.7, ups=0.09, wpb=64849, bsz=128, num_updates=7005, lr=9.9952e-05, gnorm=3.319, loss_scale=4, train_wall=11, gb_free=2.8, wall=80453 2021-06-19 16:59:50 | INFO | train_inner | epoch 003: 1047 / 3002 loss=2.737, ppl=6.67, wps=5767.8, ups=0.09, wpb=64766, bsz=128, num_updates=7006, lr=9.99519e-05, gnorm=2.368, loss_scale=4, train_wall=11, gb_free=2.8, wall=80465 2021-06-19 17:00:01 | INFO | train_inner | epoch 003: 1048 / 3002 loss=2.647, ppl=6.27, wps=5850.3, ups=0.09, wpb=64755, bsz=128, num_updates=7007, lr=9.99519e-05, gnorm=2.253, loss_scale=4, train_wall=11, gb_free=2.8, wall=80476 2021-06-19 17:00:12 | INFO | train_inner | epoch 003: 1049 / 3002 loss=2.637, ppl=6.22, wps=5901.4, ups=0.09, wpb=64789, bsz=128, num_updates=7008, lr=9.99519e-05, gnorm=2.327, loss_scale=4, train_wall=11, gb_free=2.8, wall=80487 2021-06-19 17:00:23 | INFO | train_inner | epoch 003: 1050 / 3002 loss=2.711, ppl=6.55, wps=5845.8, ups=0.09, wpb=64835, bsz=128, num_updates=7009, lr=9.99519e-05, gnorm=2.602, loss_scale=4, train_wall=11, gb_free=2.8, wall=80498 2021-06-19 17:00:35 | INFO | train_inner | epoch 003: 1051 / 3002 loss=2.449, ppl=5.46, wps=5768.2, ups=0.09, wpb=64890, bsz=128, num_updates=7010, lr=9.99519e-05, gnorm=2.277, loss_scale=4, train_wall=11, gb_free=2.8, wall=80509 2021-06-19 17:00:46 | INFO | train_inner | epoch 003: 1052 / 3002 loss=2.5, ppl=5.66, wps=5893, ups=0.09, wpb=64780, bsz=128, num_updates=7011, lr=9.99519e-05, gnorm=2.166, loss_scale=4, train_wall=11, gb_free=2.8, wall=80520 2021-06-19 17:00:57 | INFO | train_inner | epoch 003: 1053 / 3002 loss=2.634, ppl=6.21, wps=5751.2, ups=0.09, wpb=64777, bsz=128, num_updates=7012, lr=9.99519e-05, gnorm=2.228, loss_scale=4, train_wall=11, gb_free=2.8, wall=80531 2021-06-19 17:01:08 | INFO | train_inner | epoch 003: 1054 / 3002 loss=2.539, ppl=5.81, wps=5837.4, ups=0.09, wpb=64813, bsz=128, num_updates=7013, lr=9.99519e-05, gnorm=2.194, loss_scale=4, train_wall=11, gb_free=2.8, wall=80542 2021-06-19 17:01:19 | INFO | train_inner | epoch 003: 1055 / 3002 loss=2.716, ppl=6.57, wps=5851.2, ups=0.09, wpb=64846, bsz=128, num_updates=7014, lr=9.99519e-05, gnorm=2.171, loss_scale=4, train_wall=11, gb_free=2.8, wall=80553 2021-06-19 17:01:30 | INFO | train_inner | epoch 003: 1056 / 3002 loss=2.567, ppl=5.92, wps=5871.6, ups=0.09, wpb=64862, bsz=128, num_updates=7015, lr=9.99519e-05, gnorm=2.226, loss_scale=4, train_wall=11, gb_free=2.8, wall=80564 2021-06-19 17:01:41 | INFO | train_inner | epoch 003: 1057 / 3002 loss=2.483, ppl=5.59, wps=5932.5, ups=0.09, wpb=64856, bsz=128, num_updates=7016, lr=9.99519e-05, gnorm=2.144, loss_scale=4, train_wall=10, gb_free=2.8, wall=80575 2021-06-19 17:01:52 | INFO | train_inner | epoch 003: 1058 / 3002 loss=2.595, ppl=6.04, wps=5814.8, ups=0.09, wpb=64733, bsz=128, num_updates=7017, lr=9.99519e-05, gnorm=2.197, loss_scale=4, train_wall=11, gb_free=2.8, wall=80586 2021-06-19 17:02:03 | INFO | train_inner | epoch 003: 1059 / 3002 loss=2.658, ppl=6.31, wps=5997.1, ups=0.09, wpb=64880, bsz=128, num_updates=7018, lr=9.99519e-05, gnorm=2.304, loss_scale=4, train_wall=10, gb_free=2.8, wall=80597 2021-06-19 17:02:14 | INFO | train_inner | epoch 003: 1060 / 3002 loss=2.687, ppl=6.44, wps=5855.9, ups=0.09, wpb=64727, bsz=128, num_updates=7019, lr=9.99518e-05, gnorm=2.341, loss_scale=4, train_wall=11, gb_free=2.8, wall=80608 2021-06-19 17:02:25 | INFO | train_inner | epoch 003: 1061 / 3002 loss=2.639, ppl=6.23, wps=5787.9, ups=0.09, wpb=64855, bsz=128, num_updates=7020, lr=9.99518e-05, gnorm=2.295, loss_scale=4, train_wall=11, gb_free=2.8, wall=80620 2021-06-19 17:02:36 | INFO | train_inner | epoch 003: 1062 / 3002 loss=2.631, ppl=6.19, wps=5846.5, ups=0.09, wpb=64894, bsz=128, num_updates=7021, lr=9.99518e-05, gnorm=2.176, loss_scale=4, train_wall=11, gb_free=2.8, wall=80631 2021-06-19 17:02:47 | INFO | train_inner | epoch 003: 1063 / 3002 loss=2.8, ppl=6.97, wps=5896.8, ups=0.09, wpb=64807, bsz=128, num_updates=7022, lr=9.99518e-05, gnorm=2.206, loss_scale=4, train_wall=11, gb_free=2.8, wall=80642 2021-06-19 17:02:58 | INFO | train_inner | epoch 003: 1064 / 3002 loss=2.539, ppl=5.81, wps=5837.7, ups=0.09, wpb=64834, bsz=128, num_updates=7023, lr=9.99518e-05, gnorm=2.261, loss_scale=4, train_wall=11, gb_free=2.8, wall=80653 2021-06-19 17:03:10 | INFO | train_inner | epoch 003: 1065 / 3002 loss=2.522, ppl=5.74, wps=5841.4, ups=0.09, wpb=64856, bsz=128, num_updates=7024, lr=9.99518e-05, gnorm=2.08, loss_scale=4, train_wall=11, gb_free=2.8, wall=80664 2021-06-19 17:03:21 | INFO | train_inner | epoch 003: 1066 / 3002 loss=2.617, ppl=6.14, wps=5863.2, ups=0.09, wpb=64870, bsz=128, num_updates=7025, lr=9.99518e-05, gnorm=5.24, loss_scale=4, train_wall=11, gb_free=2.8, wall=80675 2021-06-19 17:03:32 | INFO | train_inner | epoch 003: 1067 / 3002 loss=2.617, ppl=6.13, wps=5737.4, ups=0.09, wpb=64840, bsz=128, num_updates=7026, lr=9.99518e-05, gnorm=2.873, loss_scale=4, train_wall=11, gb_free=2.8, wall=80686 2021-06-19 17:03:43 | INFO | train_inner | epoch 003: 1068 / 3002 loss=2.673, ppl=6.38, wps=5846.8, ups=0.09, wpb=64802, bsz=128, num_updates=7027, lr=9.99518e-05, gnorm=2.167, loss_scale=4, train_wall=11, gb_free=2.8, wall=80697 2021-06-19 17:03:54 | INFO | train_inner | epoch 003: 1069 / 3002 loss=2.734, ppl=6.65, wps=5827.7, ups=0.09, wpb=64751, bsz=128, num_updates=7028, lr=9.99518e-05, gnorm=2.491, loss_scale=4, train_wall=11, gb_free=2.8, wall=80708 2021-06-19 17:04:05 | INFO | train_inner | epoch 003: 1070 / 3002 loss=2.709, ppl=6.54, wps=5828.8, ups=0.09, wpb=64817, bsz=128, num_updates=7029, lr=9.99518e-05, gnorm=2.524, loss_scale=4, train_wall=11, gb_free=2.8, wall=80720 2021-06-19 17:04:16 | INFO | train_inner | epoch 003: 1071 / 3002 loss=2.464, ppl=5.52, wps=5888.1, ups=0.09, wpb=64839, bsz=128, num_updates=7030, lr=9.99518e-05, gnorm=2.839, loss_scale=4, train_wall=11, gb_free=2.8, wall=80731 2021-06-19 17:04:27 | INFO | train_inner | epoch 003: 1072 / 3002 loss=2.772, ppl=6.83, wps=5859.2, ups=0.09, wpb=64766, bsz=128, num_updates=7031, lr=9.99517e-05, gnorm=2.208, loss_scale=4, train_wall=11, gb_free=2.8, wall=80742 2021-06-19 17:04:38 | INFO | train_inner | epoch 003: 1073 / 3002 loss=2.484, ppl=5.59, wps=5867, ups=0.09, wpb=64880, bsz=128, num_updates=7032, lr=9.99517e-05, gnorm=2.133, loss_scale=4, train_wall=11, gb_free=2.8, wall=80753 2021-06-19 17:04:50 | INFO | train_inner | epoch 003: 1074 / 3002 loss=2.644, ppl=6.25, wps=5780.7, ups=0.09, wpb=64863, bsz=128, num_updates=7033, lr=9.99517e-05, gnorm=2.165, loss_scale=4, train_wall=11, gb_free=2.8, wall=80764 2021-06-19 17:05:01 | INFO | train_inner | epoch 003: 1075 / 3002 loss=2.654, ppl=6.29, wps=5856.5, ups=0.09, wpb=64801, bsz=128, num_updates=7034, lr=9.99517e-05, gnorm=2.165, loss_scale=4, train_wall=11, gb_free=2.8, wall=80775 2021-06-19 17:05:12 | INFO | train_inner | epoch 003: 1076 / 3002 loss=2.815, ppl=7.04, wps=5814.1, ups=0.09, wpb=64693, bsz=128, num_updates=7035, lr=9.99517e-05, gnorm=2.807, loss_scale=4, train_wall=11, gb_free=2.8, wall=80786 2021-06-19 17:05:23 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-19 17:05:34 | INFO | train_inner | epoch 003: 1078 / 3002 loss=2.736, ppl=6.66, wps=2925.6, ups=0.05, wpb=64801, bsz=128, num_updates=7036, lr=9.99517e-05, gnorm=2.186, loss_scale=2, train_wall=21, gb_free=2.8, wall=80808 2021-06-19 17:05:45 | INFO | train_inner | epoch 003: 1079 / 3002 loss=2.805, ppl=6.99, wps=5890, ups=0.09, wpb=64794, bsz=128, num_updates=7037, lr=9.99517e-05, gnorm=2.138, loss_scale=2, train_wall=11, gb_free=2.8, wall=80819 2021-06-19 17:05:56 | INFO | train_inner | epoch 003: 1080 / 3002 loss=2.65, ppl=6.28, wps=5856.6, ups=0.09, wpb=64906, bsz=128, num_updates=7038, lr=9.99517e-05, gnorm=2.131, loss_scale=2, train_wall=11, gb_free=2.8, wall=80830 2021-06-19 17:06:07 | INFO | train_inner | epoch 003: 1081 / 3002 loss=2.777, ppl=6.86, wps=5919.5, ups=0.09, wpb=64828, bsz=128, num_updates=7039, lr=9.99517e-05, gnorm=2.192, loss_scale=2, train_wall=10, gb_free=2.8, wall=80841 2021-06-19 17:06:18 | INFO | train_inner | epoch 003: 1082 / 3002 loss=2.783, ppl=6.88, wps=5802.5, ups=0.09, wpb=64798, bsz=128, num_updates=7040, lr=9.99517e-05, gnorm=2.279, loss_scale=2, train_wall=11, gb_free=2.8, wall=80852 2021-06-19 17:06:29 | INFO | train_inner | epoch 003: 1083 / 3002 loss=2.578, ppl=5.97, wps=5947.5, ups=0.09, wpb=64820, bsz=128, num_updates=7041, lr=9.99517e-05, gnorm=2.213, loss_scale=2, train_wall=10, gb_free=2.8, wall=80863 2021-06-19 17:06:40 | INFO | train_inner | epoch 003: 1084 / 3002 loss=2.709, ppl=6.54, wps=5808.2, ups=0.09, wpb=64847, bsz=128, num_updates=7042, lr=9.99517e-05, gnorm=2.219, loss_scale=2, train_wall=11, gb_free=2.8, wall=80875 2021-06-19 17:06:51 | INFO | train_inner | epoch 003: 1085 / 3002 loss=2.649, ppl=6.27, wps=5842.5, ups=0.09, wpb=64816, bsz=128, num_updates=7043, lr=9.99517e-05, gnorm=2.657, loss_scale=2, train_wall=11, gb_free=2.8, wall=80886 2021-06-19 17:07:03 | INFO | train_inner | epoch 003: 1086 / 3002 loss=2.564, ppl=5.91, wps=5768.5, ups=0.09, wpb=64861, bsz=128, num_updates=7044, lr=9.99516e-05, gnorm=2.212, loss_scale=2, train_wall=11, gb_free=2.8, wall=80897 2021-06-19 17:07:14 | INFO | train_inner | epoch 003: 1087 / 3002 loss=2.659, ppl=6.31, wps=5741.4, ups=0.09, wpb=64824, bsz=128, num_updates=7045, lr=9.99516e-05, gnorm=2.09, loss_scale=2, train_wall=11, gb_free=2.8, wall=80908 2021-06-19 17:07:25 | INFO | train_inner | epoch 003: 1088 / 3002 loss=2.632, ppl=6.2, wps=5708.5, ups=0.09, wpb=64790, bsz=128, num_updates=7046, lr=9.99516e-05, gnorm=2.312, loss_scale=2, train_wall=11, gb_free=2.8, wall=80919 2021-06-19 17:07:36 | INFO | train_inner | epoch 003: 1089 / 3002 loss=2.691, ppl=6.46, wps=5834.1, ups=0.09, wpb=64786, bsz=128, num_updates=7047, lr=9.99516e-05, gnorm=2.31, loss_scale=2, train_wall=11, gb_free=2.8, wall=80931 2021-06-19 17:07:47 | INFO | train_inner | epoch 003: 1090 / 3002 loss=2.674, ppl=6.38, wps=5891.8, ups=0.09, wpb=64830, bsz=128, num_updates=7048, lr=9.99516e-05, gnorm=2.245, loss_scale=2, train_wall=11, gb_free=2.8, wall=80942 2021-06-19 17:07:59 | INFO | train_inner | epoch 003: 1091 / 3002 loss=2.817, ppl=7.05, wps=5747.9, ups=0.09, wpb=64794, bsz=128, num_updates=7049, lr=9.99516e-05, gnorm=2.097, loss_scale=2, train_wall=11, gb_free=2.8, wall=80953 2021-06-19 17:08:10 | INFO | train_inner | epoch 003: 1092 / 3002 loss=2.414, ppl=5.33, wps=5785.8, ups=0.09, wpb=64773, bsz=128, num_updates=7050, lr=9.99516e-05, gnorm=2.338, loss_scale=2, train_wall=11, gb_free=2.8, wall=80964 2021-06-19 17:08:21 | INFO | train_inner | epoch 003: 1093 / 3002 loss=2.728, ppl=6.63, wps=5809.6, ups=0.09, wpb=64855, bsz=128, num_updates=7051, lr=9.99516e-05, gnorm=2.147, loss_scale=2, train_wall=11, gb_free=2.8, wall=80975 2021-06-19 17:08:32 | INFO | train_inner | epoch 003: 1094 / 3002 loss=2.693, ppl=6.47, wps=5927.8, ups=0.09, wpb=64809, bsz=128, num_updates=7052, lr=9.99516e-05, gnorm=2.069, loss_scale=2, train_wall=10, gb_free=2.8, wall=80986 2021-06-19 17:08:43 | INFO | train_inner | epoch 003: 1095 / 3002 loss=2.627, ppl=6.18, wps=5863.7, ups=0.09, wpb=64839, bsz=128, num_updates=7053, lr=9.99516e-05, gnorm=2.178, loss_scale=2, train_wall=11, gb_free=2.8, wall=80997 2021-06-19 17:08:54 | INFO | train_inner | epoch 003: 1096 / 3002 loss=2.652, ppl=6.29, wps=5830.5, ups=0.09, wpb=64882, bsz=128, num_updates=7054, lr=9.99516e-05, gnorm=2.192, loss_scale=2, train_wall=11, gb_free=2.8, wall=81008 2021-06-19 17:09:05 | INFO | train_inner | epoch 003: 1097 / 3002 loss=2.538, ppl=5.81, wps=5919.1, ups=0.09, wpb=64871, bsz=128, num_updates=7055, lr=9.99516e-05, gnorm=2.136, loss_scale=2, train_wall=11, gb_free=2.8, wall=81019 2021-06-19 17:09:16 | INFO | train_inner | epoch 003: 1098 / 3002 loss=2.604, ppl=6.08, wps=5922, ups=0.09, wpb=64873, bsz=128, num_updates=7056, lr=9.99515e-05, gnorm=2.304, loss_scale=2, train_wall=11, gb_free=2.8, wall=81030 2021-06-19 17:09:27 | INFO | train_inner | epoch 003: 1099 / 3002 loss=2.591, ppl=6.03, wps=5925.5, ups=0.09, wpb=64747, bsz=128, num_updates=7057, lr=9.99515e-05, gnorm=2.494, loss_scale=2, train_wall=10, gb_free=2.8, wall=81041 2021-06-19 17:09:38 | INFO | train_inner | epoch 003: 1100 / 3002 loss=2.668, ppl=6.36, wps=5724.7, ups=0.09, wpb=64783, bsz=128, num_updates=7058, lr=9.99515e-05, gnorm=2.107, loss_scale=2, train_wall=11, gb_free=2.8, wall=81053 2021-06-19 17:09:49 | INFO | train_inner | epoch 003: 1101 / 3002 loss=2.786, ppl=6.9, wps=5757.7, ups=0.09, wpb=64807, bsz=128, num_updates=7059, lr=9.99515e-05, gnorm=17.904, loss_scale=2, train_wall=11, gb_free=2.8, wall=81064 2021-06-19 17:10:00 | INFO | train_inner | epoch 003: 1102 / 3002 loss=2.589, ppl=6.02, wps=5907.9, ups=0.09, wpb=64894, bsz=128, num_updates=7060, lr=9.99515e-05, gnorm=2.575, loss_scale=2, train_wall=11, gb_free=2.8, wall=81075 2021-06-19 17:10:11 | INFO | train_inner | epoch 003: 1103 / 3002 loss=2.698, ppl=6.49, wps=5879.6, ups=0.09, wpb=64849, bsz=128, num_updates=7061, lr=9.99515e-05, gnorm=2.182, loss_scale=2, train_wall=11, gb_free=2.8, wall=81086 2021-06-19 17:10:22 | INFO | train_inner | epoch 003: 1104 / 3002 loss=2.57, ppl=5.94, wps=5883.3, ups=0.09, wpb=64814, bsz=128, num_updates=7062, lr=9.99515e-05, gnorm=2.115, loss_scale=2, train_wall=11, gb_free=2.8, wall=81097 2021-06-19 17:10:34 | INFO | train_inner | epoch 003: 1105 / 3002 loss=2.604, ppl=6.08, wps=5848.6, ups=0.09, wpb=64812, bsz=128, num_updates=7063, lr=9.99515e-05, gnorm=2.157, loss_scale=2, train_wall=11, gb_free=2.8, wall=81108 2021-06-19 17:10:45 | INFO | train_inner | epoch 003: 1106 / 3002 loss=2.542, ppl=5.82, wps=5820.8, ups=0.09, wpb=64845, bsz=128, num_updates=7064, lr=9.99515e-05, gnorm=2.21, loss_scale=2, train_wall=11, gb_free=2.8, wall=81119 2021-06-19 17:10:56 | INFO | train_inner | epoch 003: 1107 / 3002 loss=2.818, ppl=7.05, wps=5956.1, ups=0.09, wpb=64883, bsz=128, num_updates=7065, lr=9.99515e-05, gnorm=3.097, loss_scale=2, train_wall=10, gb_free=2.8, wall=81130 2021-06-19 17:11:07 | INFO | train_inner | epoch 003: 1108 / 3002 loss=2.643, ppl=6.25, wps=5903.1, ups=0.09, wpb=64803, bsz=128, num_updates=7066, lr=9.99515e-05, gnorm=2.197, loss_scale=2, train_wall=11, gb_free=2.8, wall=81141 2021-06-19 17:11:17 | INFO | train_inner | epoch 003: 1109 / 3002 loss=2.635, ppl=6.21, wps=5981.4, ups=0.09, wpb=64832, bsz=128, num_updates=7067, lr=9.99515e-05, gnorm=2.355, loss_scale=2, train_wall=10, gb_free=2.8, wall=81152 2021-06-19 17:11:28 | INFO | train_inner | epoch 003: 1110 / 3002 loss=2.738, ppl=6.67, wps=5862.9, ups=0.09, wpb=64853, bsz=128, num_updates=7068, lr=9.99515e-05, gnorm=2.435, loss_scale=2, train_wall=11, gb_free=2.8, wall=81163 2021-06-19 17:11:39 | INFO | train_inner | epoch 003: 1111 / 3002 loss=2.71, ppl=6.54, wps=5924.8, ups=0.09, wpb=64879, bsz=128, num_updates=7069, lr=9.99514e-05, gnorm=2.425, loss_scale=2, train_wall=10, gb_free=2.8, wall=81174 2021-06-19 17:11:50 | INFO | train_inner | epoch 003: 1112 / 3002 loss=2.637, ppl=6.22, wps=5868.8, ups=0.09, wpb=64873, bsz=128, num_updates=7070, lr=9.99514e-05, gnorm=2.192, loss_scale=2, train_wall=11, gb_free=2.8, wall=81185 2021-06-19 17:12:02 | INFO | train_inner | epoch 003: 1113 / 3002 loss=2.635, ppl=6.21, wps=5770, ups=0.09, wpb=64821, bsz=128, num_updates=7071, lr=9.99514e-05, gnorm=3.114, loss_scale=2, train_wall=11, gb_free=2.8, wall=81196 2021-06-19 17:12:13 | INFO | train_inner | epoch 003: 1114 / 3002 loss=2.651, ppl=6.28, wps=5812, ups=0.09, wpb=64811, bsz=128, num_updates=7072, lr=9.99514e-05, gnorm=2.202, loss_scale=2, train_wall=11, gb_free=2.8, wall=81207 2021-06-19 17:12:24 | INFO | train_inner | epoch 003: 1115 / 3002 loss=2.751, ppl=6.73, wps=5983.1, ups=0.09, wpb=64900, bsz=128, num_updates=7073, lr=9.99514e-05, gnorm=2.493, loss_scale=2, train_wall=10, gb_free=2.8, wall=81218 2021-06-19 17:12:35 | INFO | train_inner | epoch 003: 1116 / 3002 loss=2.624, ppl=6.16, wps=5813.6, ups=0.09, wpb=64764, bsz=128, num_updates=7074, lr=9.99514e-05, gnorm=2.204, loss_scale=2, train_wall=11, gb_free=2.8, wall=81229 2021-06-19 17:12:46 | INFO | train_inner | epoch 003: 1117 / 3002 loss=2.744, ppl=6.7, wps=5786.6, ups=0.09, wpb=64812, bsz=128, num_updates=7075, lr=9.99514e-05, gnorm=6.106, loss_scale=2, train_wall=11, gb_free=2.8, wall=81240 2021-06-19 17:12:57 | INFO | train_inner | epoch 003: 1118 / 3002 loss=2.748, ppl=6.72, wps=5893, ups=0.09, wpb=64835, bsz=128, num_updates=7076, lr=9.99514e-05, gnorm=2.209, loss_scale=2, train_wall=11, gb_free=2.8, wall=81251 2021-06-19 17:13:08 | INFO | train_inner | epoch 003: 1119 / 3002 loss=2.735, ppl=6.66, wps=5939.3, ups=0.09, wpb=64897, bsz=128, num_updates=7077, lr=9.99514e-05, gnorm=2.204, loss_scale=2, train_wall=10, gb_free=2.8, wall=81262 2021-06-19 17:13:19 | INFO | train_inner | epoch 003: 1120 / 3002 loss=2.791, ppl=6.92, wps=5811.4, ups=0.09, wpb=64838, bsz=128, num_updates=7078, lr=9.99514e-05, gnorm=3.496, loss_scale=2, train_wall=11, gb_free=2.8, wall=81273 2021-06-19 17:13:30 | INFO | train_inner | epoch 003: 1121 / 3002 loss=2.607, ppl=6.09, wps=5728.1, ups=0.09, wpb=64845, bsz=128, num_updates=7079, lr=9.99514e-05, gnorm=2.351, loss_scale=2, train_wall=11, gb_free=2.8, wall=81285 2021-06-19 17:13:42 | INFO | train_inner | epoch 003: 1122 / 3002 loss=2.541, ppl=5.82, wps=5807.7, ups=0.09, wpb=64743, bsz=128, num_updates=7080, lr=9.99514e-05, gnorm=2.537, loss_scale=2, train_wall=11, gb_free=2.8, wall=81296 2021-06-19 17:13:53 | INFO | train_inner | epoch 003: 1123 / 3002 loss=2.595, ppl=6.04, wps=5823.2, ups=0.09, wpb=64756, bsz=128, num_updates=7081, lr=9.99513e-05, gnorm=2.172, loss_scale=2, train_wall=11, gb_free=2.8, wall=81307 2021-06-19 17:14:04 | INFO | train_inner | epoch 003: 1124 / 3002 loss=2.659, ppl=6.32, wps=5657.7, ups=0.09, wpb=64832, bsz=128, num_updates=7082, lr=9.99513e-05, gnorm=2.543, loss_scale=2, train_wall=11, gb_free=2.8, wall=81319 2021-06-19 17:14:15 | INFO | train_inner | epoch 003: 1125 / 3002 loss=2.783, ppl=6.88, wps=5956.6, ups=0.09, wpb=64818, bsz=128, num_updates=7083, lr=9.99513e-05, gnorm=3.05, loss_scale=2, train_wall=10, gb_free=2.8, wall=81329 2021-06-19 17:14:26 | INFO | train_inner | epoch 003: 1126 / 3002 loss=2.455, ppl=5.48, wps=5799.9, ups=0.09, wpb=64850, bsz=128, num_updates=7084, lr=9.99513e-05, gnorm=2.245, loss_scale=2, train_wall=11, gb_free=2.8, wall=81341 2021-06-19 17:14:37 | INFO | train_inner | epoch 003: 1127 / 3002 loss=2.619, ppl=6.14, wps=5767.3, ups=0.09, wpb=64778, bsz=128, num_updates=7085, lr=9.99513e-05, gnorm=2.367, loss_scale=2, train_wall=11, gb_free=2.8, wall=81352 2021-06-19 17:14:48 | INFO | train_inner | epoch 003: 1128 / 3002 loss=2.643, ppl=6.25, wps=5900.4, ups=0.09, wpb=64817, bsz=128, num_updates=7086, lr=9.99513e-05, gnorm=2.143, loss_scale=2, train_wall=11, gb_free=2.8, wall=81363 2021-06-19 17:15:00 | INFO | train_inner | epoch 003: 1129 / 3002 loss=2.68, ppl=6.41, wps=5786.3, ups=0.09, wpb=64929, bsz=128, num_updates=7087, lr=9.99513e-05, gnorm=2.251, loss_scale=2, train_wall=11, gb_free=2.8, wall=81374 2021-06-19 17:15:11 | INFO | train_inner | epoch 003: 1130 / 3002 loss=2.634, ppl=6.21, wps=5769.1, ups=0.09, wpb=64900, bsz=128, num_updates=7088, lr=9.99513e-05, gnorm=2.383, loss_scale=2, train_wall=11, gb_free=2.8, wall=81385 2021-06-19 17:15:22 | INFO | train_inner | epoch 003: 1131 / 3002 loss=2.599, ppl=6.06, wps=5794, ups=0.09, wpb=64825, bsz=128, num_updates=7089, lr=9.99513e-05, gnorm=2.31, loss_scale=2, train_wall=11, gb_free=2.8, wall=81396 2021-06-19 17:15:33 | INFO | train_inner | epoch 003: 1132 / 3002 loss=2.627, ppl=6.18, wps=5917.7, ups=0.09, wpb=64862, bsz=128, num_updates=7090, lr=9.99513e-05, gnorm=2.642, loss_scale=2, train_wall=10, gb_free=2.8, wall=81407 2021-06-19 17:15:44 | INFO | train_inner | epoch 003: 1133 / 3002 loss=2.575, ppl=5.96, wps=5764.3, ups=0.09, wpb=64768, bsz=128, num_updates=7091, lr=9.99513e-05, gnorm=2.065, loss_scale=2, train_wall=11, gb_free=2.8, wall=81419 2021-06-19 17:15:55 | INFO | train_inner | epoch 003: 1134 / 3002 loss=2.814, ppl=7.03, wps=5837.1, ups=0.09, wpb=64798, bsz=128, num_updates=7092, lr=9.99513e-05, gnorm=2.216, loss_scale=2, train_wall=11, gb_free=2.8, wall=81430 2021-06-19 17:16:06 | INFO | train_inner | epoch 003: 1135 / 3002 loss=2.636, ppl=6.21, wps=5887.2, ups=0.09, wpb=64848, bsz=128, num_updates=7093, lr=9.99513e-05, gnorm=2.182, loss_scale=2, train_wall=11, gb_free=2.8, wall=81441 2021-06-19 17:16:18 | INFO | train_inner | epoch 003: 1136 / 3002 loss=2.644, ppl=6.25, wps=5812.8, ups=0.09, wpb=64839, bsz=128, num_updates=7094, lr=9.99512e-05, gnorm=2.288, loss_scale=2, train_wall=11, gb_free=2.8, wall=81452 2021-06-19 17:16:29 | INFO | train_inner | epoch 003: 1137 / 3002 loss=2.742, ppl=6.69, wps=5910, ups=0.09, wpb=64810, bsz=128, num_updates=7095, lr=9.99512e-05, gnorm=2.216, loss_scale=2, train_wall=11, gb_free=2.8, wall=81463 2021-06-19 17:16:40 | INFO | train_inner | epoch 003: 1138 / 3002 loss=2.691, ppl=6.46, wps=5801.8, ups=0.09, wpb=64836, bsz=128, num_updates=7096, lr=9.99512e-05, gnorm=2.222, loss_scale=2, train_wall=11, gb_free=2.8, wall=81474 2021-06-19 17:16:51 | INFO | train_inner | epoch 003: 1139 / 3002 loss=2.672, ppl=6.38, wps=5945.6, ups=0.09, wpb=64898, bsz=128, num_updates=7097, lr=9.99512e-05, gnorm=2.398, loss_scale=2, train_wall=10, gb_free=2.8, wall=81485 2021-06-19 17:17:02 | INFO | train_inner | epoch 003: 1140 / 3002 loss=2.651, ppl=6.28, wps=5874.2, ups=0.09, wpb=64774, bsz=128, num_updates=7098, lr=9.99512e-05, gnorm=2.289, loss_scale=2, train_wall=11, gb_free=2.8, wall=81496 2021-06-19 17:17:13 | INFO | train_inner | epoch 003: 1141 / 3002 loss=2.719, ppl=6.58, wps=5898.4, ups=0.09, wpb=64890, bsz=128, num_updates=7099, lr=9.99512e-05, gnorm=2.984, loss_scale=2, train_wall=11, gb_free=2.8, wall=81507 2021-06-19 17:17:24 | INFO | train_inner | epoch 003: 1142 / 3002 loss=2.741, ppl=6.68, wps=5781.4, ups=0.09, wpb=64748, bsz=128, num_updates=7100, lr=9.99512e-05, gnorm=2.168, loss_scale=2, train_wall=11, gb_free=2.8, wall=81518 2021-06-19 17:17:35 | INFO | train_inner | epoch 003: 1143 / 3002 loss=2.818, ppl=7.05, wps=5866.5, ups=0.09, wpb=64803, bsz=128, num_updates=7101, lr=9.99512e-05, gnorm=2.209, loss_scale=2, train_wall=11, gb_free=2.8, wall=81529 2021-06-19 17:17:46 | INFO | train_inner | epoch 003: 1144 / 3002 loss=2.518, ppl=5.73, wps=5693.1, ups=0.09, wpb=64750, bsz=128, num_updates=7102, lr=9.99512e-05, gnorm=2.729, loss_scale=2, train_wall=11, gb_free=2.8, wall=81541 2021-06-19 17:17:57 | INFO | train_inner | epoch 003: 1145 / 3002 loss=2.678, ppl=6.4, wps=5824, ups=0.09, wpb=64796, bsz=128, num_updates=7103, lr=9.99512e-05, gnorm=2.256, loss_scale=2, train_wall=11, gb_free=2.8, wall=81552 2021-06-19 17:18:09 | INFO | train_inner | epoch 003: 1146 / 3002 loss=2.644, ppl=6.25, wps=5805.7, ups=0.09, wpb=64725, bsz=128, num_updates=7104, lr=9.99512e-05, gnorm=2.167, loss_scale=2, train_wall=11, gb_free=2.8, wall=81563 2021-06-19 17:18:20 | INFO | train_inner | epoch 003: 1147 / 3002 loss=2.562, ppl=5.9, wps=5754, ups=0.09, wpb=64855, bsz=128, num_updates=7105, lr=9.99512e-05, gnorm=2.206, loss_scale=2, train_wall=11, gb_free=2.8, wall=81574 2021-06-19 17:18:31 | INFO | train_inner | epoch 003: 1148 / 3002 loss=2.628, ppl=6.18, wps=5949.6, ups=0.09, wpb=64808, bsz=128, num_updates=7106, lr=9.99511e-05, gnorm=2.295, loss_scale=2, train_wall=10, gb_free=2.8, wall=81585 2021-06-19 17:18:42 | INFO | train_inner | epoch 003: 1149 / 3002 loss=2.689, ppl=6.45, wps=5832.2, ups=0.09, wpb=64771, bsz=128, num_updates=7107, lr=9.99511e-05, gnorm=2.123, loss_scale=2, train_wall=11, gb_free=2.8, wall=81596 2021-06-19 17:18:53 | INFO | train_inner | epoch 003: 1150 / 3002 loss=2.523, ppl=5.75, wps=5798.1, ups=0.09, wpb=64760, bsz=128, num_updates=7108, lr=9.99511e-05, gnorm=2.103, loss_scale=2, train_wall=11, gb_free=2.8, wall=81607 2021-06-19 17:19:04 | INFO | train_inner | epoch 003: 1151 / 3002 loss=2.616, ppl=6.13, wps=5724.9, ups=0.09, wpb=64809, bsz=128, num_updates=7109, lr=9.99511e-05, gnorm=3.054, loss_scale=2, train_wall=11, gb_free=2.8, wall=81619 2021-06-19 17:19:16 | INFO | train_inner | epoch 003: 1152 / 3002 loss=2.527, ppl=5.76, wps=5793.2, ups=0.09, wpb=64884, bsz=128, num_updates=7110, lr=9.99511e-05, gnorm=2.252, loss_scale=2, train_wall=11, gb_free=2.8, wall=81630 2021-06-19 17:19:27 | INFO | train_inner | epoch 003: 1153 / 3002 loss=2.593, ppl=6.03, wps=5713.2, ups=0.09, wpb=64833, bsz=128, num_updates=7111, lr=9.99511e-05, gnorm=2.245, loss_scale=2, train_wall=11, gb_free=2.8, wall=81641 2021-06-19 17:19:38 | INFO | train_inner | epoch 003: 1154 / 3002 loss=2.652, ppl=6.29, wps=5863.1, ups=0.09, wpb=64728, bsz=128, num_updates=7112, lr=9.99511e-05, gnorm=2.53, loss_scale=2, train_wall=11, gb_free=2.8, wall=81652 2021-06-19 17:19:49 | INFO | train_inner | epoch 003: 1155 / 3002 loss=2.705, ppl=6.52, wps=5811, ups=0.09, wpb=64793, bsz=128, num_updates=7113, lr=9.99511e-05, gnorm=2.588, loss_scale=2, train_wall=11, gb_free=2.8, wall=81663 2021-06-19 17:20:00 | INFO | train_inner | epoch 003: 1156 / 3002 loss=2.602, ppl=6.07, wps=5765.7, ups=0.09, wpb=64803, bsz=128, num_updates=7114, lr=9.99511e-05, gnorm=2.121, loss_scale=2, train_wall=11, gb_free=2.8, wall=81675 2021-06-19 17:20:11 | INFO | train_inner | epoch 003: 1157 / 3002 loss=2.647, ppl=6.26, wps=5959, ups=0.09, wpb=64804, bsz=128, num_updates=7115, lr=9.99511e-05, gnorm=2.275, loss_scale=2, train_wall=10, gb_free=2.8, wall=81686 2021-06-19 17:20:22 | INFO | train_inner | epoch 003: 1158 / 3002 loss=2.511, ppl=5.7, wps=5910.5, ups=0.09, wpb=64847, bsz=128, num_updates=7116, lr=9.99511e-05, gnorm=2.56, loss_scale=2, train_wall=10, gb_free=2.8, wall=81696 2021-06-19 17:20:33 | INFO | train_inner | epoch 003: 1159 / 3002 loss=2.695, ppl=6.48, wps=5821.5, ups=0.09, wpb=64896, bsz=128, num_updates=7117, lr=9.99511e-05, gnorm=2.192, loss_scale=2, train_wall=11, gb_free=2.8, wall=81708 2021-06-19 17:20:44 | INFO | train_inner | epoch 003: 1160 / 3002 loss=2.699, ppl=6.49, wps=5976.5, ups=0.09, wpb=64894, bsz=128, num_updates=7118, lr=9.99511e-05, gnorm=2.111, loss_scale=2, train_wall=10, gb_free=2.8, wall=81718 2021-06-19 17:20:55 | INFO | train_inner | epoch 003: 1161 / 3002 loss=2.754, ppl=6.75, wps=5825.1, ups=0.09, wpb=64826, bsz=128, num_updates=7119, lr=9.9951e-05, gnorm=13.478, loss_scale=2, train_wall=11, gb_free=2.8, wall=81730 2021-06-19 17:21:06 | INFO | train_inner | epoch 003: 1162 / 3002 loss=2.793, ppl=6.93, wps=5804.8, ups=0.09, wpb=64822, bsz=128, num_updates=7120, lr=9.9951e-05, gnorm=2.17, loss_scale=2, train_wall=11, gb_free=2.8, wall=81741 2021-06-19 17:21:17 | INFO | train_inner | epoch 003: 1163 / 3002 loss=2.71, ppl=6.54, wps=6007.8, ups=0.09, wpb=64863, bsz=128, num_updates=7121, lr=9.9951e-05, gnorm=2.081, loss_scale=2, train_wall=10, gb_free=2.8, wall=81752 2021-06-19 17:21:28 | INFO | train_inner | epoch 003: 1164 / 3002 loss=2.698, ppl=6.49, wps=5785.2, ups=0.09, wpb=64827, bsz=128, num_updates=7122, lr=9.9951e-05, gnorm=2.16, loss_scale=2, train_wall=11, gb_free=2.8, wall=81763 2021-06-19 17:21:40 | INFO | train_inner | epoch 003: 1165 / 3002 loss=2.7, ppl=6.5, wps=5846.8, ups=0.09, wpb=64859, bsz=128, num_updates=7123, lr=9.9951e-05, gnorm=4.11, loss_scale=2, train_wall=11, gb_free=2.8, wall=81774 2021-06-19 17:21:50 | INFO | train_inner | epoch 003: 1166 / 3002 loss=2.613, ppl=6.12, wps=5935.2, ups=0.09, wpb=64805, bsz=128, num_updates=7124, lr=9.9951e-05, gnorm=2.143, loss_scale=2, train_wall=10, gb_free=2.8, wall=81785 2021-06-19 17:22:01 | INFO | train_inner | epoch 003: 1167 / 3002 loss=2.78, ppl=6.87, wps=5967.6, ups=0.09, wpb=64839, bsz=128, num_updates=7125, lr=9.9951e-05, gnorm=2.253, loss_scale=2, train_wall=10, gb_free=2.8, wall=81796 2021-06-19 17:22:13 | INFO | train_inner | epoch 003: 1168 / 3002 loss=2.659, ppl=6.32, wps=5767.5, ups=0.09, wpb=64841, bsz=128, num_updates=7126, lr=9.9951e-05, gnorm=2.353, loss_scale=2, train_wall=11, gb_free=2.8, wall=81807 2021-06-19 17:22:24 | INFO | train_inner | epoch 003: 1169 / 3002 loss=2.624, ppl=6.16, wps=5917.3, ups=0.09, wpb=64915, bsz=128, num_updates=7127, lr=9.9951e-05, gnorm=2.295, loss_scale=2, train_wall=11, gb_free=2.8, wall=81818 2021-06-19 17:22:35 | INFO | train_inner | epoch 003: 1170 / 3002 loss=2.762, ppl=6.78, wps=5823.4, ups=0.09, wpb=64844, bsz=128, num_updates=7128, lr=9.9951e-05, gnorm=2.35, loss_scale=2, train_wall=11, gb_free=2.8, wall=81829 2021-06-19 17:22:46 | INFO | train_inner | epoch 003: 1171 / 3002 loss=2.641, ppl=6.24, wps=5806.1, ups=0.09, wpb=64849, bsz=128, num_updates=7129, lr=9.9951e-05, gnorm=2.7, loss_scale=2, train_wall=11, gb_free=2.8, wall=81840 2021-06-19 17:22:57 | INFO | train_inner | epoch 003: 1172 / 3002 loss=2.664, ppl=6.34, wps=5825.7, ups=0.09, wpb=64796, bsz=128, num_updates=7130, lr=9.9951e-05, gnorm=2.738, loss_scale=2, train_wall=11, gb_free=2.8, wall=81851 2021-06-19 17:23:08 | INFO | train_inner | epoch 003: 1173 / 3002 loss=2.573, ppl=5.95, wps=5790.4, ups=0.09, wpb=64843, bsz=128, num_updates=7131, lr=9.99509e-05, gnorm=2.151, loss_scale=2, train_wall=11, gb_free=2.8, wall=81863 2021-06-19 17:23:19 | INFO | train_inner | epoch 003: 1174 / 3002 loss=2.569, ppl=5.93, wps=5867.7, ups=0.09, wpb=64862, bsz=128, num_updates=7132, lr=9.99509e-05, gnorm=2.211, loss_scale=2, train_wall=11, gb_free=2.8, wall=81874 2021-06-19 17:23:30 | INFO | train_inner | epoch 003: 1175 / 3002 loss=2.728, ppl=6.63, wps=5912.1, ups=0.09, wpb=64826, bsz=128, num_updates=7133, lr=9.99509e-05, gnorm=2.496, loss_scale=2, train_wall=11, gb_free=2.8, wall=81885 2021-06-19 17:23:41 | INFO | train_inner | epoch 003: 1176 / 3002 loss=2.697, ppl=6.49, wps=5806.4, ups=0.09, wpb=64737, bsz=128, num_updates=7134, lr=9.99509e-05, gnorm=6.058, loss_scale=2, train_wall=11, gb_free=2.8, wall=81896 2021-06-19 17:23:52 | INFO | train_inner | epoch 003: 1177 / 3002 loss=2.708, ppl=6.53, wps=5981.2, ups=0.09, wpb=64835, bsz=128, num_updates=7135, lr=9.99509e-05, gnorm=2.463, loss_scale=2, train_wall=10, gb_free=2.8, wall=81907 2021-06-19 17:24:03 | INFO | train_inner | epoch 003: 1178 / 3002 loss=2.66, ppl=6.32, wps=5795.4, ups=0.09, wpb=64818, bsz=128, num_updates=7136, lr=9.99509e-05, gnorm=2.248, loss_scale=2, train_wall=11, gb_free=2.8, wall=81918 2021-06-19 17:24:14 | INFO | train_inner | epoch 003: 1179 / 3002 loss=2.531, ppl=5.78, wps=6035.6, ups=0.09, wpb=64932, bsz=128, num_updates=7137, lr=9.99509e-05, gnorm=2.621, loss_scale=2, train_wall=10, gb_free=2.8, wall=81928 2021-06-19 17:24:25 | INFO | train_inner | epoch 003: 1180 / 3002 loss=2.544, ppl=5.83, wps=5774.3, ups=0.09, wpb=64813, bsz=128, num_updates=7138, lr=9.99509e-05, gnorm=2.062, loss_scale=2, train_wall=11, gb_free=2.8, wall=81940 2021-06-19 17:24:37 | INFO | train_inner | epoch 003: 1181 / 3002 loss=2.58, ppl=5.98, wps=5729.6, ups=0.09, wpb=64855, bsz=128, num_updates=7139, lr=9.99509e-05, gnorm=2.127, loss_scale=2, train_wall=11, gb_free=2.8, wall=81951 2021-06-19 17:24:48 | INFO | train_inner | epoch 003: 1182 / 3002 loss=2.631, ppl=6.19, wps=5945.1, ups=0.09, wpb=64857, bsz=128, num_updates=7140, lr=9.99509e-05, gnorm=2.159, loss_scale=2, train_wall=10, gb_free=2.8, wall=81962 2021-06-19 17:24:59 | INFO | train_inner | epoch 003: 1183 / 3002 loss=2.626, ppl=6.17, wps=5868, ups=0.09, wpb=64794, bsz=128, num_updates=7141, lr=9.99509e-05, gnorm=2.211, loss_scale=2, train_wall=11, gb_free=2.8, wall=81973 2021-06-19 17:25:10 | INFO | train_inner | epoch 003: 1184 / 3002 loss=2.765, ppl=6.8, wps=5831.9, ups=0.09, wpb=64834, bsz=128, num_updates=7142, lr=9.99509e-05, gnorm=2.677, loss_scale=2, train_wall=11, gb_free=2.8, wall=81984 2021-06-19 17:25:21 | INFO | train_inner | epoch 003: 1185 / 3002 loss=2.656, ppl=6.3, wps=5790.8, ups=0.09, wpb=64866, bsz=128, num_updates=7143, lr=9.99509e-05, gnorm=2.237, loss_scale=2, train_wall=11, gb_free=2.8, wall=81995 2021-06-19 17:25:32 | INFO | train_inner | epoch 003: 1186 / 3002 loss=2.812, ppl=7.02, wps=5842.9, ups=0.09, wpb=64875, bsz=128, num_updates=7144, lr=9.99508e-05, gnorm=2.26, loss_scale=2, train_wall=11, gb_free=2.8, wall=82006 2021-06-19 17:25:43 | INFO | train_inner | epoch 003: 1187 / 3002 loss=2.637, ppl=6.22, wps=5953.2, ups=0.09, wpb=64823, bsz=128, num_updates=7145, lr=9.99508e-05, gnorm=2.283, loss_scale=2, train_wall=10, gb_free=2.8, wall=82017 2021-06-19 17:25:54 | INFO | train_inner | epoch 003: 1188 / 3002 loss=2.639, ppl=6.23, wps=5924.5, ups=0.09, wpb=64865, bsz=128, num_updates=7146, lr=9.99508e-05, gnorm=3.032, loss_scale=2, train_wall=10, gb_free=2.8, wall=82028 2021-06-19 17:26:05 | INFO | train_inner | epoch 003: 1189 / 3002 loss=2.513, ppl=5.71, wps=5918.8, ups=0.09, wpb=64856, bsz=128, num_updates=7147, lr=9.99508e-05, gnorm=2.122, loss_scale=2, train_wall=11, gb_free=2.8, wall=82039 2021-06-19 17:26:16 | INFO | train_inner | epoch 003: 1190 / 3002 loss=2.625, ppl=6.17, wps=5802.4, ups=0.09, wpb=64780, bsz=128, num_updates=7148, lr=9.99508e-05, gnorm=2.218, loss_scale=2, train_wall=11, gb_free=2.8, wall=82050 2021-06-19 17:26:27 | INFO | train_inner | epoch 003: 1191 / 3002 loss=2.66, ppl=6.32, wps=5789.9, ups=0.09, wpb=64802, bsz=128, num_updates=7149, lr=9.99508e-05, gnorm=2.262, loss_scale=2, train_wall=11, gb_free=2.8, wall=82062 2021-06-19 17:26:38 | INFO | train_inner | epoch 003: 1192 / 3002 loss=2.694, ppl=6.47, wps=5968.1, ups=0.09, wpb=64898, bsz=128, num_updates=7150, lr=9.99508e-05, gnorm=2.714, loss_scale=2, train_wall=10, gb_free=2.8, wall=82072 2021-06-19 17:26:49 | INFO | train_inner | epoch 003: 1193 / 3002 loss=2.64, ppl=6.23, wps=5873.3, ups=0.09, wpb=64823, bsz=128, num_updates=7151, lr=9.99508e-05, gnorm=2.273, loss_scale=2, train_wall=11, gb_free=2.8, wall=82083 2021-06-19 17:27:00 | INFO | train_inner | epoch 003: 1194 / 3002 loss=2.667, ppl=6.35, wps=5785.1, ups=0.09, wpb=64840, bsz=128, num_updates=7152, lr=9.99508e-05, gnorm=2.912, loss_scale=2, train_wall=11, gb_free=2.8, wall=82095 2021-06-19 17:27:12 | INFO | train_inner | epoch 003: 1195 / 3002 loss=2.648, ppl=6.27, wps=5756.9, ups=0.09, wpb=64864, bsz=128, num_updates=7153, lr=9.99508e-05, gnorm=2.354, loss_scale=2, train_wall=11, gb_free=2.8, wall=82106 2021-06-19 17:27:23 | INFO | train_inner | epoch 003: 1196 / 3002 loss=2.463, ppl=5.51, wps=5887.4, ups=0.09, wpb=64896, bsz=128, num_updates=7154, lr=9.99508e-05, gnorm=2.198, loss_scale=2, train_wall=11, gb_free=2.8, wall=82117 2021-06-19 17:27:34 | INFO | train_inner | epoch 003: 1197 / 3002 loss=2.808, ppl=7, wps=5919.8, ups=0.09, wpb=64843, bsz=128, num_updates=7155, lr=9.99508e-05, gnorm=2.115, loss_scale=2, train_wall=10, gb_free=2.8, wall=82128 2021-06-19 17:27:45 | INFO | train_inner | epoch 003: 1198 / 3002 loss=2.617, ppl=6.14, wps=5918.9, ups=0.09, wpb=64839, bsz=128, num_updates=7156, lr=9.99507e-05, gnorm=9.55, loss_scale=2, train_wall=11, gb_free=2.8, wall=82139 2021-06-19 17:27:56 | INFO | train_inner | epoch 003: 1199 / 3002 loss=2.614, ppl=6.12, wps=5823.9, ups=0.09, wpb=64773, bsz=128, num_updates=7157, lr=9.99507e-05, gnorm=2.224, loss_scale=2, train_wall=11, gb_free=2.8, wall=82150 2021-06-19 17:28:07 | INFO | train_inner | epoch 003: 1200 / 3002 loss=2.606, ppl=6.09, wps=5897.6, ups=0.09, wpb=64807, bsz=128, num_updates=7158, lr=9.99507e-05, gnorm=2.118, loss_scale=2, train_wall=10, gb_free=2.8, wall=82161 2021-06-19 17:28:18 | INFO | train_inner | epoch 003: 1201 / 3002 loss=2.586, ppl=6, wps=5795.3, ups=0.09, wpb=64902, bsz=128, num_updates=7159, lr=9.99507e-05, gnorm=2.147, loss_scale=2, train_wall=11, gb_free=2.8, wall=82172 2021-06-19 17:28:29 | INFO | train_inner | epoch 003: 1202 / 3002 loss=2.736, ppl=6.66, wps=5918, ups=0.09, wpb=64759, bsz=128, num_updates=7160, lr=9.99507e-05, gnorm=2.239, loss_scale=2, train_wall=10, gb_free=2.8, wall=82183 2021-06-19 17:28:40 | INFO | train_inner | epoch 003: 1203 / 3002 loss=2.662, ppl=6.33, wps=5858.8, ups=0.09, wpb=64911, bsz=128, num_updates=7161, lr=9.99507e-05, gnorm=2.216, loss_scale=2, train_wall=11, gb_free=2.8, wall=82194 2021-06-19 17:28:51 | INFO | train_inner | epoch 003: 1204 / 3002 loss=2.527, ppl=5.76, wps=5830.2, ups=0.09, wpb=64886, bsz=128, num_updates=7162, lr=9.99507e-05, gnorm=2.3, loss_scale=2, train_wall=11, gb_free=2.8, wall=82205 2021-06-19 17:29:02 | INFO | train_inner | epoch 003: 1205 / 3002 loss=2.713, ppl=6.55, wps=5897.4, ups=0.09, wpb=64874, bsz=128, num_updates=7163, lr=9.99507e-05, gnorm=2.238, loss_scale=4, train_wall=11, gb_free=2.8, wall=82216 2021-06-19 17:29:13 | INFO | train_inner | epoch 003: 1206 / 3002 loss=2.546, ppl=5.84, wps=5784.5, ups=0.09, wpb=64782, bsz=128, num_updates=7164, lr=9.99507e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=82228 2021-06-19 17:29:24 | INFO | train_inner | epoch 003: 1207 / 3002 loss=2.594, ppl=6.04, wps=5819.7, ups=0.09, wpb=64904, bsz=128, num_updates=7165, lr=9.99507e-05, gnorm=2.104, loss_scale=4, train_wall=11, gb_free=2.8, wall=82239 2021-06-19 17:29:35 | INFO | train_inner | epoch 003: 1208 / 3002 loss=2.636, ppl=6.22, wps=5865.6, ups=0.09, wpb=64837, bsz=128, num_updates=7166, lr=9.99507e-05, gnorm=2.421, loss_scale=4, train_wall=11, gb_free=2.8, wall=82250 2021-06-19 17:29:46 | INFO | train_inner | epoch 003: 1209 / 3002 loss=2.593, ppl=6.03, wps=5916.1, ups=0.09, wpb=64877, bsz=128, num_updates=7167, lr=9.99507e-05, gnorm=2.216, loss_scale=4, train_wall=10, gb_free=2.8, wall=82261 2021-06-19 17:29:57 | INFO | train_inner | epoch 003: 1210 / 3002 loss=2.555, ppl=5.88, wps=5902.7, ups=0.09, wpb=64947, bsz=128, num_updates=7168, lr=9.99507e-05, gnorm=2.131, loss_scale=4, train_wall=11, gb_free=2.8, wall=82272 2021-06-19 17:30:08 | INFO | train_inner | epoch 003: 1211 / 3002 loss=2.603, ppl=6.07, wps=5888.7, ups=0.09, wpb=64872, bsz=128, num_updates=7169, lr=9.99506e-05, gnorm=2.427, loss_scale=4, train_wall=11, gb_free=2.8, wall=82283 2021-06-19 17:30:19 | INFO | train_inner | epoch 003: 1212 / 3002 loss=2.582, ppl=5.99, wps=6015.6, ups=0.09, wpb=64938, bsz=128, num_updates=7170, lr=9.99506e-05, gnorm=2.103, loss_scale=4, train_wall=10, gb_free=2.8, wall=82294 2021-06-19 17:30:30 | INFO | train_inner | epoch 003: 1213 / 3002 loss=2.736, ppl=6.66, wps=5919.4, ups=0.09, wpb=64758, bsz=128, num_updates=7171, lr=9.99506e-05, gnorm=2.327, loss_scale=4, train_wall=10, gb_free=2.8, wall=82304 2021-06-19 17:30:41 | INFO | train_inner | epoch 003: 1214 / 3002 loss=2.606, ppl=6.09, wps=5765.2, ups=0.09, wpb=64828, bsz=128, num_updates=7172, lr=9.99506e-05, gnorm=2.184, loss_scale=4, train_wall=11, gb_free=2.8, wall=82316 2021-06-19 17:30:52 | INFO | train_inner | epoch 003: 1215 / 3002 loss=2.684, ppl=6.43, wps=5898.8, ups=0.09, wpb=64818, bsz=128, num_updates=7173, lr=9.99506e-05, gnorm=3.331, loss_scale=4, train_wall=11, gb_free=2.8, wall=82327 2021-06-19 17:31:03 | INFO | train_inner | epoch 003: 1216 / 3002 loss=2.643, ppl=6.24, wps=5810.7, ups=0.09, wpb=64787, bsz=128, num_updates=7174, lr=9.99506e-05, gnorm=2.352, loss_scale=4, train_wall=11, gb_free=2.8, wall=82338 2021-06-19 17:31:15 | INFO | train_inner | epoch 003: 1217 / 3002 loss=2.579, ppl=5.97, wps=5754.8, ups=0.09, wpb=64807, bsz=128, num_updates=7175, lr=9.99506e-05, gnorm=2.253, loss_scale=4, train_wall=11, gb_free=2.8, wall=82349 2021-06-19 17:31:26 | INFO | train_inner | epoch 003: 1218 / 3002 loss=2.607, ppl=6.09, wps=5886, ups=0.09, wpb=64864, bsz=128, num_updates=7176, lr=9.99506e-05, gnorm=2.176, loss_scale=4, train_wall=11, gb_free=2.8, wall=82360 2021-06-19 17:31:37 | INFO | train_inner | epoch 003: 1219 / 3002 loss=2.717, ppl=6.58, wps=5780.3, ups=0.09, wpb=64758, bsz=128, num_updates=7177, lr=9.99506e-05, gnorm=2.228, loss_scale=4, train_wall=11, gb_free=2.8, wall=82371 2021-06-19 17:31:48 | INFO | train_inner | epoch 003: 1220 / 3002 loss=2.775, ppl=6.85, wps=5765.9, ups=0.09, wpb=64915, bsz=128, num_updates=7178, lr=9.99506e-05, gnorm=2.454, loss_scale=4, train_wall=11, gb_free=2.8, wall=82383 2021-06-19 17:31:59 | INFO | train_inner | epoch 003: 1221 / 3002 loss=2.655, ppl=6.3, wps=5939.1, ups=0.09, wpb=64891, bsz=128, num_updates=7179, lr=9.99506e-05, gnorm=2.152, loss_scale=4, train_wall=10, gb_free=2.8, wall=82394 2021-06-19 17:32:10 | INFO | train_inner | epoch 003: 1222 / 3002 loss=2.639, ppl=6.23, wps=5849.9, ups=0.09, wpb=64817, bsz=128, num_updates=7180, lr=9.99506e-05, gnorm=2.128, loss_scale=4, train_wall=11, gb_free=2.8, wall=82405 2021-06-19 17:32:21 | INFO | train_inner | epoch 003: 1223 / 3002 loss=2.7, ppl=6.5, wps=5893.5, ups=0.09, wpb=64829, bsz=128, num_updates=7181, lr=9.99505e-05, gnorm=10.323, loss_scale=4, train_wall=11, gb_free=2.8, wall=82416 2021-06-19 17:32:32 | INFO | train_inner | epoch 003: 1224 / 3002 loss=2.696, ppl=6.48, wps=5935.2, ups=0.09, wpb=64835, bsz=128, num_updates=7182, lr=9.99505e-05, gnorm=2.207, loss_scale=4, train_wall=10, gb_free=2.8, wall=82427 2021-06-19 17:32:43 | INFO | train_inner | epoch 003: 1225 / 3002 loss=2.712, ppl=6.55, wps=5853, ups=0.09, wpb=64793, bsz=128, num_updates=7183, lr=9.99505e-05, gnorm=2.135, loss_scale=4, train_wall=11, gb_free=2.8, wall=82438 2021-06-19 17:32:54 | INFO | train_inner | epoch 003: 1226 / 3002 loss=2.616, ppl=6.13, wps=5837.8, ups=0.09, wpb=64857, bsz=128, num_updates=7184, lr=9.99505e-05, gnorm=2.291, loss_scale=4, train_wall=11, gb_free=2.8, wall=82449 2021-06-19 17:33:06 | INFO | train_inner | epoch 003: 1227 / 3002 loss=2.576, ppl=5.96, wps=5775.4, ups=0.09, wpb=64897, bsz=128, num_updates=7185, lr=9.99505e-05, gnorm=2.139, loss_scale=4, train_wall=11, gb_free=2.8, wall=82460 2021-06-19 17:33:17 | INFO | train_inner | epoch 003: 1228 / 3002 loss=2.721, ppl=6.59, wps=5917.8, ups=0.09, wpb=64853, bsz=128, num_updates=7186, lr=9.99505e-05, gnorm=2.599, loss_scale=4, train_wall=10, gb_free=2.8, wall=82471 2021-06-19 17:33:28 | INFO | train_inner | epoch 003: 1229 / 3002 loss=2.492, ppl=5.63, wps=5860.1, ups=0.09, wpb=64760, bsz=128, num_updates=7187, lr=9.99505e-05, gnorm=2.299, loss_scale=4, train_wall=11, gb_free=2.8, wall=82482 2021-06-19 17:33:39 | INFO | train_inner | epoch 003: 1230 / 3002 loss=2.677, ppl=6.4, wps=5745.9, ups=0.09, wpb=64820, bsz=128, num_updates=7188, lr=9.99505e-05, gnorm=5.694, loss_scale=4, train_wall=11, gb_free=2.8, wall=82493 2021-06-19 17:33:50 | INFO | train_inner | epoch 003: 1231 / 3002 loss=2.657, ppl=6.31, wps=5727.8, ups=0.09, wpb=64740, bsz=128, num_updates=7189, lr=9.99505e-05, gnorm=2.637, loss_scale=4, train_wall=11, gb_free=2.8, wall=82505 2021-06-19 17:34:01 | INFO | train_inner | epoch 003: 1232 / 3002 loss=2.791, ppl=6.92, wps=5866.1, ups=0.09, wpb=64912, bsz=128, num_updates=7190, lr=9.99505e-05, gnorm=2.256, loss_scale=4, train_wall=11, gb_free=2.8, wall=82516 2021-06-19 17:34:12 | INFO | train_inner | epoch 003: 1233 / 3002 loss=2.704, ppl=6.52, wps=5768.7, ups=0.09, wpb=64850, bsz=128, num_updates=7191, lr=9.99505e-05, gnorm=2.338, loss_scale=4, train_wall=11, gb_free=2.8, wall=82527 2021-06-19 17:34:24 | INFO | train_inner | epoch 003: 1234 / 3002 loss=2.69, ppl=6.45, wps=5858.6, ups=0.09, wpb=64856, bsz=128, num_updates=7192, lr=9.99505e-05, gnorm=2.124, loss_scale=4, train_wall=11, gb_free=2.8, wall=82538 2021-06-19 17:34:35 | INFO | train_inner | epoch 003: 1235 / 3002 loss=2.68, ppl=6.41, wps=5801, ups=0.09, wpb=64800, bsz=128, num_updates=7193, lr=9.99505e-05, gnorm=2.11, loss_scale=4, train_wall=11, gb_free=2.8, wall=82549 2021-06-19 17:34:46 | INFO | train_inner | epoch 003: 1236 / 3002 loss=2.524, ppl=5.75, wps=5882.1, ups=0.09, wpb=64822, bsz=128, num_updates=7194, lr=9.99504e-05, gnorm=3.36, loss_scale=4, train_wall=11, gb_free=2.8, wall=82560 2021-06-19 17:34:57 | INFO | train_inner | epoch 003: 1237 / 3002 loss=2.473, ppl=5.55, wps=5904.9, ups=0.09, wpb=64824, bsz=128, num_updates=7195, lr=9.99504e-05, gnorm=2.08, loss_scale=4, train_wall=11, gb_free=2.8, wall=82571 2021-06-19 17:35:08 | INFO | train_inner | epoch 003: 1238 / 3002 loss=2.696, ppl=6.48, wps=5793.2, ups=0.09, wpb=64832, bsz=128, num_updates=7196, lr=9.99504e-05, gnorm=2.155, loss_scale=4, train_wall=11, gb_free=2.8, wall=82582 2021-06-19 17:35:19 | INFO | train_inner | epoch 003: 1239 / 3002 loss=2.547, ppl=5.84, wps=5833.1, ups=0.09, wpb=64821, bsz=128, num_updates=7197, lr=9.99504e-05, gnorm=2.132, loss_scale=4, train_wall=11, gb_free=2.8, wall=82593 2021-06-19 17:35:30 | INFO | train_inner | epoch 003: 1240 / 3002 loss=2.587, ppl=6.01, wps=5780.1, ups=0.09, wpb=64740, bsz=128, num_updates=7198, lr=9.99504e-05, gnorm=2.165, loss_scale=4, train_wall=11, gb_free=2.8, wall=82605 2021-06-19 17:35:41 | INFO | train_inner | epoch 003: 1241 / 3002 loss=2.762, ppl=6.78, wps=5890.5, ups=0.09, wpb=64863, bsz=128, num_updates=7199, lr=9.99504e-05, gnorm=2.248, loss_scale=4, train_wall=11, gb_free=2.8, wall=82616 2021-06-19 17:35:52 | INFO | train_inner | epoch 003: 1242 / 3002 loss=2.761, ppl=6.78, wps=5777, ups=0.09, wpb=64734, bsz=128, num_updates=7200, lr=9.99504e-05, gnorm=2.229, loss_scale=4, train_wall=11, gb_free=2.8, wall=82627 2021-06-19 17:36:04 | INFO | train_inner | epoch 003: 1243 / 3002 loss=2.674, ppl=6.38, wps=5793.4, ups=0.09, wpb=64782, bsz=128, num_updates=7201, lr=9.99504e-05, gnorm=2.158, loss_scale=4, train_wall=11, gb_free=2.8, wall=82638 2021-06-19 17:36:15 | INFO | train_inner | epoch 003: 1244 / 3002 loss=2.771, ppl=6.82, wps=5724.6, ups=0.09, wpb=64806, bsz=128, num_updates=7202, lr=9.99504e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=82649 2021-06-19 17:36:26 | INFO | train_inner | epoch 003: 1245 / 3002 loss=2.618, ppl=6.14, wps=5972.5, ups=0.09, wpb=64735, bsz=128, num_updates=7203, lr=9.99504e-05, gnorm=2.15, loss_scale=4, train_wall=10, gb_free=2.8, wall=82660 2021-06-19 17:36:37 | INFO | train_inner | epoch 003: 1246 / 3002 loss=2.78, ppl=6.87, wps=5832.6, ups=0.09, wpb=64845, bsz=128, num_updates=7204, lr=9.99504e-05, gnorm=2.854, loss_scale=4, train_wall=11, gb_free=2.8, wall=82671 2021-06-19 17:36:48 | INFO | train_inner | epoch 003: 1247 / 3002 loss=2.698, ppl=6.49, wps=5993.6, ups=0.09, wpb=64797, bsz=128, num_updates=7205, lr=9.99504e-05, gnorm=2.432, loss_scale=4, train_wall=10, gb_free=2.8, wall=82682 2021-06-19 17:36:59 | INFO | train_inner | epoch 003: 1248 / 3002 loss=2.603, ppl=6.08, wps=5917.6, ups=0.09, wpb=64886, bsz=128, num_updates=7206, lr=9.99503e-05, gnorm=2.205, loss_scale=4, train_wall=11, gb_free=2.8, wall=82693 2021-06-19 17:37:10 | INFO | train_inner | epoch 003: 1249 / 3002 loss=2.775, ppl=6.85, wps=5783.6, ups=0.09, wpb=64865, bsz=128, num_updates=7207, lr=9.99503e-05, gnorm=2.163, loss_scale=4, train_wall=11, gb_free=2.8, wall=82704 2021-06-19 17:37:21 | INFO | train_inner | epoch 003: 1250 / 3002 loss=2.683, ppl=6.42, wps=5822.4, ups=0.09, wpb=64854, bsz=128, num_updates=7208, lr=9.99503e-05, gnorm=2.179, loss_scale=4, train_wall=11, gb_free=2.8, wall=82715 2021-06-19 17:37:32 | INFO | train_inner | epoch 003: 1251 / 3002 loss=2.599, ppl=6.06, wps=5793.7, ups=0.09, wpb=64839, bsz=128, num_updates=7209, lr=9.99503e-05, gnorm=2.267, loss_scale=4, train_wall=11, gb_free=2.8, wall=82727 2021-06-19 17:37:43 | INFO | train_inner | epoch 003: 1252 / 3002 loss=2.517, ppl=5.73, wps=5818.5, ups=0.09, wpb=64814, bsz=128, num_updates=7210, lr=9.99503e-05, gnorm=2.772, loss_scale=4, train_wall=11, gb_free=2.8, wall=82738 2021-06-19 17:37:55 | INFO | train_inner | epoch 003: 1253 / 3002 loss=2.705, ppl=6.52, wps=5812.3, ups=0.09, wpb=64832, bsz=128, num_updates=7211, lr=9.99503e-05, gnorm=2.669, loss_scale=4, train_wall=11, gb_free=2.8, wall=82749 2021-06-19 17:38:06 | INFO | train_inner | epoch 003: 1254 / 3002 loss=2.545, ppl=5.84, wps=5867.4, ups=0.09, wpb=64856, bsz=128, num_updates=7212, lr=9.99503e-05, gnorm=2.5, loss_scale=4, train_wall=11, gb_free=2.8, wall=82760 2021-06-19 17:38:17 | INFO | train_inner | epoch 003: 1255 / 3002 loss=2.621, ppl=6.15, wps=5811, ups=0.09, wpb=64816, bsz=128, num_updates=7213, lr=9.99503e-05, gnorm=2.239, loss_scale=4, train_wall=11, gb_free=2.8, wall=82771 2021-06-19 17:38:28 | INFO | train_inner | epoch 003: 1256 / 3002 loss=2.609, ppl=6.1, wps=5859.8, ups=0.09, wpb=64756, bsz=128, num_updates=7214, lr=9.99503e-05, gnorm=2.166, loss_scale=4, train_wall=11, gb_free=2.8, wall=82782 2021-06-19 17:38:39 | INFO | train_inner | epoch 003: 1257 / 3002 loss=2.638, ppl=6.22, wps=5797.4, ups=0.09, wpb=64783, bsz=128, num_updates=7215, lr=9.99503e-05, gnorm=2.115, loss_scale=4, train_wall=11, gb_free=2.8, wall=82793 2021-06-19 17:38:50 | INFO | train_inner | epoch 003: 1258 / 3002 loss=2.669, ppl=6.36, wps=5822.7, ups=0.09, wpb=64862, bsz=128, num_updates=7216, lr=9.99503e-05, gnorm=2.164, loss_scale=4, train_wall=11, gb_free=2.8, wall=82804 2021-06-19 17:39:01 | INFO | train_inner | epoch 003: 1259 / 3002 loss=2.667, ppl=6.35, wps=5853.4, ups=0.09, wpb=64783, bsz=128, num_updates=7217, lr=9.99503e-05, gnorm=2.221, loss_scale=4, train_wall=11, gb_free=2.8, wall=82816 2021-06-19 17:39:12 | INFO | train_inner | epoch 003: 1260 / 3002 loss=2.745, ppl=6.7, wps=5937.6, ups=0.09, wpb=64869, bsz=128, num_updates=7218, lr=9.99503e-05, gnorm=2.232, loss_scale=4, train_wall=10, gb_free=2.8, wall=82826 2021-06-19 17:39:23 | INFO | train_inner | epoch 003: 1261 / 3002 loss=2.619, ppl=6.14, wps=5840.8, ups=0.09, wpb=64900, bsz=128, num_updates=7219, lr=9.99502e-05, gnorm=2.133, loss_scale=4, train_wall=11, gb_free=2.8, wall=82838 2021-06-19 17:39:34 | INFO | train_inner | epoch 003: 1262 / 3002 loss=2.627, ppl=6.18, wps=5999.1, ups=0.09, wpb=64832, bsz=128, num_updates=7220, lr=9.99502e-05, gnorm=2.258, loss_scale=4, train_wall=10, gb_free=2.8, wall=82848 2021-06-19 17:39:45 | INFO | train_inner | epoch 003: 1263 / 3002 loss=2.705, ppl=6.52, wps=5775.5, ups=0.09, wpb=64827, bsz=128, num_updates=7221, lr=9.99502e-05, gnorm=2.374, loss_scale=4, train_wall=11, gb_free=2.8, wall=82860 2021-06-19 17:39:56 | INFO | train_inner | epoch 003: 1264 / 3002 loss=2.624, ppl=6.17, wps=5835.3, ups=0.09, wpb=64866, bsz=128, num_updates=7222, lr=9.99502e-05, gnorm=2.289, loss_scale=4, train_wall=11, gb_free=2.8, wall=82871 2021-06-19 17:40:07 | INFO | train_inner | epoch 003: 1265 / 3002 loss=2.709, ppl=6.54, wps=5830.4, ups=0.09, wpb=64875, bsz=128, num_updates=7223, lr=9.99502e-05, gnorm=2.344, loss_scale=4, train_wall=11, gb_free=2.8, wall=82882 2021-06-19 17:40:19 | INFO | train_inner | epoch 003: 1266 / 3002 loss=2.823, ppl=7.08, wps=5784.7, ups=0.09, wpb=64783, bsz=128, num_updates=7224, lr=9.99502e-05, gnorm=2.235, loss_scale=4, train_wall=11, gb_free=2.8, wall=82893 2021-06-19 17:40:30 | INFO | train_inner | epoch 003: 1267 / 3002 loss=2.69, ppl=6.45, wps=5843.5, ups=0.09, wpb=64846, bsz=128, num_updates=7225, lr=9.99502e-05, gnorm=2.439, loss_scale=4, train_wall=11, gb_free=2.8, wall=82904 2021-06-19 17:40:41 | INFO | train_inner | epoch 003: 1268 / 3002 loss=2.541, ppl=5.82, wps=5817.3, ups=0.09, wpb=64769, bsz=128, num_updates=7226, lr=9.99502e-05, gnorm=2.177, loss_scale=4, train_wall=11, gb_free=2.8, wall=82915 2021-06-19 17:40:52 | INFO | train_inner | epoch 003: 1269 / 3002 loss=2.804, ppl=6.99, wps=5851.3, ups=0.09, wpb=64838, bsz=128, num_updates=7227, lr=9.99502e-05, gnorm=2.177, loss_scale=4, train_wall=11, gb_free=2.8, wall=82926 2021-06-19 17:41:03 | INFO | train_inner | epoch 003: 1270 / 3002 loss=2.735, ppl=6.66, wps=5732.8, ups=0.09, wpb=64781, bsz=128, num_updates=7228, lr=9.99502e-05, gnorm=2.312, loss_scale=4, train_wall=11, gb_free=2.8, wall=82938 2021-06-19 17:41:14 | INFO | train_inner | epoch 003: 1271 / 3002 loss=2.721, ppl=6.59, wps=5814.8, ups=0.09, wpb=64803, bsz=128, num_updates=7229, lr=9.99502e-05, gnorm=2.234, loss_scale=4, train_wall=11, gb_free=2.8, wall=82949 2021-06-19 17:41:26 | INFO | train_inner | epoch 003: 1272 / 3002 loss=2.769, ppl=6.82, wps=5849.6, ups=0.09, wpb=64786, bsz=128, num_updates=7230, lr=9.99502e-05, gnorm=2.207, loss_scale=4, train_wall=11, gb_free=2.8, wall=82960 2021-06-19 17:41:36 | INFO | train_inner | epoch 003: 1273 / 3002 loss=2.601, ppl=6.07, wps=5964.9, ups=0.09, wpb=64782, bsz=128, num_updates=7231, lr=9.99501e-05, gnorm=2.446, loss_scale=4, train_wall=10, gb_free=2.8, wall=82971 2021-06-19 17:41:48 | INFO | train_inner | epoch 003: 1274 / 3002 loss=2.722, ppl=6.6, wps=5780.3, ups=0.09, wpb=64809, bsz=128, num_updates=7232, lr=9.99501e-05, gnorm=2.269, loss_scale=4, train_wall=11, gb_free=2.8, wall=82982 2021-06-19 17:41:59 | INFO | train_inner | epoch 003: 1275 / 3002 loss=2.688, ppl=6.44, wps=5858, ups=0.09, wpb=64794, bsz=128, num_updates=7233, lr=9.99501e-05, gnorm=2.184, loss_scale=4, train_wall=11, gb_free=2.8, wall=82993 2021-06-19 17:42:10 | INFO | train_inner | epoch 003: 1276 / 3002 loss=2.552, ppl=5.86, wps=5838.3, ups=0.09, wpb=64821, bsz=128, num_updates=7234, lr=9.99501e-05, gnorm=2.153, loss_scale=4, train_wall=11, gb_free=2.8, wall=83004 2021-06-19 17:42:21 | INFO | train_inner | epoch 003: 1277 / 3002 loss=2.635, ppl=6.21, wps=5929, ups=0.09, wpb=64860, bsz=128, num_updates=7235, lr=9.99501e-05, gnorm=2.235, loss_scale=4, train_wall=10, gb_free=2.8, wall=83015 2021-06-19 17:42:32 | INFO | train_inner | epoch 003: 1278 / 3002 loss=2.619, ppl=6.14, wps=5919.2, ups=0.09, wpb=64780, bsz=128, num_updates=7236, lr=9.99501e-05, gnorm=2.174, loss_scale=4, train_wall=10, gb_free=2.8, wall=83026 2021-06-19 17:42:43 | INFO | train_inner | epoch 003: 1279 / 3002 loss=2.725, ppl=6.61, wps=5889.7, ups=0.09, wpb=64865, bsz=128, num_updates=7237, lr=9.99501e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=83037 2021-06-19 17:42:54 | INFO | train_inner | epoch 003: 1280 / 3002 loss=2.74, ppl=6.68, wps=5732.2, ups=0.09, wpb=64820, bsz=128, num_updates=7238, lr=9.99501e-05, gnorm=2.09, loss_scale=4, train_wall=11, gb_free=2.8, wall=83048 2021-06-19 17:43:05 | INFO | train_inner | epoch 003: 1281 / 3002 loss=2.589, ppl=6.02, wps=5710.4, ups=0.09, wpb=64827, bsz=128, num_updates=7239, lr=9.99501e-05, gnorm=2.16, loss_scale=4, train_wall=11, gb_free=2.8, wall=83060 2021-06-19 17:43:16 | INFO | train_inner | epoch 003: 1282 / 3002 loss=2.536, ppl=5.8, wps=5884.3, ups=0.09, wpb=64788, bsz=128, num_updates=7240, lr=9.99501e-05, gnorm=4.549, loss_scale=4, train_wall=11, gb_free=2.8, wall=83071 2021-06-19 17:43:28 | INFO | train_inner | epoch 003: 1283 / 3002 loss=2.669, ppl=6.36, wps=5745.3, ups=0.09, wpb=64801, bsz=128, num_updates=7241, lr=9.99501e-05, gnorm=2.144, loss_scale=4, train_wall=11, gb_free=2.8, wall=83082 2021-06-19 17:43:39 | INFO | train_inner | epoch 003: 1284 / 3002 loss=2.818, ppl=7.05, wps=5926.6, ups=0.09, wpb=64792, bsz=128, num_updates=7242, lr=9.99501e-05, gnorm=2.288, loss_scale=4, train_wall=10, gb_free=2.8, wall=83093 2021-06-19 17:43:50 | INFO | train_inner | epoch 003: 1285 / 3002 loss=2.844, ppl=7.18, wps=5843.8, ups=0.09, wpb=64817, bsz=128, num_updates=7243, lr=9.99501e-05, gnorm=2.302, loss_scale=4, train_wall=11, gb_free=2.8, wall=83104 2021-06-19 17:44:01 | INFO | train_inner | epoch 003: 1286 / 3002 loss=2.582, ppl=5.99, wps=5875.3, ups=0.09, wpb=64825, bsz=128, num_updates=7244, lr=9.995e-05, gnorm=2.718, loss_scale=4, train_wall=11, gb_free=2.8, wall=83115 2021-06-19 17:44:12 | INFO | train_inner | epoch 003: 1287 / 3002 loss=2.833, ppl=7.13, wps=5853.8, ups=0.09, wpb=64805, bsz=128, num_updates=7245, lr=9.995e-05, gnorm=2.277, loss_scale=4, train_wall=11, gb_free=2.8, wall=83126 2021-06-19 17:44:23 | INFO | train_inner | epoch 003: 1288 / 3002 loss=2.706, ppl=6.52, wps=5770.4, ups=0.09, wpb=64758, bsz=128, num_updates=7246, lr=9.995e-05, gnorm=2.2, loss_scale=4, train_wall=11, gb_free=2.8, wall=83137 2021-06-19 17:44:34 | INFO | train_inner | epoch 003: 1289 / 3002 loss=2.59, ppl=6.02, wps=5766.9, ups=0.09, wpb=64835, bsz=128, num_updates=7247, lr=9.995e-05, gnorm=2.169, loss_scale=4, train_wall=11, gb_free=2.8, wall=83149 2021-06-19 17:44:45 | INFO | train_inner | epoch 003: 1290 / 3002 loss=2.655, ppl=6.3, wps=5850.9, ups=0.09, wpb=64794, bsz=128, num_updates=7248, lr=9.995e-05, gnorm=2.1, loss_scale=4, train_wall=11, gb_free=2.8, wall=83160 2021-06-19 17:44:56 | INFO | train_inner | epoch 003: 1291 / 3002 loss=2.645, ppl=6.25, wps=5922.1, ups=0.09, wpb=64836, bsz=128, num_updates=7249, lr=9.995e-05, gnorm=2.158, loss_scale=4, train_wall=10, gb_free=2.8, wall=83171 2021-06-19 17:45:07 | INFO | train_inner | epoch 003: 1292 / 3002 loss=2.598, ppl=6.06, wps=5831.1, ups=0.09, wpb=64820, bsz=128, num_updates=7250, lr=9.995e-05, gnorm=3.574, loss_scale=4, train_wall=11, gb_free=2.8, wall=83182 2021-06-19 17:45:18 | INFO | train_inner | epoch 003: 1293 / 3002 loss=2.578, ppl=5.97, wps=5961.3, ups=0.09, wpb=64828, bsz=128, num_updates=7251, lr=9.995e-05, gnorm=2.248, loss_scale=4, train_wall=10, gb_free=2.8, wall=83193 2021-06-19 17:45:29 | INFO | train_inner | epoch 003: 1294 / 3002 loss=2.655, ppl=6.3, wps=5877.8, ups=0.09, wpb=64820, bsz=128, num_updates=7252, lr=9.995e-05, gnorm=2.242, loss_scale=4, train_wall=11, gb_free=2.8, wall=83204 2021-06-19 17:45:40 | INFO | train_inner | epoch 003: 1295 / 3002 loss=2.695, ppl=6.47, wps=5815.5, ups=0.09, wpb=64690, bsz=128, num_updates=7253, lr=9.995e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=83215 2021-06-19 17:45:52 | INFO | train_inner | epoch 003: 1296 / 3002 loss=2.738, ppl=6.67, wps=5777.5, ups=0.09, wpb=64702, bsz=128, num_updates=7254, lr=9.995e-05, gnorm=2.28, loss_scale=4, train_wall=11, gb_free=2.8, wall=83226 2021-06-19 17:46:03 | INFO | train_inner | epoch 003: 1297 / 3002 loss=2.598, ppl=6.05, wps=5838.7, ups=0.09, wpb=64766, bsz=128, num_updates=7255, lr=9.995e-05, gnorm=2.22, loss_scale=4, train_wall=11, gb_free=2.8, wall=83237 2021-06-19 17:46:14 | INFO | train_inner | epoch 003: 1298 / 3002 loss=2.524, ppl=5.75, wps=5870, ups=0.09, wpb=64934, bsz=128, num_updates=7256, lr=9.99499e-05, gnorm=2.268, loss_scale=4, train_wall=11, gb_free=2.8, wall=83248 2021-06-19 17:46:25 | INFO | train_inner | epoch 003: 1299 / 3002 loss=2.66, ppl=6.32, wps=5725.3, ups=0.09, wpb=64842, bsz=128, num_updates=7257, lr=9.99499e-05, gnorm=3.036, loss_scale=4, train_wall=11, gb_free=2.8, wall=83259 2021-06-19 17:46:36 | INFO | train_inner | epoch 003: 1300 / 3002 loss=2.648, ppl=6.27, wps=5807.4, ups=0.09, wpb=64811, bsz=128, num_updates=7258, lr=9.99499e-05, gnorm=2.049, loss_scale=4, train_wall=11, gb_free=2.8, wall=83271 2021-06-19 17:46:47 | INFO | train_inner | epoch 003: 1301 / 3002 loss=2.658, ppl=6.31, wps=5742.4, ups=0.09, wpb=64771, bsz=128, num_updates=7259, lr=9.99499e-05, gnorm=7.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=83282 2021-06-19 17:46:59 | INFO | train_inner | epoch 003: 1302 / 3002 loss=2.611, ppl=6.11, wps=5865, ups=0.09, wpb=64831, bsz=128, num_updates=7260, lr=9.99499e-05, gnorm=2.22, loss_scale=4, train_wall=11, gb_free=2.8, wall=83293 2021-06-19 17:47:10 | INFO | train_inner | epoch 003: 1303 / 3002 loss=2.627, ppl=6.18, wps=5789.7, ups=0.09, wpb=64823, bsz=128, num_updates=7261, lr=9.99499e-05, gnorm=2.091, loss_scale=4, train_wall=11, gb_free=2.8, wall=83304 2021-06-19 17:47:21 | INFO | train_inner | epoch 003: 1304 / 3002 loss=2.579, ppl=5.98, wps=5937.7, ups=0.09, wpb=64917, bsz=128, num_updates=7262, lr=9.99499e-05, gnorm=2.201, loss_scale=4, train_wall=10, gb_free=2.8, wall=83315 2021-06-19 17:47:32 | INFO | train_inner | epoch 003: 1305 / 3002 loss=2.638, ppl=6.22, wps=5704.5, ups=0.09, wpb=64835, bsz=128, num_updates=7263, lr=9.99499e-05, gnorm=2.191, loss_scale=4, train_wall=11, gb_free=2.8, wall=83326 2021-06-19 17:47:43 | INFO | train_inner | epoch 003: 1306 / 3002 loss=2.566, ppl=5.92, wps=5684.2, ups=0.09, wpb=64784, bsz=128, num_updates=7264, lr=9.99499e-05, gnorm=5.61, loss_scale=4, train_wall=11, gb_free=2.8, wall=83338 2021-06-19 17:47:55 | INFO | train_inner | epoch 003: 1307 / 3002 loss=2.592, ppl=6.03, wps=5831.5, ups=0.09, wpb=64853, bsz=128, num_updates=7265, lr=9.99499e-05, gnorm=2.264, loss_scale=4, train_wall=11, gb_free=2.8, wall=83349 2021-06-19 17:48:06 | INFO | train_inner | epoch 003: 1308 / 3002 loss=2.789, ppl=6.91, wps=5813.1, ups=0.09, wpb=64778, bsz=128, num_updates=7266, lr=9.99499e-05, gnorm=2.364, loss_scale=4, train_wall=11, gb_free=2.8, wall=83360 2021-06-19 17:48:17 | INFO | train_inner | epoch 003: 1309 / 3002 loss=2.653, ppl=6.29, wps=5903.1, ups=0.09, wpb=64851, bsz=128, num_updates=7267, lr=9.99499e-05, gnorm=2.94, loss_scale=4, train_wall=11, gb_free=2.8, wall=83371 2021-06-19 17:48:28 | INFO | train_inner | epoch 003: 1310 / 3002 loss=2.547, ppl=5.84, wps=5890.8, ups=0.09, wpb=64899, bsz=128, num_updates=7268, lr=9.99499e-05, gnorm=3.027, loss_scale=4, train_wall=11, gb_free=2.8, wall=83382 2021-06-19 17:48:39 | INFO | train_inner | epoch 003: 1311 / 3002 loss=2.676, ppl=6.39, wps=5833, ups=0.09, wpb=64894, bsz=128, num_updates=7269, lr=9.99498e-05, gnorm=2.838, loss_scale=4, train_wall=11, gb_free=2.8, wall=83393 2021-06-19 17:48:50 | INFO | train_inner | epoch 003: 1312 / 3002 loss=2.653, ppl=6.29, wps=5848.9, ups=0.09, wpb=64823, bsz=128, num_updates=7270, lr=9.99498e-05, gnorm=2.557, loss_scale=4, train_wall=11, gb_free=2.8, wall=83404 2021-06-19 17:49:01 | INFO | train_inner | epoch 003: 1313 / 3002 loss=2.61, ppl=6.1, wps=5771.1, ups=0.09, wpb=64889, bsz=128, num_updates=7271, lr=9.99498e-05, gnorm=2.655, loss_scale=4, train_wall=11, gb_free=2.8, wall=83415 2021-06-19 17:49:12 | INFO | train_inner | epoch 003: 1314 / 3002 loss=2.563, ppl=5.91, wps=5777.4, ups=0.09, wpb=64812, bsz=128, num_updates=7272, lr=9.99498e-05, gnorm=2.34, loss_scale=4, train_wall=11, gb_free=2.8, wall=83427 2021-06-19 17:49:23 | INFO | train_inner | epoch 003: 1315 / 3002 loss=2.499, ppl=5.65, wps=5853.8, ups=0.09, wpb=64845, bsz=128, num_updates=7273, lr=9.99498e-05, gnorm=2.166, loss_scale=4, train_wall=11, gb_free=2.8, wall=83438 2021-06-19 17:49:35 | INFO | train_inner | epoch 003: 1316 / 3002 loss=2.664, ppl=6.34, wps=5798.4, ups=0.09, wpb=64782, bsz=128, num_updates=7274, lr=9.99498e-05, gnorm=2.359, loss_scale=4, train_wall=11, gb_free=2.8, wall=83449 2021-06-19 17:49:46 | INFO | train_inner | epoch 003: 1317 / 3002 loss=2.666, ppl=6.35, wps=5793, ups=0.09, wpb=64792, bsz=128, num_updates=7275, lr=9.99498e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=83460 2021-06-19 17:49:57 | INFO | train_inner | epoch 003: 1318 / 3002 loss=2.617, ppl=6.13, wps=5807, ups=0.09, wpb=64835, bsz=128, num_updates=7276, lr=9.99498e-05, gnorm=2.235, loss_scale=4, train_wall=11, gb_free=2.8, wall=83471 2021-06-19 17:50:08 | INFO | train_inner | epoch 003: 1319 / 3002 loss=2.722, ppl=6.6, wps=5873.9, ups=0.09, wpb=64842, bsz=128, num_updates=7277, lr=9.99498e-05, gnorm=2.247, loss_scale=4, train_wall=11, gb_free=2.8, wall=83482 2021-06-19 17:50:19 | INFO | train_inner | epoch 003: 1320 / 3002 loss=2.528, ppl=5.77, wps=5778.4, ups=0.09, wpb=64879, bsz=128, num_updates=7278, lr=9.99498e-05, gnorm=2.158, loss_scale=4, train_wall=11, gb_free=2.8, wall=83494 2021-06-19 17:50:30 | INFO | train_inner | epoch 003: 1321 / 3002 loss=2.755, ppl=6.75, wps=5850.3, ups=0.09, wpb=64782, bsz=128, num_updates=7279, lr=9.99498e-05, gnorm=5.651, loss_scale=4, train_wall=11, gb_free=2.8, wall=83505 2021-06-19 17:50:42 | INFO | train_inner | epoch 003: 1322 / 3002 loss=2.562, ppl=5.91, wps=5737.9, ups=0.09, wpb=64818, bsz=128, num_updates=7280, lr=9.99498e-05, gnorm=2.393, loss_scale=4, train_wall=11, gb_free=2.8, wall=83516 2021-06-19 17:50:53 | INFO | train_inner | epoch 003: 1323 / 3002 loss=2.539, ppl=5.81, wps=5899.2, ups=0.09, wpb=64797, bsz=128, num_updates=7281, lr=9.99497e-05, gnorm=2.496, loss_scale=4, train_wall=11, gb_free=2.8, wall=83527 2021-06-19 17:51:03 | INFO | train_inner | epoch 003: 1324 / 3002 loss=2.546, ppl=5.84, wps=6012.6, ups=0.09, wpb=64854, bsz=128, num_updates=7282, lr=9.99497e-05, gnorm=2.19, loss_scale=4, train_wall=10, gb_free=2.8, wall=83538 2021-06-19 17:51:15 | INFO | train_inner | epoch 003: 1325 / 3002 loss=2.688, ppl=6.44, wps=5767.5, ups=0.09, wpb=64698, bsz=128, num_updates=7283, lr=9.99497e-05, gnorm=6.349, loss_scale=4, train_wall=11, gb_free=2.8, wall=83549 2021-06-19 17:51:26 | INFO | train_inner | epoch 003: 1326 / 3002 loss=2.66, ppl=6.32, wps=5925.1, ups=0.09, wpb=64855, bsz=128, num_updates=7284, lr=9.99497e-05, gnorm=2.24, loss_scale=4, train_wall=10, gb_free=2.8, wall=83560 2021-06-19 17:51:37 | INFO | train_inner | epoch 003: 1327 / 3002 loss=2.785, ppl=6.89, wps=5822.1, ups=0.09, wpb=64780, bsz=128, num_updates=7285, lr=9.99497e-05, gnorm=7.818, loss_scale=4, train_wall=11, gb_free=2.8, wall=83571 2021-06-19 17:51:48 | INFO | train_inner | epoch 003: 1328 / 3002 loss=2.613, ppl=6.12, wps=5901.4, ups=0.09, wpb=64836, bsz=128, num_updates=7286, lr=9.99497e-05, gnorm=2.601, loss_scale=4, train_wall=11, gb_free=2.8, wall=83582 2021-06-19 17:51:59 | INFO | train_inner | epoch 003: 1329 / 3002 loss=2.731, ppl=6.64, wps=5832, ups=0.09, wpb=64771, bsz=128, num_updates=7287, lr=9.99497e-05, gnorm=2.26, loss_scale=4, train_wall=11, gb_free=2.8, wall=83593 2021-06-19 17:52:10 | INFO | train_inner | epoch 003: 1330 / 3002 loss=2.602, ppl=6.07, wps=5923.6, ups=0.09, wpb=64877, bsz=128, num_updates=7288, lr=9.99497e-05, gnorm=2.339, loss_scale=4, train_wall=11, gb_free=2.8, wall=83604 2021-06-19 17:52:21 | INFO | train_inner | epoch 003: 1331 / 3002 loss=2.645, ppl=6.25, wps=5867.7, ups=0.09, wpb=64852, bsz=128, num_updates=7289, lr=9.99497e-05, gnorm=2.265, loss_scale=4, train_wall=11, gb_free=2.8, wall=83615 2021-06-19 17:52:32 | INFO | train_inner | epoch 003: 1332 / 3002 loss=2.545, ppl=5.84, wps=5837.3, ups=0.09, wpb=64842, bsz=128, num_updates=7290, lr=9.99497e-05, gnorm=2.325, loss_scale=4, train_wall=11, gb_free=2.8, wall=83626 2021-06-19 17:52:43 | INFO | train_inner | epoch 003: 1333 / 3002 loss=2.64, ppl=6.23, wps=5967.4, ups=0.09, wpb=64872, bsz=128, num_updates=7291, lr=9.99497e-05, gnorm=2.306, loss_scale=8, train_wall=10, gb_free=2.8, wall=83637 2021-06-19 17:52:54 | INFO | train_inner | epoch 003: 1334 / 3002 loss=2.482, ppl=5.59, wps=5845.8, ups=0.09, wpb=64886, bsz=128, num_updates=7292, lr=9.99497e-05, gnorm=6.727, loss_scale=8, train_wall=11, gb_free=2.8, wall=83648 2021-06-19 17:53:05 | INFO | train_inner | epoch 003: 1335 / 3002 loss=2.716, ppl=6.57, wps=5776.5, ups=0.09, wpb=64829, bsz=128, num_updates=7293, lr=9.99497e-05, gnorm=2.221, loss_scale=8, train_wall=11, gb_free=2.8, wall=83659 2021-06-19 17:53:16 | INFO | train_inner | epoch 003: 1336 / 3002 loss=2.621, ppl=6.15, wps=5886.1, ups=0.09, wpb=64791, bsz=128, num_updates=7294, lr=9.99496e-05, gnorm=2.344, loss_scale=8, train_wall=11, gb_free=2.8, wall=83670 2021-06-19 17:53:27 | INFO | train_inner | epoch 003: 1337 / 3002 loss=2.661, ppl=6.32, wps=5837.7, ups=0.09, wpb=64821, bsz=128, num_updates=7295, lr=9.99496e-05, gnorm=2.233, loss_scale=8, train_wall=11, gb_free=2.8, wall=83682 2021-06-19 17:53:38 | INFO | train_inner | epoch 003: 1338 / 3002 loss=2.673, ppl=6.38, wps=5746, ups=0.09, wpb=64868, bsz=128, num_updates=7296, lr=9.99496e-05, gnorm=2.427, loss_scale=8, train_wall=11, gb_free=2.8, wall=83693 2021-06-19 17:53:50 | INFO | train_inner | epoch 003: 1339 / 3002 loss=2.64, ppl=6.23, wps=5784.4, ups=0.09, wpb=64752, bsz=128, num_updates=7297, lr=9.99496e-05, gnorm=2.559, loss_scale=8, train_wall=11, gb_free=2.8, wall=83704 2021-06-19 17:54:01 | INFO | train_inner | epoch 003: 1340 / 3002 loss=2.781, ppl=6.87, wps=5798.6, ups=0.09, wpb=64870, bsz=128, num_updates=7298, lr=9.99496e-05, gnorm=2.385, loss_scale=8, train_wall=11, gb_free=2.8, wall=83715 2021-06-19 17:54:12 | INFO | train_inner | epoch 003: 1341 / 3002 loss=2.676, ppl=6.39, wps=5873.6, ups=0.09, wpb=64912, bsz=128, num_updates=7299, lr=9.99496e-05, gnorm=3.103, loss_scale=8, train_wall=11, gb_free=2.8, wall=83726 2021-06-19 17:54:23 | INFO | train_inner | epoch 003: 1342 / 3002 loss=2.607, ppl=6.09, wps=5847.7, ups=0.09, wpb=64831, bsz=128, num_updates=7300, lr=9.99496e-05, gnorm=15.518, loss_scale=8, train_wall=11, gb_free=2.8, wall=83737 2021-06-19 17:54:34 | INFO | train_inner | epoch 003: 1343 / 3002 loss=2.508, ppl=5.69, wps=5912, ups=0.09, wpb=64897, bsz=128, num_updates=7301, lr=9.99496e-05, gnorm=4.662, loss_scale=8, train_wall=11, gb_free=2.8, wall=83748 2021-06-19 17:54:45 | INFO | train_inner | epoch 003: 1344 / 3002 loss=2.77, ppl=6.82, wps=5824.9, ups=0.09, wpb=64828, bsz=128, num_updates=7302, lr=9.99496e-05, gnorm=2.33, loss_scale=8, train_wall=11, gb_free=2.8, wall=83759 2021-06-19 17:54:56 | INFO | train_inner | epoch 003: 1345 / 3002 loss=2.657, ppl=6.31, wps=5830.9, ups=0.09, wpb=64806, bsz=128, num_updates=7303, lr=9.99496e-05, gnorm=2.266, loss_scale=8, train_wall=11, gb_free=2.8, wall=83771 2021-06-19 17:55:07 | INFO | train_inner | epoch 003: 1346 / 3002 loss=2.721, ppl=6.59, wps=5874, ups=0.09, wpb=64805, bsz=128, num_updates=7304, lr=9.99496e-05, gnorm=2.359, loss_scale=8, train_wall=11, gb_free=2.8, wall=83782 2021-06-19 17:55:18 | INFO | train_inner | epoch 003: 1347 / 3002 loss=2.727, ppl=6.62, wps=5834.7, ups=0.09, wpb=64811, bsz=128, num_updates=7305, lr=9.99496e-05, gnorm=2.511, loss_scale=8, train_wall=11, gb_free=2.8, wall=83793 2021-06-19 17:55:29 | INFO | train_inner | epoch 003: 1348 / 3002 loss=2.843, ppl=7.18, wps=5889.7, ups=0.09, wpb=64859, bsz=128, num_updates=7306, lr=9.99495e-05, gnorm=2.21, loss_scale=8, train_wall=11, gb_free=2.8, wall=83804 2021-06-19 17:55:40 | INFO | train_inner | epoch 003: 1349 / 3002 loss=2.494, ppl=5.63, wps=5981.5, ups=0.09, wpb=64831, bsz=128, num_updates=7307, lr=9.99495e-05, gnorm=2.449, loss_scale=8, train_wall=10, gb_free=2.8, wall=83815 2021-06-19 17:55:51 | INFO | train_inner | epoch 003: 1350 / 3002 loss=2.86, ppl=7.26, wps=5761.8, ups=0.09, wpb=64659, bsz=128, num_updates=7308, lr=9.99495e-05, gnorm=2.392, loss_scale=8, train_wall=11, gb_free=2.8, wall=83826 2021-06-19 17:56:02 | INFO | train_inner | epoch 003: 1351 / 3002 loss=2.721, ppl=6.59, wps=5896.4, ups=0.09, wpb=64729, bsz=128, num_updates=7309, lr=9.99495e-05, gnorm=3.01, loss_scale=8, train_wall=10, gb_free=2.8, wall=83837 2021-06-19 17:56:14 | INFO | train_inner | epoch 003: 1352 / 3002 loss=2.711, ppl=6.55, wps=5837.8, ups=0.09, wpb=64808, bsz=128, num_updates=7310, lr=9.99495e-05, gnorm=2.28, loss_scale=8, train_wall=11, gb_free=2.8, wall=83848 2021-06-19 17:56:25 | INFO | train_inner | epoch 003: 1353 / 3002 loss=2.691, ppl=6.46, wps=5828.5, ups=0.09, wpb=64831, bsz=128, num_updates=7311, lr=9.99495e-05, gnorm=2.268, loss_scale=8, train_wall=11, gb_free=2.8, wall=83859 2021-06-19 17:56:36 | INFO | train_inner | epoch 003: 1354 / 3002 loss=2.958, ppl=7.77, wps=5868.4, ups=0.09, wpb=64846, bsz=128, num_updates=7312, lr=9.99495e-05, gnorm=2.231, loss_scale=8, train_wall=11, gb_free=2.8, wall=83870 2021-06-19 17:56:47 | INFO | train_inner | epoch 003: 1355 / 3002 loss=2.771, ppl=6.83, wps=5919.9, ups=0.09, wpb=64900, bsz=128, num_updates=7313, lr=9.99495e-05, gnorm=2.256, loss_scale=8, train_wall=10, gb_free=2.8, wall=83881 2021-06-19 17:56:58 | INFO | train_inner | epoch 003: 1356 / 3002 loss=2.779, ppl=6.86, wps=5674.9, ups=0.09, wpb=64792, bsz=128, num_updates=7314, lr=9.99495e-05, gnorm=3.736, loss_scale=8, train_wall=11, gb_free=2.8, wall=83892 2021-06-19 17:57:09 | INFO | train_inner | epoch 003: 1357 / 3002 loss=2.751, ppl=6.73, wps=5833.6, ups=0.09, wpb=64901, bsz=128, num_updates=7315, lr=9.99495e-05, gnorm=2.636, loss_scale=8, train_wall=11, gb_free=2.8, wall=83904 2021-06-19 17:57:20 | INFO | train_inner | epoch 003: 1358 / 3002 loss=2.639, ppl=6.23, wps=5849.2, ups=0.09, wpb=64836, bsz=128, num_updates=7316, lr=9.99495e-05, gnorm=2.346, loss_scale=8, train_wall=11, gb_free=2.8, wall=83915 2021-06-19 17:57:31 | INFO | train_inner | epoch 003: 1359 / 3002 loss=2.658, ppl=6.31, wps=5833.6, ups=0.09, wpb=64875, bsz=128, num_updates=7317, lr=9.99495e-05, gnorm=2.137, loss_scale=8, train_wall=11, gb_free=2.8, wall=83926 2021-06-19 17:57:42 | INFO | train_inner | epoch 003: 1360 / 3002 loss=2.866, ppl=7.29, wps=5957.8, ups=0.09, wpb=64843, bsz=128, num_updates=7318, lr=9.99495e-05, gnorm=2.296, loss_scale=8, train_wall=10, gb_free=2.8, wall=83937 2021-06-19 17:57:53 | INFO | train_inner | epoch 003: 1361 / 3002 loss=2.745, ppl=6.71, wps=6002.2, ups=0.09, wpb=64809, bsz=128, num_updates=7319, lr=9.99494e-05, gnorm=3.244, loss_scale=8, train_wall=10, gb_free=2.8, wall=83947 2021-06-19 17:58:04 | INFO | train_inner | epoch 003: 1362 / 3002 loss=2.772, ppl=6.83, wps=5712.3, ups=0.09, wpb=64879, bsz=128, num_updates=7320, lr=9.99494e-05, gnorm=2.399, loss_scale=8, train_wall=11, gb_free=2.8, wall=83959 2021-06-19 17:58:16 | INFO | train_inner | epoch 003: 1363 / 3002 loss=2.433, ppl=5.4, wps=5835.4, ups=0.09, wpb=64872, bsz=128, num_updates=7321, lr=9.99494e-05, gnorm=2.253, loss_scale=8, train_wall=11, gb_free=2.8, wall=83970 2021-06-19 17:58:27 | INFO | train_inner | epoch 003: 1364 / 3002 loss=2.658, ppl=6.31, wps=5910.9, ups=0.09, wpb=64748, bsz=128, num_updates=7322, lr=9.99494e-05, gnorm=2.187, loss_scale=8, train_wall=10, gb_free=2.8, wall=83981 2021-06-19 17:58:38 | INFO | train_inner | epoch 003: 1365 / 3002 loss=2.776, ppl=6.85, wps=5856.6, ups=0.09, wpb=64888, bsz=128, num_updates=7323, lr=9.99494e-05, gnorm=2.79, loss_scale=8, train_wall=11, gb_free=2.8, wall=83992 2021-06-19 17:58:49 | INFO | train_inner | epoch 003: 1366 / 3002 loss=2.746, ppl=6.71, wps=5897.7, ups=0.09, wpb=64813, bsz=128, num_updates=7324, lr=9.99494e-05, gnorm=2.237, loss_scale=8, train_wall=11, gb_free=2.8, wall=84003 2021-06-19 17:59:00 | INFO | train_inner | epoch 003: 1367 / 3002 loss=2.778, ppl=6.86, wps=5804.9, ups=0.09, wpb=64754, bsz=128, num_updates=7325, lr=9.99494e-05, gnorm=2.288, loss_scale=8, train_wall=11, gb_free=2.8, wall=84014 2021-06-19 17:59:11 | INFO | train_inner | epoch 003: 1368 / 3002 loss=2.585, ppl=6, wps=5844.6, ups=0.09, wpb=64787, bsz=128, num_updates=7326, lr=9.99494e-05, gnorm=2.239, loss_scale=8, train_wall=11, gb_free=2.8, wall=84025 2021-06-19 17:59:22 | INFO | train_inner | epoch 003: 1369 / 3002 loss=2.644, ppl=6.25, wps=5861, ups=0.09, wpb=64889, bsz=128, num_updates=7327, lr=9.99494e-05, gnorm=2.291, loss_scale=8, train_wall=11, gb_free=2.8, wall=84036 2021-06-19 17:59:33 | INFO | train_inner | epoch 003: 1370 / 3002 loss=2.772, ppl=6.83, wps=5775.6, ups=0.09, wpb=64843, bsz=128, num_updates=7328, lr=9.99494e-05, gnorm=2.196, loss_scale=8, train_wall=11, gb_free=2.8, wall=84047 2021-06-19 17:59:44 | INFO | train_inner | epoch 003: 1371 / 3002 loss=2.682, ppl=6.42, wps=5955.3, ups=0.09, wpb=64864, bsz=128, num_updates=7329, lr=9.99494e-05, gnorm=2.168, loss_scale=8, train_wall=10, gb_free=2.8, wall=84058 2021-06-19 17:59:55 | INFO | train_inner | epoch 003: 1372 / 3002 loss=2.649, ppl=6.27, wps=5896.9, ups=0.09, wpb=64869, bsz=128, num_updates=7330, lr=9.99494e-05, gnorm=2.166, loss_scale=8, train_wall=11, gb_free=2.8, wall=84069 2021-06-19 18:00:06 | INFO | train_inner | epoch 003: 1373 / 3002 loss=2.742, ppl=6.69, wps=5755.9, ups=0.09, wpb=64781, bsz=128, num_updates=7331, lr=9.99493e-05, gnorm=2.37, loss_scale=8, train_wall=11, gb_free=2.8, wall=84081 2021-06-19 18:00:17 | INFO | train_inner | epoch 003: 1374 / 3002 loss=2.723, ppl=6.6, wps=5828.2, ups=0.09, wpb=64843, bsz=128, num_updates=7332, lr=9.99493e-05, gnorm=2.155, loss_scale=8, train_wall=11, gb_free=2.8, wall=84092 2021-06-19 18:00:29 | INFO | train_inner | epoch 003: 1375 / 3002 loss=2.616, ppl=6.13, wps=5794.2, ups=0.09, wpb=64793, bsz=128, num_updates=7333, lr=9.99493e-05, gnorm=2.418, loss_scale=8, train_wall=11, gb_free=2.8, wall=84103 2021-06-19 18:00:40 | INFO | train_inner | epoch 003: 1376 / 3002 loss=2.575, ppl=5.96, wps=5852.6, ups=0.09, wpb=64766, bsz=128, num_updates=7334, lr=9.99493e-05, gnorm=2.231, loss_scale=8, train_wall=11, gb_free=2.8, wall=84114 2021-06-19 18:00:51 | INFO | train_inner | epoch 003: 1377 / 3002 loss=2.692, ppl=6.46, wps=5743.7, ups=0.09, wpb=64860, bsz=128, num_updates=7335, lr=9.99493e-05, gnorm=2.141, loss_scale=8, train_wall=11, gb_free=2.8, wall=84125 2021-06-19 18:01:02 | INFO | train_inner | epoch 003: 1378 / 3002 loss=2.717, ppl=6.58, wps=5851.2, ups=0.09, wpb=64833, bsz=128, num_updates=7336, lr=9.99493e-05, gnorm=2.26, loss_scale=8, train_wall=11, gb_free=2.8, wall=84136 2021-06-19 18:01:13 | INFO | train_inner | epoch 003: 1379 / 3002 loss=2.591, ppl=6.02, wps=5859.4, ups=0.09, wpb=64797, bsz=128, num_updates=7337, lr=9.99493e-05, gnorm=2.371, loss_scale=8, train_wall=11, gb_free=2.8, wall=84147 2021-06-19 18:01:24 | INFO | train_inner | epoch 003: 1380 / 3002 loss=2.754, ppl=6.75, wps=5917.6, ups=0.09, wpb=64874, bsz=128, num_updates=7338, lr=9.99493e-05, gnorm=2.365, loss_scale=8, train_wall=10, gb_free=2.8, wall=84158 2021-06-19 18:01:35 | INFO | train_inner | epoch 003: 1381 / 3002 loss=2.576, ppl=5.96, wps=5882.1, ups=0.09, wpb=64903, bsz=128, num_updates=7339, lr=9.99493e-05, gnorm=2.618, loss_scale=8, train_wall=11, gb_free=2.8, wall=84169 2021-06-19 18:01:46 | INFO | train_inner | epoch 003: 1382 / 3002 loss=2.706, ppl=6.52, wps=5849.7, ups=0.09, wpb=64927, bsz=128, num_updates=7340, lr=9.99493e-05, gnorm=2.223, loss_scale=8, train_wall=11, gb_free=2.8, wall=84181 2021-06-19 18:01:57 | INFO | train_inner | epoch 003: 1383 / 3002 loss=2.793, ppl=6.93, wps=5829.9, ups=0.09, wpb=64899, bsz=128, num_updates=7341, lr=9.99493e-05, gnorm=2.246, loss_scale=8, train_wall=11, gb_free=2.8, wall=84192 2021-06-19 18:02:08 | INFO | train_inner | epoch 003: 1384 / 3002 loss=2.671, ppl=6.37, wps=5846.1, ups=0.09, wpb=64743, bsz=128, num_updates=7342, lr=9.99493e-05, gnorm=2.138, loss_scale=8, train_wall=11, gb_free=2.8, wall=84203 2021-06-19 18:02:19 | INFO | train_inner | epoch 003: 1385 / 3002 loss=2.737, ppl=6.67, wps=6018.2, ups=0.09, wpb=64783, bsz=128, num_updates=7343, lr=9.99493e-05, gnorm=2.309, loss_scale=8, train_wall=10, gb_free=2.8, wall=84213 2021-06-19 18:02:30 | INFO | train_inner | epoch 003: 1386 / 3002 loss=2.68, ppl=6.41, wps=5931.2, ups=0.09, wpb=64777, bsz=128, num_updates=7344, lr=9.99492e-05, gnorm=2.159, loss_scale=8, train_wall=10, gb_free=2.8, wall=84224 2021-06-19 18:02:41 | INFO | train_inner | epoch 003: 1387 / 3002 loss=2.807, ppl=7, wps=5907.9, ups=0.09, wpb=64770, bsz=128, num_updates=7345, lr=9.99492e-05, gnorm=5.546, loss_scale=8, train_wall=10, gb_free=2.8, wall=84235 2021-06-19 18:02:52 | INFO | train_inner | epoch 003: 1388 / 3002 loss=2.526, ppl=5.76, wps=5748.7, ups=0.09, wpb=64862, bsz=128, num_updates=7346, lr=9.99492e-05, gnorm=2.123, loss_scale=8, train_wall=11, gb_free=2.8, wall=84247 2021-06-19 18:03:04 | INFO | train_inner | epoch 003: 1389 / 3002 loss=2.496, ppl=5.64, wps=5757.1, ups=0.09, wpb=64913, bsz=128, num_updates=7347, lr=9.99492e-05, gnorm=3.524, loss_scale=8, train_wall=11, gb_free=2.8, wall=84258 2021-06-19 18:03:15 | INFO | train_inner | epoch 003: 1390 / 3002 loss=2.777, ppl=6.85, wps=5711.5, ups=0.09, wpb=64792, bsz=128, num_updates=7348, lr=9.99492e-05, gnorm=2.225, loss_scale=8, train_wall=11, gb_free=2.8, wall=84269 2021-06-19 18:03:26 | INFO | train_inner | epoch 003: 1391 / 3002 loss=2.737, ppl=6.67, wps=5995.9, ups=0.09, wpb=64752, bsz=128, num_updates=7349, lr=9.99492e-05, gnorm=2.127, loss_scale=8, train_wall=10, gb_free=2.8, wall=84280 2021-06-19 18:03:37 | INFO | train_inner | epoch 003: 1392 / 3002 loss=2.525, ppl=5.75, wps=5847.4, ups=0.09, wpb=64888, bsz=128, num_updates=7350, lr=9.99492e-05, gnorm=2.427, loss_scale=8, train_wall=11, gb_free=2.8, wall=84291 2021-06-19 18:03:48 | INFO | train_inner | epoch 003: 1393 / 3002 loss=2.687, ppl=6.44, wps=5835.6, ups=0.09, wpb=64879, bsz=128, num_updates=7351, lr=9.99492e-05, gnorm=2.95, loss_scale=8, train_wall=11, gb_free=2.8, wall=84302 2021-06-19 18:03:59 | INFO | train_inner | epoch 003: 1394 / 3002 loss=2.672, ppl=6.37, wps=5927.6, ups=0.09, wpb=64841, bsz=128, num_updates=7352, lr=9.99492e-05, gnorm=2.547, loss_scale=8, train_wall=10, gb_free=2.8, wall=84313 2021-06-19 18:04:10 | INFO | train_inner | epoch 003: 1395 / 3002 loss=2.777, ppl=6.85, wps=5940.1, ups=0.09, wpb=64956, bsz=128, num_updates=7353, lr=9.99492e-05, gnorm=2.393, loss_scale=8, train_wall=10, gb_free=2.8, wall=84324 2021-06-19 18:04:21 | INFO | train_inner | epoch 003: 1396 / 3002 loss=2.652, ppl=6.29, wps=5803.5, ups=0.09, wpb=64864, bsz=128, num_updates=7354, lr=9.99492e-05, gnorm=2.151, loss_scale=8, train_wall=11, gb_free=2.8, wall=84335 2021-06-19 18:04:32 | INFO | train_inner | epoch 003: 1397 / 3002 loss=2.714, ppl=6.56, wps=5810, ups=0.09, wpb=64831, bsz=128, num_updates=7355, lr=9.99492e-05, gnorm=2.212, loss_scale=8, train_wall=11, gb_free=2.8, wall=84346 2021-06-19 18:04:43 | INFO | train_inner | epoch 003: 1398 / 3002 loss=2.503, ppl=5.67, wps=5921.6, ups=0.09, wpb=64789, bsz=128, num_updates=7356, lr=9.99491e-05, gnorm=2.382, loss_scale=8, train_wall=11, gb_free=2.8, wall=84357 2021-06-19 18:04:54 | INFO | train_inner | epoch 003: 1399 / 3002 loss=2.703, ppl=6.51, wps=5772.9, ups=0.09, wpb=64820, bsz=128, num_updates=7357, lr=9.99491e-05, gnorm=2.477, loss_scale=8, train_wall=11, gb_free=2.8, wall=84369 2021-06-19 18:05:06 | INFO | train_inner | epoch 003: 1400 / 3002 loss=2.722, ppl=6.6, wps=5796.8, ups=0.09, wpb=64771, bsz=128, num_updates=7358, lr=9.99491e-05, gnorm=2.081, loss_scale=8, train_wall=11, gb_free=2.8, wall=84380 2021-06-19 18:05:17 | INFO | train_inner | epoch 003: 1401 / 3002 loss=2.71, ppl=6.54, wps=5791, ups=0.09, wpb=64787, bsz=128, num_updates=7359, lr=9.99491e-05, gnorm=2.112, loss_scale=8, train_wall=11, gb_free=2.8, wall=84391 2021-06-19 18:05:28 | INFO | train_inner | epoch 003: 1402 / 3002 loss=2.72, ppl=6.59, wps=5903.4, ups=0.09, wpb=64913, bsz=128, num_updates=7360, lr=9.99491e-05, gnorm=2.498, loss_scale=8, train_wall=11, gb_free=2.8, wall=84402 2021-06-19 18:05:39 | INFO | train_inner | epoch 003: 1403 / 3002 loss=2.573, ppl=5.95, wps=5915.3, ups=0.09, wpb=64884, bsz=128, num_updates=7361, lr=9.99491e-05, gnorm=2.188, loss_scale=8, train_wall=11, gb_free=2.8, wall=84413 2021-06-19 18:05:50 | INFO | train_inner | epoch 003: 1404 / 3002 loss=2.584, ppl=6, wps=5921, ups=0.09, wpb=64814, bsz=128, num_updates=7362, lr=9.99491e-05, gnorm=2.301, loss_scale=8, train_wall=11, gb_free=2.8, wall=84424 2021-06-19 18:06:00 | INFO | train_inner | epoch 003: 1405 / 3002 loss=2.729, ppl=6.63, wps=5961.6, ups=0.09, wpb=64882, bsz=128, num_updates=7363, lr=9.99491e-05, gnorm=2.106, loss_scale=8, train_wall=10, gb_free=2.8, wall=84435 2021-06-19 18:06:12 | INFO | train_inner | epoch 003: 1406 / 3002 loss=2.577, ppl=5.97, wps=5822.3, ups=0.09, wpb=64812, bsz=128, num_updates=7364, lr=9.99491e-05, gnorm=2.167, loss_scale=8, train_wall=11, gb_free=2.8, wall=84446 2021-06-19 18:06:23 | INFO | train_inner | epoch 003: 1407 / 3002 loss=2.551, ppl=5.86, wps=5906.7, ups=0.09, wpb=64859, bsz=128, num_updates=7365, lr=9.99491e-05, gnorm=2.169, loss_scale=8, train_wall=11, gb_free=2.8, wall=84457 2021-06-19 18:06:34 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 2021-06-19 18:06:45 | INFO | train_inner | epoch 003: 1409 / 3002 loss=2.654, ppl=6.29, wps=2941.6, ups=0.05, wpb=64775, bsz=128, num_updates=7366, lr=9.99491e-05, gnorm=2.53, loss_scale=4, train_wall=21, gb_free=2.8, wall=84479 2021-06-19 18:06:56 | INFO | train_inner | epoch 003: 1410 / 3002 loss=2.651, ppl=6.28, wps=5902.9, ups=0.09, wpb=64753, bsz=128, num_updates=7367, lr=9.99491e-05, gnorm=9.92, loss_scale=4, train_wall=11, gb_free=2.8, wall=84490 2021-06-19 18:07:07 | INFO | train_inner | epoch 003: 1411 / 3002 loss=2.61, ppl=6.11, wps=5813.7, ups=0.09, wpb=64807, bsz=128, num_updates=7368, lr=9.99491e-05, gnorm=2.197, loss_scale=4, train_wall=11, gb_free=2.8, wall=84501 2021-06-19 18:07:18 | INFO | train_inner | epoch 003: 1412 / 3002 loss=2.479, ppl=5.58, wps=5879.9, ups=0.09, wpb=64969, bsz=128, num_updates=7369, lr=9.9949e-05, gnorm=2.113, loss_scale=4, train_wall=11, gb_free=2.8, wall=84512 2021-06-19 18:07:29 | INFO | train_inner | epoch 003: 1413 / 3002 loss=2.374, ppl=5.18, wps=5767, ups=0.09, wpb=64885, bsz=128, num_updates=7370, lr=9.9949e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=84523 2021-06-19 18:07:40 | INFO | train_inner | epoch 003: 1414 / 3002 loss=2.551, ppl=5.86, wps=5785.7, ups=0.09, wpb=64824, bsz=128, num_updates=7371, lr=9.9949e-05, gnorm=2.321, loss_scale=4, train_wall=11, gb_free=2.8, wall=84535 2021-06-19 18:07:51 | INFO | train_inner | epoch 003: 1415 / 3002 loss=2.743, ppl=6.7, wps=5768.1, ups=0.09, wpb=64871, bsz=128, num_updates=7372, lr=9.9949e-05, gnorm=2.458, loss_scale=4, train_wall=11, gb_free=2.8, wall=84546 2021-06-19 18:08:02 | INFO | train_inner | epoch 003: 1416 / 3002 loss=2.754, ppl=6.74, wps=5895.4, ups=0.09, wpb=64801, bsz=128, num_updates=7373, lr=9.9949e-05, gnorm=2.089, loss_scale=4, train_wall=11, gb_free=2.8, wall=84557 2021-06-19 18:08:13 | INFO | train_inner | epoch 003: 1417 / 3002 loss=2.618, ppl=6.14, wps=5898.5, ups=0.09, wpb=64785, bsz=128, num_updates=7374, lr=9.9949e-05, gnorm=2.257, loss_scale=4, train_wall=11, gb_free=2.8, wall=84568 2021-06-19 18:08:24 | INFO | train_inner | epoch 003: 1418 / 3002 loss=2.719, ppl=6.58, wps=5912.2, ups=0.09, wpb=64787, bsz=128, num_updates=7375, lr=9.9949e-05, gnorm=2.361, loss_scale=4, train_wall=11, gb_free=2.8, wall=84579 2021-06-19 18:08:36 | INFO | train_inner | epoch 003: 1419 / 3002 loss=2.602, ppl=6.07, wps=5742.5, ups=0.09, wpb=64873, bsz=128, num_updates=7376, lr=9.9949e-05, gnorm=2.152, loss_scale=4, train_wall=11, gb_free=2.8, wall=84590 2021-06-19 18:08:47 | INFO | train_inner | epoch 003: 1420 / 3002 loss=2.803, ppl=6.98, wps=5827.7, ups=0.09, wpb=64740, bsz=128, num_updates=7377, lr=9.9949e-05, gnorm=2.11, loss_scale=4, train_wall=11, gb_free=2.8, wall=84601 2021-06-19 18:08:58 | INFO | train_inner | epoch 003: 1421 / 3002 loss=2.624, ppl=6.17, wps=5795.4, ups=0.09, wpb=64854, bsz=128, num_updates=7378, lr=9.9949e-05, gnorm=2.171, loss_scale=4, train_wall=11, gb_free=2.8, wall=84612 2021-06-19 18:09:09 | INFO | train_inner | epoch 003: 1422 / 3002 loss=2.54, ppl=5.82, wps=5895.4, ups=0.09, wpb=64915, bsz=128, num_updates=7379, lr=9.9949e-05, gnorm=2.127, loss_scale=4, train_wall=11, gb_free=2.8, wall=84623 2021-06-19 18:09:20 | INFO | train_inner | epoch 003: 1423 / 3002 loss=2.583, ppl=5.99, wps=5808.8, ups=0.09, wpb=64795, bsz=128, num_updates=7380, lr=9.9949e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=84635 2021-06-19 18:09:31 | INFO | train_inner | epoch 003: 1424 / 3002 loss=2.723, ppl=6.6, wps=5744.7, ups=0.09, wpb=64783, bsz=128, num_updates=7381, lr=9.99489e-05, gnorm=2.12, loss_scale=4, train_wall=11, gb_free=2.8, wall=84646 2021-06-19 18:09:43 | INFO | train_inner | epoch 003: 1425 / 3002 loss=2.74, ppl=6.68, wps=5782.7, ups=0.09, wpb=64754, bsz=128, num_updates=7382, lr=9.99489e-05, gnorm=2.163, loss_scale=4, train_wall=11, gb_free=2.8, wall=84657 2021-06-19 18:09:54 | INFO | train_inner | epoch 003: 1426 / 3002 loss=2.683, ppl=6.42, wps=5840.4, ups=0.09, wpb=64834, bsz=128, num_updates=7383, lr=9.99489e-05, gnorm=2.216, loss_scale=4, train_wall=11, gb_free=2.8, wall=84668 2021-06-19 18:10:05 | INFO | train_inner | epoch 003: 1427 / 3002 loss=2.65, ppl=6.28, wps=5815.1, ups=0.09, wpb=64836, bsz=128, num_updates=7384, lr=9.99489e-05, gnorm=2.175, loss_scale=4, train_wall=11, gb_free=2.8, wall=84679 2021-06-19 18:10:16 | INFO | train_inner | epoch 003: 1428 / 3002 loss=2.555, ppl=5.88, wps=5827.6, ups=0.09, wpb=64826, bsz=128, num_updates=7385, lr=9.99489e-05, gnorm=2.483, loss_scale=4, train_wall=11, gb_free=2.8, wall=84690 2021-06-19 18:10:27 | INFO | train_inner | epoch 003: 1429 / 3002 loss=2.427, ppl=5.38, wps=5799.9, ups=0.09, wpb=64872, bsz=128, num_updates=7386, lr=9.99489e-05, gnorm=4.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=84702 2021-06-19 18:10:38 | INFO | train_inner | epoch 003: 1430 / 3002 loss=2.682, ppl=6.42, wps=5951.6, ups=0.09, wpb=64860, bsz=128, num_updates=7387, lr=9.99489e-05, gnorm=2.131, loss_scale=4, train_wall=10, gb_free=2.8, wall=84712 2021-06-19 18:10:49 | INFO | train_inner | epoch 003: 1431 / 3002 loss=2.59, ppl=6.02, wps=5866.4, ups=0.09, wpb=64847, bsz=128, num_updates=7388, lr=9.99489e-05, gnorm=2.202, loss_scale=4, train_wall=11, gb_free=2.8, wall=84724 2021-06-19 18:11:00 | INFO | train_inner | epoch 003: 1432 / 3002 loss=2.596, ppl=6.05, wps=5795.6, ups=0.09, wpb=64774, bsz=128, num_updates=7389, lr=9.99489e-05, gnorm=2.239, loss_scale=4, train_wall=11, gb_free=2.8, wall=84735 2021-06-19 18:11:11 | INFO | train_inner | epoch 003: 1433 / 3002 loss=2.66, ppl=6.32, wps=5829.8, ups=0.09, wpb=64813, bsz=128, num_updates=7390, lr=9.99489e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=84746 2021-06-19 18:11:23 | INFO | train_inner | epoch 003: 1434 / 3002 loss=2.557, ppl=5.89, wps=5865.9, ups=0.09, wpb=64769, bsz=128, num_updates=7391, lr=9.99489e-05, gnorm=2.205, loss_scale=4, train_wall=11, gb_free=2.8, wall=84757 2021-06-19 18:11:34 | INFO | train_inner | epoch 003: 1435 / 3002 loss=2.755, ppl=6.75, wps=5892.8, ups=0.09, wpb=64791, bsz=128, num_updates=7392, lr=9.99489e-05, gnorm=2.141, loss_scale=4, train_wall=11, gb_free=2.8, wall=84768 2021-06-19 18:11:44 | INFO | train_inner | epoch 003: 1436 / 3002 loss=2.696, ppl=6.48, wps=5912.6, ups=0.09, wpb=64861, bsz=128, num_updates=7393, lr=9.99489e-05, gnorm=2.448, loss_scale=4, train_wall=10, gb_free=2.8, wall=84779 2021-06-19 18:11:56 | INFO | train_inner | epoch 003: 1437 / 3002 loss=2.611, ppl=6.11, wps=5745, ups=0.09, wpb=64821, bsz=128, num_updates=7394, lr=9.99488e-05, gnorm=2.108, loss_scale=4, train_wall=11, gb_free=2.8, wall=84790 2021-06-19 18:12:07 | INFO | train_inner | epoch 003: 1438 / 3002 loss=2.559, ppl=5.89, wps=5883.7, ups=0.09, wpb=64806, bsz=128, num_updates=7395, lr=9.99488e-05, gnorm=2.388, loss_scale=4, train_wall=11, gb_free=2.8, wall=84801 2021-06-19 18:12:17 | INFO | train_inner | epoch 003: 1439 / 3002 loss=2.697, ppl=6.49, wps=6057.4, ups=0.09, wpb=64817, bsz=128, num_updates=7396, lr=9.99488e-05, gnorm=2.17, loss_scale=4, train_wall=10, gb_free=2.8, wall=84812 2021-06-19 18:12:29 | INFO | train_inner | epoch 003: 1440 / 3002 loss=2.585, ppl=6, wps=5884.3, ups=0.09, wpb=64848, bsz=128, num_updates=7397, lr=9.99488e-05, gnorm=2.08, loss_scale=4, train_wall=11, gb_free=2.8, wall=84823 2021-06-19 18:12:40 | INFO | train_inner | epoch 003: 1441 / 3002 loss=2.607, ppl=6.09, wps=5898.7, ups=0.09, wpb=64887, bsz=128, num_updates=7398, lr=9.99488e-05, gnorm=2.204, loss_scale=4, train_wall=11, gb_free=2.8, wall=84834 2021-06-19 18:12:50 | INFO | train_inner | epoch 003: 1442 / 3002 loss=2.685, ppl=6.43, wps=5959.3, ups=0.09, wpb=64860, bsz=128, num_updates=7399, lr=9.99488e-05, gnorm=3.393, loss_scale=4, train_wall=10, gb_free=2.8, wall=84845 2021-06-19 18:13:02 | INFO | train_inner | epoch 003: 1443 / 3002 loss=2.654, ppl=6.3, wps=5799.7, ups=0.09, wpb=64844, bsz=128, num_updates=7400, lr=9.99488e-05, gnorm=2.333, loss_scale=4, train_wall=11, gb_free=2.8, wall=84856 2021-06-19 18:13:13 | INFO | train_inner | epoch 003: 1444 / 3002 loss=2.493, ppl=5.63, wps=5892.2, ups=0.09, wpb=64771, bsz=128, num_updates=7401, lr=9.99488e-05, gnorm=2.225, loss_scale=4, train_wall=11, gb_free=2.8, wall=84867 2021-06-19 18:13:24 | INFO | train_inner | epoch 003: 1445 / 3002 loss=2.615, ppl=6.13, wps=5827.5, ups=0.09, wpb=64836, bsz=128, num_updates=7402, lr=9.99488e-05, gnorm=2.129, loss_scale=4, train_wall=11, gb_free=2.8, wall=84878 2021-06-19 18:13:35 | INFO | train_inner | epoch 003: 1446 / 3002 loss=2.596, ppl=6.04, wps=5783.8, ups=0.09, wpb=64912, bsz=128, num_updates=7403, lr=9.99488e-05, gnorm=2.149, loss_scale=4, train_wall=11, gb_free=2.8, wall=84889 2021-06-19 18:13:46 | INFO | train_inner | epoch 003: 1447 / 3002 loss=2.662, ppl=6.33, wps=5770.7, ups=0.09, wpb=64832, bsz=128, num_updates=7404, lr=9.99488e-05, gnorm=2.5, loss_scale=4, train_wall=11, gb_free=2.8, wall=84900 2021-06-19 18:13:57 | INFO | train_inner | epoch 003: 1448 / 3002 loss=2.606, ppl=6.09, wps=5869.8, ups=0.09, wpb=64802, bsz=128, num_updates=7405, lr=9.99488e-05, gnorm=2.148, loss_scale=4, train_wall=11, gb_free=2.8, wall=84912 2021-06-19 18:14:08 | INFO | train_inner | epoch 003: 1449 / 3002 loss=2.544, ppl=5.83, wps=5840.5, ups=0.09, wpb=64857, bsz=128, num_updates=7406, lr=9.99487e-05, gnorm=2.172, loss_scale=4, train_wall=11, gb_free=2.8, wall=84923 2021-06-19 18:14:19 | INFO | train_inner | epoch 003: 1450 / 3002 loss=2.765, ppl=6.8, wps=6008, ups=0.09, wpb=64906, bsz=128, num_updates=7407, lr=9.99487e-05, gnorm=2.192, loss_scale=4, train_wall=10, gb_free=2.8, wall=84933 2021-06-19 18:14:30 | INFO | train_inner | epoch 003: 1451 / 3002 loss=2.586, ppl=6, wps=5995.1, ups=0.09, wpb=64848, bsz=128, num_updates=7408, lr=9.99487e-05, gnorm=2.145, loss_scale=4, train_wall=10, gb_free=2.8, wall=84944 2021-06-19 18:14:41 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-19 18:14:52 | INFO | train_inner | epoch 003: 1453 / 3002 loss=2.721, ppl=6.59, wps=2907.6, ups=0.04, wpb=64746, bsz=128, num_updates=7409, lr=9.99487e-05, gnorm=2.621, loss_scale=2, train_wall=21, gb_free=2.8, wall=84967 2021-06-19 18:15:03 | INFO | train_inner | epoch 003: 1454 / 3002 loss=2.586, ppl=6, wps=5830.8, ups=0.09, wpb=64787, bsz=128, num_updates=7410, lr=9.99487e-05, gnorm=2.199, loss_scale=2, train_wall=11, gb_free=2.8, wall=84978 2021-06-19 18:15:14 | INFO | train_inner | epoch 003: 1455 / 3002 loss=2.696, ppl=6.48, wps=5826.4, ups=0.09, wpb=64760, bsz=128, num_updates=7411, lr=9.99487e-05, gnorm=2.229, loss_scale=2, train_wall=11, gb_free=2.8, wall=84989 2021-06-19 18:15:25 | INFO | train_inner | epoch 003: 1456 / 3002 loss=2.809, ppl=7.01, wps=5845.8, ups=0.09, wpb=64847, bsz=128, num_updates=7412, lr=9.99487e-05, gnorm=2.141, loss_scale=2, train_wall=11, gb_free=2.8, wall=85000 2021-06-19 18:15:37 | INFO | train_inner | epoch 003: 1457 / 3002 loss=2.647, ppl=6.26, wps=5854.2, ups=0.09, wpb=64812, bsz=128, num_updates=7413, lr=9.99487e-05, gnorm=2.244, loss_scale=2, train_wall=11, gb_free=2.8, wall=85011 2021-06-19 18:15:48 | INFO | train_inner | epoch 003: 1458 / 3002 loss=2.694, ppl=6.47, wps=5884.9, ups=0.09, wpb=64885, bsz=128, num_updates=7414, lr=9.99487e-05, gnorm=2.318, loss_scale=2, train_wall=11, gb_free=2.8, wall=85022 2021-06-19 18:15:58 | INFO | train_inner | epoch 003: 1459 / 3002 loss=2.691, ppl=6.46, wps=6019.9, ups=0.09, wpb=64735, bsz=128, num_updates=7415, lr=9.99487e-05, gnorm=2.145, loss_scale=2, train_wall=10, gb_free=2.8, wall=85033 2021-06-19 18:16:09 | INFO | train_inner | epoch 003: 1460 / 3002 loss=2.773, ppl=6.84, wps=5959.4, ups=0.09, wpb=64864, bsz=128, num_updates=7416, lr=9.99487e-05, gnorm=3.313, loss_scale=2, train_wall=10, gb_free=2.8, wall=85044 2021-06-19 18:16:20 | INFO | train_inner | epoch 003: 1461 / 3002 loss=2.725, ppl=6.61, wps=5848.4, ups=0.09, wpb=64730, bsz=128, num_updates=7417, lr=9.99487e-05, gnorm=2.131, loss_scale=2, train_wall=11, gb_free=2.8, wall=85055 2021-06-19 18:16:31 | INFO | train_inner | epoch 003: 1462 / 3002 loss=2.613, ppl=6.12, wps=5892.6, ups=0.09, wpb=64812, bsz=128, num_updates=7418, lr=9.99487e-05, gnorm=2.216, loss_scale=2, train_wall=11, gb_free=2.8, wall=85066 2021-06-19 18:16:42 | INFO | train_inner | epoch 003: 1463 / 3002 loss=2.613, ppl=6.12, wps=5963.1, ups=0.09, wpb=64791, bsz=128, num_updates=7419, lr=9.99486e-05, gnorm=2.187, loss_scale=2, train_wall=10, gb_free=2.8, wall=85077 2021-06-19 18:16:53 | INFO | train_inner | epoch 003: 1464 / 3002 loss=2.659, ppl=6.32, wps=5872.5, ups=0.09, wpb=64909, bsz=128, num_updates=7420, lr=9.99486e-05, gnorm=2.415, loss_scale=2, train_wall=11, gb_free=2.8, wall=85088 2021-06-19 18:17:04 | INFO | train_inner | epoch 003: 1465 / 3002 loss=2.825, ppl=7.09, wps=5883.5, ups=0.09, wpb=64831, bsz=128, num_updates=7421, lr=9.99486e-05, gnorm=2.136, loss_scale=2, train_wall=11, gb_free=2.8, wall=85099 2021-06-19 18:17:15 | INFO | train_inner | epoch 003: 1466 / 3002 loss=2.632, ppl=6.2, wps=5903, ups=0.09, wpb=64796, bsz=128, num_updates=7422, lr=9.99486e-05, gnorm=2.163, loss_scale=2, train_wall=11, gb_free=2.8, wall=85110 2021-06-19 18:17:26 | INFO | train_inner | epoch 003: 1467 / 3002 loss=2.714, ppl=6.56, wps=5762.9, ups=0.09, wpb=64829, bsz=128, num_updates=7423, lr=9.99486e-05, gnorm=2.189, loss_scale=2, train_wall=11, gb_free=2.8, wall=85121 2021-06-19 18:17:38 | INFO | train_inner | epoch 003: 1468 / 3002 loss=2.754, ppl=6.74, wps=5846.5, ups=0.09, wpb=64823, bsz=128, num_updates=7424, lr=9.99486e-05, gnorm=2.07, loss_scale=2, train_wall=11, gb_free=2.8, wall=85132 2021-06-19 18:17:49 | INFO | train_inner | epoch 003: 1469 / 3002 loss=2.591, ppl=6.03, wps=5835.8, ups=0.09, wpb=64892, bsz=128, num_updates=7425, lr=9.99486e-05, gnorm=2.056, loss_scale=2, train_wall=11, gb_free=2.8, wall=85143 2021-06-19 18:18:00 | INFO | train_inner | epoch 003: 1470 / 3002 loss=2.556, ppl=5.88, wps=5940.7, ups=0.09, wpb=64819, bsz=128, num_updates=7426, lr=9.99486e-05, gnorm=2.138, loss_scale=2, train_wall=10, gb_free=2.8, wall=85154 2021-06-19 18:18:11 | INFO | train_inner | epoch 003: 1471 / 3002 loss=2.63, ppl=6.19, wps=5834.9, ups=0.09, wpb=64869, bsz=128, num_updates=7427, lr=9.99486e-05, gnorm=2.191, loss_scale=2, train_wall=11, gb_free=2.8, wall=85165 2021-06-19 18:18:22 | INFO | train_inner | epoch 003: 1472 / 3002 loss=2.663, ppl=6.33, wps=5802.3, ups=0.09, wpb=64692, bsz=128, num_updates=7428, lr=9.99486e-05, gnorm=2.17, loss_scale=2, train_wall=11, gb_free=2.8, wall=85176 2021-06-19 18:18:33 | INFO | train_inner | epoch 003: 1473 / 3002 loss=2.522, ppl=5.74, wps=5800.8, ups=0.09, wpb=64814, bsz=128, num_updates=7429, lr=9.99486e-05, gnorm=3.016, loss_scale=2, train_wall=11, gb_free=2.8, wall=85187 2021-06-19 18:18:44 | INFO | train_inner | epoch 003: 1474 / 3002 loss=2.672, ppl=6.37, wps=5827.2, ups=0.09, wpb=64849, bsz=128, num_updates=7430, lr=9.99486e-05, gnorm=2.207, loss_scale=2, train_wall=11, gb_free=2.8, wall=85198 2021-06-19 18:18:55 | INFO | train_inner | epoch 003: 1475 / 3002 loss=2.722, ppl=6.6, wps=5785, ups=0.09, wpb=64863, bsz=128, num_updates=7431, lr=9.99485e-05, gnorm=2.203, loss_scale=2, train_wall=11, gb_free=2.8, wall=85210 2021-06-19 18:19:06 | INFO | train_inner | epoch 003: 1476 / 3002 loss=2.453, ppl=5.48, wps=5859.3, ups=0.09, wpb=64848, bsz=128, num_updates=7432, lr=9.99485e-05, gnorm=2.196, loss_scale=2, train_wall=11, gb_free=2.8, wall=85221 2021-06-19 18:19:18 | INFO | train_inner | epoch 003: 1477 / 3002 loss=2.561, ppl=5.9, wps=5800.7, ups=0.09, wpb=64902, bsz=128, num_updates=7433, lr=9.99485e-05, gnorm=2.046, loss_scale=2, train_wall=11, gb_free=2.8, wall=85232 2021-06-19 18:19:29 | INFO | train_inner | epoch 003: 1478 / 3002 loss=2.594, ppl=6.04, wps=5797.4, ups=0.09, wpb=64729, bsz=128, num_updates=7434, lr=9.99485e-05, gnorm=2.117, loss_scale=2, train_wall=11, gb_free=2.8, wall=85243 2021-06-19 18:19:40 | INFO | train_inner | epoch 003: 1479 / 3002 loss=2.641, ppl=6.24, wps=5827.8, ups=0.09, wpb=64808, bsz=128, num_updates=7435, lr=9.99485e-05, gnorm=2.134, loss_scale=2, train_wall=11, gb_free=2.8, wall=85254 2021-06-19 18:19:51 | INFO | train_inner | epoch 003: 1480 / 3002 loss=2.525, ppl=5.76, wps=5878, ups=0.09, wpb=64790, bsz=128, num_updates=7436, lr=9.99485e-05, gnorm=2.093, loss_scale=2, train_wall=11, gb_free=2.8, wall=85265 2021-06-19 18:20:02 | INFO | train_inner | epoch 003: 1481 / 3002 loss=2.521, ppl=5.74, wps=5796.3, ups=0.09, wpb=64903, bsz=128, num_updates=7437, lr=9.99485e-05, gnorm=2.381, loss_scale=2, train_wall=11, gb_free=2.8, wall=85276 2021-06-19 18:20:13 | INFO | train_inner | epoch 003: 1482 / 3002 loss=2.733, ppl=6.65, wps=5863.2, ups=0.09, wpb=64797, bsz=128, num_updates=7438, lr=9.99485e-05, gnorm=2.161, loss_scale=2, train_wall=11, gb_free=2.8, wall=85288 2021-06-19 18:20:24 | INFO | train_inner | epoch 003: 1483 / 3002 loss=2.741, ppl=6.69, wps=5959.2, ups=0.09, wpb=64869, bsz=128, num_updates=7439, lr=9.99485e-05, gnorm=2.259, loss_scale=2, train_wall=10, gb_free=2.8, wall=85298 2021-06-19 18:20:35 | INFO | train_inner | epoch 003: 1484 / 3002 loss=2.575, ppl=5.96, wps=5836.5, ups=0.09, wpb=64796, bsz=128, num_updates=7440, lr=9.99485e-05, gnorm=2.237, loss_scale=2, train_wall=11, gb_free=2.8, wall=85310 2021-06-19 18:20:46 | INFO | train_inner | epoch 003: 1485 / 3002 loss=2.66, ppl=6.32, wps=5761.7, ups=0.09, wpb=64850, bsz=128, num_updates=7441, lr=9.99485e-05, gnorm=2.157, loss_scale=2, train_wall=11, gb_free=2.8, wall=85321 2021-06-19 18:20:58 | INFO | train_inner | epoch 003: 1486 / 3002 loss=2.599, ppl=6.06, wps=5738.3, ups=0.09, wpb=64788, bsz=128, num_updates=7442, lr=9.99485e-05, gnorm=2.214, loss_scale=2, train_wall=11, gb_free=2.8, wall=85332 2021-06-19 18:21:09 | INFO | train_inner | epoch 003: 1487 / 3002 loss=2.57, ppl=5.94, wps=5874.9, ups=0.09, wpb=64819, bsz=128, num_updates=7443, lr=9.99485e-05, gnorm=2.2, loss_scale=2, train_wall=11, gb_free=2.8, wall=85343 2021-06-19 18:21:20 | INFO | train_inner | epoch 003: 1488 / 3002 loss=2.697, ppl=6.49, wps=5858.6, ups=0.09, wpb=64781, bsz=128, num_updates=7444, lr=9.99484e-05, gnorm=2.427, loss_scale=2, train_wall=11, gb_free=2.8, wall=85354 2021-06-19 18:21:31 | INFO | train_inner | epoch 003: 1489 / 3002 loss=2.568, ppl=5.93, wps=5991.9, ups=0.09, wpb=64741, bsz=128, num_updates=7445, lr=9.99484e-05, gnorm=2.058, loss_scale=2, train_wall=10, gb_free=2.8, wall=85365 2021-06-19 18:21:42 | INFO | train_inner | epoch 003: 1490 / 3002 loss=2.532, ppl=5.78, wps=5894.8, ups=0.09, wpb=64799, bsz=128, num_updates=7446, lr=9.99484e-05, gnorm=2.416, loss_scale=2, train_wall=11, gb_free=2.8, wall=85376 2021-06-19 18:21:53 | INFO | train_inner | epoch 003: 1491 / 3002 loss=2.773, ppl=6.83, wps=5791.4, ups=0.09, wpb=64839, bsz=128, num_updates=7447, lr=9.99484e-05, gnorm=2.231, loss_scale=2, train_wall=11, gb_free=2.8, wall=85387 2021-06-19 18:22:04 | INFO | train_inner | epoch 003: 1492 / 3002 loss=2.585, ppl=6, wps=5814, ups=0.09, wpb=64793, bsz=128, num_updates=7448, lr=9.99484e-05, gnorm=2.061, loss_scale=2, train_wall=11, gb_free=2.8, wall=85398 2021-06-19 18:22:15 | INFO | train_inner | epoch 003: 1493 / 3002 loss=2.773, ppl=6.83, wps=5851.4, ups=0.09, wpb=64867, bsz=128, num_updates=7449, lr=9.99484e-05, gnorm=2.204, loss_scale=2, train_wall=11, gb_free=2.8, wall=85409 2021-06-19 18:22:26 | INFO | train_inner | epoch 003: 1494 / 3002 loss=2.57, ppl=5.94, wps=5854.3, ups=0.09, wpb=64855, bsz=128, num_updates=7450, lr=9.99484e-05, gnorm=2.13, loss_scale=2, train_wall=11, gb_free=2.8, wall=85420 2021-06-19 18:22:37 | INFO | train_inner | epoch 003: 1495 / 3002 loss=2.656, ppl=6.3, wps=5869.9, ups=0.09, wpb=64829, bsz=128, num_updates=7451, lr=9.99484e-05, gnorm=2.153, loss_scale=2, train_wall=11, gb_free=2.8, wall=85431 2021-06-19 18:22:48 | INFO | train_inner | epoch 003: 1496 / 3002 loss=2.703, ppl=6.51, wps=5840.1, ups=0.09, wpb=64876, bsz=128, num_updates=7452, lr=9.99484e-05, gnorm=2.226, loss_scale=2, train_wall=11, gb_free=2.8, wall=85443 2021-06-19 18:22:59 | INFO | train_inner | epoch 003: 1497 / 3002 loss=2.74, ppl=6.68, wps=5794.8, ups=0.09, wpb=64843, bsz=128, num_updates=7453, lr=9.99484e-05, gnorm=2.11, loss_scale=2, train_wall=11, gb_free=2.8, wall=85454 2021-06-19 18:23:11 | INFO | train_inner | epoch 003: 1498 / 3002 loss=2.627, ppl=6.18, wps=5815.5, ups=0.09, wpb=64815, bsz=128, num_updates=7454, lr=9.99484e-05, gnorm=2.133, loss_scale=2, train_wall=11, gb_free=2.8, wall=85465 2021-06-19 18:23:22 | INFO | train_inner | epoch 003: 1499 / 3002 loss=2.742, ppl=6.69, wps=5881.5, ups=0.09, wpb=64923, bsz=128, num_updates=7455, lr=9.99484e-05, gnorm=2.158, loss_scale=2, train_wall=11, gb_free=2.8, wall=85476 2021-06-19 18:23:33 | INFO | train_inner | epoch 003: 1500 / 3002 loss=2.583, ppl=5.99, wps=5837.6, ups=0.09, wpb=64835, bsz=128, num_updates=7456, lr=9.99483e-05, gnorm=2.009, loss_scale=2, train_wall=11, gb_free=2.8, wall=85487 2021-06-19 18:23:44 | INFO | train_inner | epoch 003: 1501 / 3002 loss=2.585, ppl=6, wps=5885.1, ups=0.09, wpb=64891, bsz=128, num_updates=7457, lr=9.99483e-05, gnorm=2.936, loss_scale=2, train_wall=11, gb_free=2.8, wall=85498 2021-06-19 18:23:55 | INFO | train_inner | epoch 003: 1502 / 3002 loss=2.633, ppl=6.2, wps=5872.2, ups=0.09, wpb=64811, bsz=128, num_updates=7458, lr=9.99483e-05, gnorm=2.225, loss_scale=2, train_wall=11, gb_free=2.8, wall=85509 2021-06-19 18:24:06 | INFO | train_inner | epoch 003: 1503 / 3002 loss=2.653, ppl=6.29, wps=5834.1, ups=0.09, wpb=64821, bsz=128, num_updates=7459, lr=9.99483e-05, gnorm=2.046, loss_scale=2, train_wall=11, gb_free=2.8, wall=85520 2021-06-19 18:24:17 | INFO | train_inner | epoch 003: 1504 / 3002 loss=2.644, ppl=6.25, wps=5875.6, ups=0.09, wpb=64804, bsz=128, num_updates=7460, lr=9.99483e-05, gnorm=2.548, loss_scale=2, train_wall=11, gb_free=2.8, wall=85531 2021-06-19 18:24:28 | INFO | train_inner | epoch 003: 1505 / 3002 loss=2.64, ppl=6.23, wps=5947.3, ups=0.09, wpb=64816, bsz=128, num_updates=7461, lr=9.99483e-05, gnorm=2.139, loss_scale=2, train_wall=10, gb_free=2.8, wall=85542 2021-06-19 18:24:39 | INFO | train_inner | epoch 003: 1506 / 3002 loss=2.754, ppl=6.75, wps=5919, ups=0.09, wpb=64775, bsz=128, num_updates=7462, lr=9.99483e-05, gnorm=2.104, loss_scale=2, train_wall=11, gb_free=2.8, wall=85553 2021-06-19 18:24:50 | INFO | train_inner | epoch 003: 1507 / 3002 loss=2.705, ppl=6.52, wps=5772.6, ups=0.09, wpb=64811, bsz=128, num_updates=7463, lr=9.99483e-05, gnorm=2.759, loss_scale=2, train_wall=11, gb_free=2.8, wall=85564 2021-06-19 18:25:01 | INFO | train_inner | epoch 003: 1508 / 3002 loss=2.513, ppl=5.71, wps=5818.3, ups=0.09, wpb=64839, bsz=128, num_updates=7464, lr=9.99483e-05, gnorm=2.124, loss_scale=2, train_wall=11, gb_free=2.8, wall=85575 2021-06-19 18:25:12 | INFO | train_inner | epoch 003: 1509 / 3002 loss=2.94, ppl=7.67, wps=5762.5, ups=0.09, wpb=64884, bsz=128, num_updates=7465, lr=9.99483e-05, gnorm=2.188, loss_scale=2, train_wall=11, gb_free=2.8, wall=85587 2021-06-19 18:25:24 | INFO | train_inner | epoch 003: 1510 / 3002 loss=2.595, ppl=6.04, wps=5775.8, ups=0.09, wpb=64811, bsz=128, num_updates=7466, lr=9.99483e-05, gnorm=2.075, loss_scale=2, train_wall=11, gb_free=2.8, wall=85598 2021-06-19 18:25:35 | INFO | train_inner | epoch 003: 1511 / 3002 loss=2.448, ppl=5.46, wps=5759.4, ups=0.09, wpb=64782, bsz=128, num_updates=7467, lr=9.99483e-05, gnorm=2.203, loss_scale=2, train_wall=11, gb_free=2.8, wall=85609 2021-06-19 18:25:46 | INFO | train_inner | epoch 003: 1512 / 3002 loss=2.579, ppl=5.97, wps=5893, ups=0.09, wpb=64809, bsz=128, num_updates=7468, lr=9.99483e-05, gnorm=2.18, loss_scale=2, train_wall=11, gb_free=2.8, wall=85620 2021-06-19 18:25:57 | INFO | train_inner | epoch 003: 1513 / 3002 loss=2.777, ppl=6.85, wps=5855.6, ups=0.09, wpb=64829, bsz=128, num_updates=7469, lr=9.99482e-05, gnorm=2.242, loss_scale=2, train_wall=11, gb_free=2.8, wall=85631 2021-06-19 18:26:08 | INFO | train_inner | epoch 003: 1514 / 3002 loss=2.665, ppl=6.34, wps=5777.8, ups=0.09, wpb=64670, bsz=128, num_updates=7470, lr=9.99482e-05, gnorm=2.165, loss_scale=2, train_wall=11, gb_free=2.8, wall=85642 2021-06-19 18:26:19 | INFO | train_inner | epoch 003: 1515 / 3002 loss=2.585, ppl=6, wps=5788.1, ups=0.09, wpb=64818, bsz=128, num_updates=7471, lr=9.99482e-05, gnorm=2.193, loss_scale=2, train_wall=11, gb_free=2.8, wall=85654 2021-06-19 18:26:30 | INFO | train_inner | epoch 003: 1516 / 3002 loss=2.726, ppl=6.62, wps=5886.5, ups=0.09, wpb=64858, bsz=128, num_updates=7472, lr=9.99482e-05, gnorm=2.133, loss_scale=2, train_wall=11, gb_free=2.8, wall=85665 2021-06-19 18:26:42 | INFO | train_inner | epoch 003: 1517 / 3002 loss=2.492, ppl=5.62, wps=5795.9, ups=0.09, wpb=64832, bsz=128, num_updates=7473, lr=9.99482e-05, gnorm=2.282, loss_scale=2, train_wall=11, gb_free=2.8, wall=85676 2021-06-19 18:26:53 | INFO | train_inner | epoch 003: 1518 / 3002 loss=2.589, ppl=6.02, wps=5920.5, ups=0.09, wpb=64825, bsz=128, num_updates=7474, lr=9.99482e-05, gnorm=2.054, loss_scale=2, train_wall=10, gb_free=2.8, wall=85687 2021-06-19 18:27:04 | INFO | train_inner | epoch 003: 1519 / 3002 loss=2.543, ppl=5.83, wps=5798.1, ups=0.09, wpb=64848, bsz=128, num_updates=7475, lr=9.99482e-05, gnorm=2.139, loss_scale=2, train_wall=11, gb_free=2.8, wall=85698 2021-06-19 18:27:15 | INFO | train_inner | epoch 003: 1520 / 3002 loss=2.57, ppl=5.94, wps=5948.6, ups=0.09, wpb=64906, bsz=128, num_updates=7476, lr=9.99482e-05, gnorm=2.163, loss_scale=2, train_wall=10, gb_free=2.8, wall=85709 2021-06-19 18:27:25 | INFO | train_inner | epoch 003: 1521 / 3002 loss=2.485, ppl=5.6, wps=5958.7, ups=0.09, wpb=64822, bsz=128, num_updates=7477, lr=9.99482e-05, gnorm=2.982, loss_scale=2, train_wall=10, gb_free=2.8, wall=85720 2021-06-19 18:27:37 | INFO | train_inner | epoch 003: 1522 / 3002 loss=2.562, ppl=5.9, wps=5842.8, ups=0.09, wpb=64876, bsz=128, num_updates=7478, lr=9.99482e-05, gnorm=2.109, loss_scale=2, train_wall=11, gb_free=2.8, wall=85731 2021-06-19 18:27:48 | INFO | train_inner | epoch 003: 1523 / 3002 loss=2.937, ppl=7.66, wps=5915, ups=0.09, wpb=64826, bsz=128, num_updates=7479, lr=9.99482e-05, gnorm=2.172, loss_scale=2, train_wall=10, gb_free=2.8, wall=85742 2021-06-19 18:27:58 | INFO | train_inner | epoch 003: 1524 / 3002 loss=2.529, ppl=5.77, wps=6007.3, ups=0.09, wpb=64886, bsz=128, num_updates=7480, lr=9.99482e-05, gnorm=2.091, loss_scale=2, train_wall=10, gb_free=2.8, wall=85753 2021-06-19 18:28:09 | INFO | train_inner | epoch 003: 1525 / 3002 loss=2.548, ppl=5.85, wps=5903.3, ups=0.09, wpb=64886, bsz=128, num_updates=7481, lr=9.99481e-05, gnorm=2.104, loss_scale=2, train_wall=11, gb_free=2.8, wall=85764 2021-06-19 18:28:20 | INFO | train_inner | epoch 003: 1526 / 3002 loss=2.648, ppl=6.27, wps=5868.6, ups=0.09, wpb=64782, bsz=128, num_updates=7482, lr=9.99481e-05, gnorm=2.046, loss_scale=2, train_wall=11, gb_free=2.8, wall=85775 2021-06-19 18:28:31 | INFO | train_inner | epoch 003: 1527 / 3002 loss=2.591, ppl=6.03, wps=5867.6, ups=0.09, wpb=64799, bsz=128, num_updates=7483, lr=9.99481e-05, gnorm=2.135, loss_scale=2, train_wall=11, gb_free=2.8, wall=85786 2021-06-19 18:28:43 | INFO | train_inner | epoch 003: 1528 / 3002 loss=2.714, ppl=6.56, wps=5846.6, ups=0.09, wpb=64869, bsz=128, num_updates=7484, lr=9.99481e-05, gnorm=2.113, loss_scale=2, train_wall=11, gb_free=2.8, wall=85797 2021-06-19 18:28:53 | INFO | train_inner | epoch 003: 1529 / 3002 loss=2.532, ppl=5.78, wps=6012.8, ups=0.09, wpb=64856, bsz=128, num_updates=7485, lr=9.99481e-05, gnorm=2.34, loss_scale=2, train_wall=10, gb_free=2.8, wall=85808 2021-06-19 18:29:04 | INFO | train_inner | epoch 003: 1530 / 3002 loss=2.719, ppl=6.58, wps=5928.2, ups=0.09, wpb=64848, bsz=128, num_updates=7486, lr=9.99481e-05, gnorm=2.116, loss_scale=2, train_wall=10, gb_free=2.8, wall=85819 2021-06-19 18:29:15 | INFO | train_inner | epoch 003: 1531 / 3002 loss=2.531, ppl=5.78, wps=5837.3, ups=0.09, wpb=64755, bsz=128, num_updates=7487, lr=9.99481e-05, gnorm=2.13, loss_scale=2, train_wall=11, gb_free=2.8, wall=85830 2021-06-19 18:29:27 | INFO | train_inner | epoch 003: 1532 / 3002 loss=2.587, ppl=6.01, wps=5809.2, ups=0.09, wpb=64849, bsz=128, num_updates=7488, lr=9.99481e-05, gnorm=2.18, loss_scale=2, train_wall=11, gb_free=2.8, wall=85841 2021-06-19 18:29:38 | INFO | train_inner | epoch 003: 1533 / 3002 loss=2.501, ppl=5.66, wps=5897.6, ups=0.09, wpb=64876, bsz=128, num_updates=7489, lr=9.99481e-05, gnorm=2.164, loss_scale=2, train_wall=11, gb_free=2.8, wall=85852 2021-06-19 18:29:48 | INFO | train_inner | epoch 003: 1534 / 3002 loss=2.78, ppl=6.87, wps=5953, ups=0.09, wpb=64844, bsz=128, num_updates=7490, lr=9.99481e-05, gnorm=2.147, loss_scale=2, train_wall=10, gb_free=2.8, wall=85863 2021-06-19 18:29:59 | INFO | train_inner | epoch 003: 1535 / 3002 loss=2.525, ppl=5.75, wps=5883.7, ups=0.09, wpb=64868, bsz=128, num_updates=7491, lr=9.99481e-05, gnorm=2.671, loss_scale=2, train_wall=11, gb_free=2.8, wall=85874 2021-06-19 18:30:11 | INFO | train_inner | epoch 003: 1536 / 3002 loss=2.6, ppl=6.06, wps=5760, ups=0.09, wpb=64869, bsz=128, num_updates=7492, lr=9.99481e-05, gnorm=3.217, loss_scale=2, train_wall=11, gb_free=2.8, wall=85885 2021-06-19 18:30:22 | INFO | train_inner | epoch 003: 1537 / 3002 loss=2.758, ppl=6.76, wps=5868.9, ups=0.09, wpb=64822, bsz=128, num_updates=7493, lr=9.99481e-05, gnorm=2.183, loss_scale=2, train_wall=11, gb_free=2.8, wall=85896 2021-06-19 18:30:33 | INFO | train_inner | epoch 003: 1538 / 3002 loss=2.583, ppl=5.99, wps=5853.1, ups=0.09, wpb=64850, bsz=128, num_updates=7494, lr=9.9948e-05, gnorm=2.267, loss_scale=2, train_wall=11, gb_free=2.8, wall=85907 2021-06-19 18:30:44 | INFO | train_inner | epoch 003: 1539 / 3002 loss=2.562, ppl=5.91, wps=5711.8, ups=0.09, wpb=64800, bsz=128, num_updates=7495, lr=9.9948e-05, gnorm=2.249, loss_scale=2, train_wall=11, gb_free=2.8, wall=85918 2021-06-19 18:30:55 | INFO | train_inner | epoch 003: 1540 / 3002 loss=2.736, ppl=6.66, wps=5916.4, ups=0.09, wpb=64877, bsz=128, num_updates=7496, lr=9.9948e-05, gnorm=2.295, loss_scale=2, train_wall=11, gb_free=2.8, wall=85929 2021-06-19 18:31:06 | INFO | train_inner | epoch 003: 1541 / 3002 loss=2.619, ppl=6.14, wps=5833.9, ups=0.09, wpb=64794, bsz=128, num_updates=7497, lr=9.9948e-05, gnorm=2.174, loss_scale=2, train_wall=11, gb_free=2.8, wall=85941 2021-06-19 18:31:17 | INFO | train_inner | epoch 003: 1542 / 3002 loss=2.636, ppl=6.22, wps=5789.9, ups=0.09, wpb=64837, bsz=128, num_updates=7498, lr=9.9948e-05, gnorm=2.208, loss_scale=2, train_wall=11, gb_free=2.8, wall=85952 2021-06-19 18:31:29 | INFO | train_inner | epoch 003: 1543 / 3002 loss=2.647, ppl=6.27, wps=5787, ups=0.09, wpb=64891, bsz=128, num_updates=7499, lr=9.9948e-05, gnorm=2.304, loss_scale=2, train_wall=11, gb_free=2.8, wall=85963 2021-06-19 18:31:40 | INFO | train_inner | epoch 003: 1544 / 3002 loss=2.591, ppl=6.02, wps=5900.5, ups=0.09, wpb=64874, bsz=128, num_updates=7500, lr=9.9948e-05, gnorm=2.746, loss_scale=2, train_wall=11, gb_free=2.8, wall=85974 2021-06-19 18:31:51 | INFO | train_inner | epoch 003: 1545 / 3002 loss=2.445, ppl=5.44, wps=5737.1, ups=0.09, wpb=64779, bsz=128, num_updates=7501, lr=9.9948e-05, gnorm=2.397, loss_scale=2, train_wall=11, gb_free=2.8, wall=85985 2021-06-19 18:32:02 | INFO | train_inner | epoch 003: 1546 / 3002 loss=2.586, ppl=6, wps=5838, ups=0.09, wpb=64749, bsz=128, num_updates=7502, lr=9.9948e-05, gnorm=2.014, loss_scale=2, train_wall=11, gb_free=2.8, wall=85996 2021-06-19 18:32:13 | INFO | train_inner | epoch 003: 1547 / 3002 loss=2.646, ppl=6.26, wps=5830.5, ups=0.09, wpb=64765, bsz=128, num_updates=7503, lr=9.9948e-05, gnorm=2.188, loss_scale=2, train_wall=11, gb_free=2.8, wall=86007 2021-06-19 18:32:24 | INFO | train_inner | epoch 003: 1548 / 3002 loss=2.758, ppl=6.77, wps=5770.8, ups=0.09, wpb=64767, bsz=128, num_updates=7504, lr=9.9948e-05, gnorm=2.174, loss_scale=2, train_wall=11, gb_free=2.8, wall=86019 2021-06-19 18:32:35 | INFO | train_inner | epoch 003: 1549 / 3002 loss=2.74, ppl=6.68, wps=5835.4, ups=0.09, wpb=64863, bsz=128, num_updates=7505, lr=9.9948e-05, gnorm=2.204, loss_scale=2, train_wall=11, gb_free=2.8, wall=86030 2021-06-19 18:32:47 | INFO | train_inner | epoch 003: 1550 / 3002 loss=2.579, ppl=5.98, wps=5857.8, ups=0.09, wpb=64816, bsz=128, num_updates=7506, lr=9.99479e-05, gnorm=2.132, loss_scale=2, train_wall=11, gb_free=2.8, wall=86041 2021-06-19 18:32:58 | INFO | train_inner | epoch 003: 1551 / 3002 loss=2.616, ppl=6.13, wps=5807.3, ups=0.09, wpb=64805, bsz=128, num_updates=7507, lr=9.99479e-05, gnorm=2.162, loss_scale=2, train_wall=11, gb_free=2.8, wall=86052 2021-06-19 18:33:09 | INFO | train_inner | epoch 003: 1552 / 3002 loss=2.663, ppl=6.33, wps=5863.7, ups=0.09, wpb=64773, bsz=128, num_updates=7508, lr=9.99479e-05, gnorm=2.132, loss_scale=2, train_wall=11, gb_free=2.8, wall=86063 2021-06-19 18:33:20 | INFO | train_inner | epoch 003: 1553 / 3002 loss=2.588, ppl=6.01, wps=5861, ups=0.09, wpb=64878, bsz=128, num_updates=7509, lr=9.99479e-05, gnorm=2.209, loss_scale=2, train_wall=11, gb_free=2.8, wall=86074 2021-06-19 18:33:31 | INFO | train_inner | epoch 003: 1554 / 3002 loss=2.627, ppl=6.18, wps=5825.4, ups=0.09, wpb=64856, bsz=128, num_updates=7510, lr=9.99479e-05, gnorm=2.14, loss_scale=2, train_wall=11, gb_free=2.8, wall=86085 2021-06-19 18:33:42 | INFO | train_inner | epoch 003: 1555 / 3002 loss=2.403, ppl=5.29, wps=5875.9, ups=0.09, wpb=64856, bsz=128, num_updates=7511, lr=9.99479e-05, gnorm=2.157, loss_scale=2, train_wall=11, gb_free=2.8, wall=86096 2021-06-19 18:33:53 | INFO | train_inner | epoch 003: 1556 / 3002 loss=2.804, ppl=6.98, wps=5789.6, ups=0.09, wpb=64839, bsz=128, num_updates=7512, lr=9.99479e-05, gnorm=2.239, loss_scale=2, train_wall=11, gb_free=2.8, wall=86108 2021-06-19 18:34:04 | INFO | train_inner | epoch 003: 1557 / 3002 loss=2.54, ppl=5.82, wps=5838.8, ups=0.09, wpb=64858, bsz=128, num_updates=7513, lr=9.99479e-05, gnorm=19.763, loss_scale=2, train_wall=11, gb_free=2.8, wall=86119 2021-06-19 18:34:15 | INFO | train_inner | epoch 003: 1558 / 3002 loss=2.749, ppl=6.72, wps=5895.1, ups=0.09, wpb=64865, bsz=128, num_updates=7514, lr=9.99479e-05, gnorm=2.192, loss_scale=2, train_wall=11, gb_free=2.8, wall=86130 2021-06-19 18:34:26 | INFO | train_inner | epoch 003: 1559 / 3002 loss=2.546, ppl=5.84, wps=5888, ups=0.09, wpb=64823, bsz=128, num_updates=7515, lr=9.99479e-05, gnorm=2.135, loss_scale=2, train_wall=11, gb_free=2.8, wall=86141 2021-06-19 18:34:37 | INFO | train_inner | epoch 003: 1560 / 3002 loss=2.738, ppl=6.67, wps=5787, ups=0.09, wpb=64749, bsz=128, num_updates=7516, lr=9.99479e-05, gnorm=4.29, loss_scale=2, train_wall=11, gb_free=2.8, wall=86152 2021-06-19 18:34:49 | INFO | train_inner | epoch 003: 1561 / 3002 loss=2.727, ppl=6.62, wps=5774, ups=0.09, wpb=64775, bsz=128, num_updates=7517, lr=9.99479e-05, gnorm=2.245, loss_scale=2, train_wall=11, gb_free=2.8, wall=86163 2021-06-19 18:35:00 | INFO | train_inner | epoch 003: 1562 / 3002 loss=2.524, ppl=5.75, wps=5903, ups=0.09, wpb=64866, bsz=128, num_updates=7518, lr=9.99479e-05, gnorm=2.887, loss_scale=2, train_wall=11, gb_free=2.8, wall=86174 2021-06-19 18:35:11 | INFO | train_inner | epoch 003: 1563 / 3002 loss=2.666, ppl=6.35, wps=5710.9, ups=0.09, wpb=64834, bsz=128, num_updates=7519, lr=9.99478e-05, gnorm=2.423, loss_scale=2, train_wall=11, gb_free=2.8, wall=86185 2021-06-19 18:35:22 | INFO | train_inner | epoch 003: 1564 / 3002 loss=2.619, ppl=6.14, wps=5894.5, ups=0.09, wpb=64787, bsz=128, num_updates=7520, lr=9.99478e-05, gnorm=2.209, loss_scale=2, train_wall=11, gb_free=2.8, wall=86196 2021-06-19 18:35:33 | INFO | train_inner | epoch 003: 1565 / 3002 loss=2.59, ppl=6.02, wps=5872.3, ups=0.09, wpb=64794, bsz=128, num_updates=7521, lr=9.99478e-05, gnorm=2.24, loss_scale=2, train_wall=11, gb_free=2.8, wall=86207 2021-06-19 18:35:44 | INFO | train_inner | epoch 003: 1566 / 3002 loss=2.586, ppl=6, wps=5849.6, ups=0.09, wpb=64847, bsz=128, num_updates=7522, lr=9.99478e-05, gnorm=2.509, loss_scale=2, train_wall=11, gb_free=2.8, wall=86218 2021-06-19 18:35:56 | INFO | train_inner | epoch 003: 1567 / 3002 loss=2.772, ppl=6.83, wps=5690.9, ups=0.09, wpb=64802, bsz=128, num_updates=7523, lr=9.99478e-05, gnorm=2.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=86230 2021-06-19 18:36:07 | INFO | train_inner | epoch 003: 1568 / 3002 loss=2.728, ppl=6.62, wps=5755.2, ups=0.09, wpb=64778, bsz=128, num_updates=7524, lr=9.99478e-05, gnorm=2.235, loss_scale=2, train_wall=11, gb_free=2.8, wall=86241 2021-06-19 18:36:18 | INFO | train_inner | epoch 003: 1569 / 3002 loss=2.711, ppl=6.55, wps=5921.3, ups=0.09, wpb=64947, bsz=128, num_updates=7525, lr=9.99478e-05, gnorm=2.179, loss_scale=2, train_wall=10, gb_free=2.8, wall=86252 2021-06-19 18:36:29 | INFO | train_inner | epoch 003: 1570 / 3002 loss=2.601, ppl=6.07, wps=5728.1, ups=0.09, wpb=64875, bsz=128, num_updates=7526, lr=9.99478e-05, gnorm=2.264, loss_scale=2, train_wall=11, gb_free=2.8, wall=86263 2021-06-19 18:36:40 | INFO | train_inner | epoch 003: 1571 / 3002 loss=2.666, ppl=6.35, wps=5966.1, ups=0.09, wpb=64909, bsz=128, num_updates=7527, lr=9.99478e-05, gnorm=2.249, loss_scale=2, train_wall=10, gb_free=2.8, wall=86274 2021-06-19 18:36:51 | INFO | train_inner | epoch 003: 1572 / 3002 loss=2.724, ppl=6.61, wps=5821.7, ups=0.09, wpb=64772, bsz=128, num_updates=7528, lr=9.99478e-05, gnorm=2.218, loss_scale=2, train_wall=11, gb_free=2.8, wall=86285 2021-06-19 18:37:02 | INFO | train_inner | epoch 003: 1573 / 3002 loss=2.522, ppl=5.74, wps=5823.6, ups=0.09, wpb=64854, bsz=128, num_updates=7529, lr=9.99478e-05, gnorm=2.138, loss_scale=2, train_wall=11, gb_free=2.8, wall=86297 2021-06-19 18:37:13 | INFO | train_inner | epoch 003: 1574 / 3002 loss=2.612, ppl=6.11, wps=5809.1, ups=0.09, wpb=64783, bsz=128, num_updates=7530, lr=9.99478e-05, gnorm=3.115, loss_scale=2, train_wall=11, gb_free=2.8, wall=86308 2021-06-19 18:37:25 | INFO | train_inner | epoch 003: 1575 / 3002 loss=2.699, ppl=6.49, wps=5797.7, ups=0.09, wpb=64807, bsz=128, num_updates=7531, lr=9.99477e-05, gnorm=3.136, loss_scale=2, train_wall=11, gb_free=2.8, wall=86319 2021-06-19 18:37:36 | INFO | train_inner | epoch 003: 1576 / 3002 loss=2.647, ppl=6.26, wps=5810.8, ups=0.09, wpb=64874, bsz=128, num_updates=7532, lr=9.99477e-05, gnorm=2.161, loss_scale=2, train_wall=11, gb_free=2.8, wall=86330 2021-06-19 18:37:47 | INFO | train_inner | epoch 003: 1577 / 3002 loss=2.607, ppl=6.09, wps=5828.7, ups=0.09, wpb=64840, bsz=128, num_updates=7533, lr=9.99477e-05, gnorm=2.107, loss_scale=2, train_wall=11, gb_free=2.8, wall=86341 2021-06-19 18:37:58 | INFO | train_inner | epoch 003: 1578 / 3002 loss=2.772, ppl=6.83, wps=5881.1, ups=0.09, wpb=64852, bsz=128, num_updates=7534, lr=9.99477e-05, gnorm=2.134, loss_scale=2, train_wall=11, gb_free=2.8, wall=86352 2021-06-19 18:38:09 | INFO | train_inner | epoch 003: 1579 / 3002 loss=2.742, ppl=6.69, wps=5927.2, ups=0.09, wpb=64865, bsz=128, num_updates=7535, lr=9.99477e-05, gnorm=2.263, loss_scale=2, train_wall=10, gb_free=2.8, wall=86363 2021-06-19 18:38:20 | INFO | train_inner | epoch 003: 1580 / 3002 loss=2.547, ppl=5.84, wps=5765.3, ups=0.09, wpb=64836, bsz=128, num_updates=7536, lr=9.99477e-05, gnorm=2.39, loss_scale=4, train_wall=11, gb_free=2.8, wall=86374 2021-06-19 18:38:31 | INFO | train_inner | epoch 003: 1581 / 3002 loss=2.657, ppl=6.31, wps=5905.9, ups=0.09, wpb=64886, bsz=128, num_updates=7537, lr=9.99477e-05, gnorm=2.382, loss_scale=4, train_wall=11, gb_free=2.8, wall=86385 2021-06-19 18:38:42 | INFO | train_inner | epoch 003: 1582 / 3002 loss=2.609, ppl=6.1, wps=5860.1, ups=0.09, wpb=64866, bsz=128, num_updates=7538, lr=9.99477e-05, gnorm=2.165, loss_scale=4, train_wall=11, gb_free=2.8, wall=86396 2021-06-19 18:38:53 | INFO | train_inner | epoch 003: 1583 / 3002 loss=2.819, ppl=7.06, wps=5817.1, ups=0.09, wpb=64725, bsz=128, num_updates=7539, lr=9.99477e-05, gnorm=2.192, loss_scale=4, train_wall=11, gb_free=2.8, wall=86408 2021-06-19 18:39:04 | INFO | train_inner | epoch 003: 1584 / 3002 loss=2.69, ppl=6.46, wps=5793.5, ups=0.09, wpb=64848, bsz=128, num_updates=7540, lr=9.99477e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=86419 2021-06-19 18:39:15 | INFO | train_inner | epoch 003: 1585 / 3002 loss=2.711, ppl=6.55, wps=5870.9, ups=0.09, wpb=64722, bsz=128, num_updates=7541, lr=9.99477e-05, gnorm=2.552, loss_scale=4, train_wall=11, gb_free=2.8, wall=86430 2021-06-19 18:39:27 | INFO | train_inner | epoch 003: 1586 / 3002 loss=2.555, ppl=5.88, wps=5847.2, ups=0.09, wpb=64787, bsz=128, num_updates=7542, lr=9.99477e-05, gnorm=2.15, loss_scale=4, train_wall=11, gb_free=2.8, wall=86441 2021-06-19 18:39:38 | INFO | train_inner | epoch 003: 1587 / 3002 loss=2.579, ppl=5.98, wps=5751.9, ups=0.09, wpb=64875, bsz=128, num_updates=7543, lr=9.99477e-05, gnorm=2.067, loss_scale=4, train_wall=11, gb_free=2.8, wall=86452 2021-06-19 18:39:49 | INFO | train_inner | epoch 003: 1588 / 3002 loss=2.801, ppl=6.97, wps=5903.9, ups=0.09, wpb=64805, bsz=128, num_updates=7544, lr=9.99476e-05, gnorm=2.128, loss_scale=4, train_wall=11, gb_free=2.8, wall=86463 2021-06-19 18:40:00 | INFO | train_inner | epoch 003: 1589 / 3002 loss=2.636, ppl=6.22, wps=5979.7, ups=0.09, wpb=64858, bsz=128, num_updates=7545, lr=9.99476e-05, gnorm=3.784, loss_scale=4, train_wall=10, gb_free=2.8, wall=86474 2021-06-19 18:40:11 | INFO | train_inner | epoch 003: 1590 / 3002 loss=2.604, ppl=6.08, wps=5793.2, ups=0.09, wpb=64875, bsz=128, num_updates=7546, lr=9.99476e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=86485 2021-06-19 18:40:22 | INFO | train_inner | epoch 003: 1591 / 3002 loss=2.529, ppl=5.77, wps=5887.8, ups=0.09, wpb=64825, bsz=128, num_updates=7547, lr=9.99476e-05, gnorm=2.128, loss_scale=4, train_wall=11, gb_free=2.8, wall=86496 2021-06-19 18:40:33 | INFO | train_inner | epoch 003: 1592 / 3002 loss=2.583, ppl=5.99, wps=5923.6, ups=0.09, wpb=64788, bsz=128, num_updates=7548, lr=9.99476e-05, gnorm=2.258, loss_scale=4, train_wall=10, gb_free=2.8, wall=86507 2021-06-19 18:40:44 | INFO | train_inner | epoch 003: 1593 / 3002 loss=2.639, ppl=6.23, wps=5746.7, ups=0.09, wpb=64748, bsz=128, num_updates=7549, lr=9.99476e-05, gnorm=2.545, loss_scale=4, train_wall=11, gb_free=2.8, wall=86518 2021-06-19 18:40:55 | INFO | train_inner | epoch 003: 1594 / 3002 loss=2.626, ppl=6.17, wps=5829.4, ups=0.09, wpb=64841, bsz=128, num_updates=7550, lr=9.99476e-05, gnorm=2.157, loss_scale=4, train_wall=11, gb_free=2.8, wall=86530 2021-06-19 18:41:07 | INFO | train_inner | epoch 003: 1595 / 3002 loss=2.437, ppl=5.42, wps=5725.2, ups=0.09, wpb=64815, bsz=128, num_updates=7551, lr=9.99476e-05, gnorm=2.152, loss_scale=4, train_wall=11, gb_free=2.8, wall=86541 2021-06-19 18:41:18 | INFO | train_inner | epoch 003: 1596 / 3002 loss=2.764, ppl=6.79, wps=5840.7, ups=0.09, wpb=64740, bsz=128, num_updates=7552, lr=9.99476e-05, gnorm=2.197, loss_scale=4, train_wall=11, gb_free=2.8, wall=86552 2021-06-19 18:41:29 | INFO | train_inner | epoch 003: 1597 / 3002 loss=2.657, ppl=6.31, wps=5780.6, ups=0.09, wpb=64789, bsz=128, num_updates=7553, lr=9.99476e-05, gnorm=2.171, loss_scale=4, train_wall=11, gb_free=2.8, wall=86563 2021-06-19 18:41:40 | INFO | train_inner | epoch 003: 1598 / 3002 loss=2.721, ppl=6.59, wps=5854.5, ups=0.09, wpb=64803, bsz=128, num_updates=7554, lr=9.99476e-05, gnorm=2.303, loss_scale=4, train_wall=11, gb_free=2.8, wall=86574 2021-06-19 18:41:51 | INFO | train_inner | epoch 003: 1599 / 3002 loss=2.555, ppl=5.88, wps=5856.6, ups=0.09, wpb=64877, bsz=128, num_updates=7555, lr=9.99476e-05, gnorm=2.19, loss_scale=4, train_wall=11, gb_free=2.8, wall=86585 2021-06-19 18:42:02 | INFO | train_inner | epoch 003: 1600 / 3002 loss=2.583, ppl=5.99, wps=5765.5, ups=0.09, wpb=64821, bsz=128, num_updates=7556, lr=9.99475e-05, gnorm=2.025, loss_scale=4, train_wall=11, gb_free=2.8, wall=86597 2021-06-19 18:42:13 | INFO | train_inner | epoch 003: 1601 / 3002 loss=2.647, ppl=6.27, wps=5839, ups=0.09, wpb=64853, bsz=128, num_updates=7557, lr=9.99475e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=86608 2021-06-19 18:42:24 | INFO | train_inner | epoch 003: 1602 / 3002 loss=2.685, ppl=6.43, wps=5891.1, ups=0.09, wpb=64846, bsz=128, num_updates=7558, lr=9.99475e-05, gnorm=2.158, loss_scale=4, train_wall=11, gb_free=2.8, wall=86619 2021-06-19 18:42:36 | INFO | train_inner | epoch 003: 1603 / 3002 loss=2.656, ppl=6.3, wps=5779.1, ups=0.09, wpb=64824, bsz=128, num_updates=7559, lr=9.99475e-05, gnorm=2.101, loss_scale=4, train_wall=11, gb_free=2.8, wall=86630 2021-06-19 18:42:47 | INFO | train_inner | epoch 003: 1604 / 3002 loss=2.608, ppl=6.1, wps=5811.4, ups=0.09, wpb=64864, bsz=128, num_updates=7560, lr=9.99475e-05, gnorm=2.747, loss_scale=4, train_wall=11, gb_free=2.8, wall=86641 2021-06-19 18:42:58 | INFO | train_inner | epoch 003: 1605 / 3002 loss=2.664, ppl=6.34, wps=5719.6, ups=0.09, wpb=64765, bsz=128, num_updates=7561, lr=9.99475e-05, gnorm=2.103, loss_scale=4, train_wall=11, gb_free=2.8, wall=86652 2021-06-19 18:43:09 | INFO | train_inner | epoch 003: 1606 / 3002 loss=2.655, ppl=6.3, wps=5875.6, ups=0.09, wpb=64910, bsz=128, num_updates=7562, lr=9.99475e-05, gnorm=2.28, loss_scale=4, train_wall=11, gb_free=2.8, wall=86663 2021-06-19 18:43:20 | INFO | train_inner | epoch 003: 1607 / 3002 loss=2.62, ppl=6.15, wps=5926.2, ups=0.09, wpb=64912, bsz=128, num_updates=7563, lr=9.99475e-05, gnorm=2.117, loss_scale=4, train_wall=11, gb_free=2.8, wall=86674 2021-06-19 18:43:31 | INFO | train_inner | epoch 003: 1608 / 3002 loss=2.501, ppl=5.66, wps=5909.2, ups=0.09, wpb=64849, bsz=128, num_updates=7564, lr=9.99475e-05, gnorm=2.129, loss_scale=4, train_wall=11, gb_free=2.8, wall=86685 2021-06-19 18:43:42 | INFO | train_inner | epoch 003: 1609 / 3002 loss=2.416, ppl=5.34, wps=5750, ups=0.09, wpb=64849, bsz=128, num_updates=7565, lr=9.99475e-05, gnorm=2.471, loss_scale=4, train_wall=11, gb_free=2.8, wall=86697 2021-06-19 18:43:53 | INFO | train_inner | epoch 003: 1610 / 3002 loss=2.705, ppl=6.52, wps=5777.4, ups=0.09, wpb=64820, bsz=128, num_updates=7566, lr=9.99475e-05, gnorm=2.087, loss_scale=4, train_wall=11, gb_free=2.8, wall=86708 2021-06-19 18:44:05 | INFO | train_inner | epoch 003: 1611 / 3002 loss=2.767, ppl=6.8, wps=5757.4, ups=0.09, wpb=64760, bsz=128, num_updates=7567, lr=9.99475e-05, gnorm=2.291, loss_scale=4, train_wall=11, gb_free=2.8, wall=86719 2021-06-19 18:44:16 | INFO | train_inner | epoch 003: 1612 / 3002 loss=2.663, ppl=6.34, wps=5931.2, ups=0.09, wpb=64842, bsz=128, num_updates=7568, lr=9.99475e-05, gnorm=2.152, loss_scale=4, train_wall=10, gb_free=2.8, wall=86730 2021-06-19 18:44:27 | INFO | train_inner | epoch 003: 1613 / 3002 loss=2.628, ppl=6.18, wps=5848.2, ups=0.09, wpb=64791, bsz=128, num_updates=7569, lr=9.99474e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=86741 2021-06-19 18:44:38 | INFO | train_inner | epoch 003: 1614 / 3002 loss=2.461, ppl=5.5, wps=5930.8, ups=0.09, wpb=64922, bsz=128, num_updates=7570, lr=9.99474e-05, gnorm=2.038, loss_scale=4, train_wall=10, gb_free=2.8, wall=86752 2021-06-19 18:44:49 | INFO | train_inner | epoch 003: 1615 / 3002 loss=2.626, ppl=6.17, wps=5808.7, ups=0.09, wpb=64818, bsz=128, num_updates=7571, lr=9.99474e-05, gnorm=2.099, loss_scale=4, train_wall=11, gb_free=2.8, wall=86763 2021-06-19 18:44:59 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-19 18:45:11 | INFO | train_inner | epoch 003: 1617 / 3002 loss=2.664, ppl=6.34, wps=2968, ups=0.05, wpb=64773, bsz=128, num_updates=7572, lr=9.99474e-05, gnorm=2.323, loss_scale=2, train_wall=21, gb_free=2.8, wall=86785 2021-06-19 18:45:22 | INFO | train_inner | epoch 003: 1618 / 3002 loss=2.629, ppl=6.18, wps=5808.6, ups=0.09, wpb=64803, bsz=128, num_updates=7573, lr=9.99474e-05, gnorm=2.093, loss_scale=2, train_wall=11, gb_free=2.8, wall=86796 2021-06-19 18:45:33 | INFO | train_inner | epoch 003: 1619 / 3002 loss=2.639, ppl=6.23, wps=5659.8, ups=0.09, wpb=64861, bsz=128, num_updates=7574, lr=9.99474e-05, gnorm=2.19, loss_scale=2, train_wall=11, gb_free=2.8, wall=86808 2021-06-19 18:45:44 | INFO | train_inner | epoch 003: 1620 / 3002 loss=2.642, ppl=6.24, wps=5856.8, ups=0.09, wpb=64842, bsz=128, num_updates=7575, lr=9.99474e-05, gnorm=2.253, loss_scale=2, train_wall=11, gb_free=2.8, wall=86819 2021-06-19 18:45:55 | INFO | train_inner | epoch 003: 1621 / 3002 loss=2.555, ppl=5.88, wps=5832.6, ups=0.09, wpb=64870, bsz=128, num_updates=7576, lr=9.99474e-05, gnorm=2.31, loss_scale=2, train_wall=11, gb_free=2.8, wall=86830 2021-06-19 18:46:06 | INFO | train_inner | epoch 003: 1622 / 3002 loss=2.758, ppl=6.77, wps=5981.3, ups=0.09, wpb=64835, bsz=128, num_updates=7577, lr=9.99474e-05, gnorm=2.245, loss_scale=2, train_wall=10, gb_free=2.8, wall=86841 2021-06-19 18:46:18 | INFO | train_inner | epoch 003: 1623 / 3002 loss=2.567, ppl=5.93, wps=5797.3, ups=0.09, wpb=64847, bsz=128, num_updates=7578, lr=9.99474e-05, gnorm=2.082, loss_scale=2, train_wall=11, gb_free=2.8, wall=86852 2021-06-19 18:46:28 | INFO | train_inner | epoch 003: 1624 / 3002 loss=2.53, ppl=5.78, wps=6070.7, ups=0.09, wpb=64894, bsz=128, num_updates=7579, lr=9.99474e-05, gnorm=2.113, loss_scale=2, train_wall=10, gb_free=2.8, wall=86863 2021-06-19 18:46:39 | INFO | train_inner | epoch 003: 1625 / 3002 loss=2.767, ppl=6.81, wps=5843.4, ups=0.09, wpb=64788, bsz=128, num_updates=7580, lr=9.99474e-05, gnorm=2.131, loss_scale=2, train_wall=11, gb_free=2.8, wall=86874 2021-06-19 18:46:50 | INFO | train_inner | epoch 003: 1626 / 3002 loss=2.686, ppl=6.44, wps=5810.5, ups=0.09, wpb=64786, bsz=128, num_updates=7581, lr=9.99473e-05, gnorm=2.09, loss_scale=2, train_wall=11, gb_free=2.8, wall=86885 2021-06-19 18:47:02 | INFO | train_inner | epoch 003: 1627 / 3002 loss=2.513, ppl=5.71, wps=5824.3, ups=0.09, wpb=64787, bsz=128, num_updates=7582, lr=9.99473e-05, gnorm=2.137, loss_scale=2, train_wall=11, gb_free=2.8, wall=86896 2021-06-19 18:47:13 | INFO | train_inner | epoch 003: 1628 / 3002 loss=2.785, ppl=6.89, wps=5828, ups=0.09, wpb=64854, bsz=128, num_updates=7583, lr=9.99473e-05, gnorm=3.169, loss_scale=2, train_wall=11, gb_free=2.8, wall=86907 2021-06-19 18:47:24 | INFO | train_inner | epoch 003: 1629 / 3002 loss=2.601, ppl=6.07, wps=5810, ups=0.09, wpb=64888, bsz=128, num_updates=7584, lr=9.99473e-05, gnorm=2.098, loss_scale=2, train_wall=11, gb_free=2.8, wall=86918 2021-06-19 18:47:35 | INFO | train_inner | epoch 003: 1630 / 3002 loss=2.714, ppl=6.56, wps=5800.9, ups=0.09, wpb=64698, bsz=128, num_updates=7585, lr=9.99473e-05, gnorm=2.593, loss_scale=2, train_wall=11, gb_free=2.8, wall=86929 2021-06-19 18:47:46 | INFO | train_inner | epoch 003: 1631 / 3002 loss=2.69, ppl=6.45, wps=5829, ups=0.09, wpb=64918, bsz=128, num_updates=7586, lr=9.99473e-05, gnorm=2.131, loss_scale=2, train_wall=11, gb_free=2.8, wall=86940 2021-06-19 18:47:57 | INFO | train_inner | epoch 003: 1632 / 3002 loss=2.66, ppl=6.32, wps=5849.8, ups=0.09, wpb=64947, bsz=128, num_updates=7587, lr=9.99473e-05, gnorm=2.208, loss_scale=2, train_wall=11, gb_free=2.8, wall=86952 2021-06-19 18:48:08 | INFO | train_inner | epoch 003: 1633 / 3002 loss=2.644, ppl=6.25, wps=5845.5, ups=0.09, wpb=64809, bsz=128, num_updates=7588, lr=9.99473e-05, gnorm=2.101, loss_scale=2, train_wall=11, gb_free=2.8, wall=86963 2021-06-19 18:48:19 | INFO | train_inner | epoch 003: 1634 / 3002 loss=2.708, ppl=6.53, wps=5896.6, ups=0.09, wpb=64794, bsz=128, num_updates=7589, lr=9.99473e-05, gnorm=2.115, loss_scale=2, train_wall=11, gb_free=2.8, wall=86974 2021-06-19 18:48:30 | INFO | train_inner | epoch 003: 1635 / 3002 loss=2.648, ppl=6.27, wps=5847.3, ups=0.09, wpb=64836, bsz=128, num_updates=7590, lr=9.99473e-05, gnorm=2.18, loss_scale=2, train_wall=11, gb_free=2.8, wall=86985 2021-06-19 18:48:42 | INFO | train_inner | epoch 003: 1636 / 3002 loss=2.56, ppl=5.9, wps=5812.9, ups=0.09, wpb=64844, bsz=128, num_updates=7591, lr=9.99473e-05, gnorm=2.261, loss_scale=2, train_wall=11, gb_free=2.8, wall=86996 2021-06-19 18:48:53 | INFO | train_inner | epoch 003: 1637 / 3002 loss=2.646, ppl=6.26, wps=5811.1, ups=0.09, wpb=64809, bsz=128, num_updates=7592, lr=9.99473e-05, gnorm=2.932, loss_scale=2, train_wall=11, gb_free=2.8, wall=87007 2021-06-19 18:49:04 | INFO | train_inner | epoch 003: 1638 / 3002 loss=2.751, ppl=6.73, wps=5817.3, ups=0.09, wpb=64824, bsz=128, num_updates=7593, lr=9.99473e-05, gnorm=2.235, loss_scale=2, train_wall=11, gb_free=2.8, wall=87018 2021-06-19 18:49:15 | INFO | train_inner | epoch 003: 1639 / 3002 loss=2.779, ppl=6.86, wps=5806.6, ups=0.09, wpb=64783, bsz=128, num_updates=7594, lr=9.99472e-05, gnorm=2.168, loss_scale=2, train_wall=11, gb_free=2.8, wall=87029 2021-06-19 18:49:26 | INFO | train_inner | epoch 003: 1640 / 3002 loss=2.877, ppl=7.34, wps=5870.9, ups=0.09, wpb=64821, bsz=128, num_updates=7595, lr=9.99472e-05, gnorm=2.921, loss_scale=2, train_wall=11, gb_free=2.8, wall=87040 2021-06-19 18:49:37 | INFO | train_inner | epoch 003: 1641 / 3002 loss=2.624, ppl=6.16, wps=5829.9, ups=0.09, wpb=64922, bsz=128, num_updates=7596, lr=9.99472e-05, gnorm=2.335, loss_scale=2, train_wall=11, gb_free=2.8, wall=87052 2021-06-19 18:49:48 | INFO | train_inner | epoch 003: 1642 / 3002 loss=2.627, ppl=6.18, wps=5781.9, ups=0.09, wpb=64862, bsz=128, num_updates=7597, lr=9.99472e-05, gnorm=2.502, loss_scale=2, train_wall=11, gb_free=2.8, wall=87063 2021-06-19 18:49:59 | INFO | train_inner | epoch 003: 1643 / 3002 loss=2.702, ppl=6.5, wps=5898, ups=0.09, wpb=64853, bsz=128, num_updates=7598, lr=9.99472e-05, gnorm=2.825, loss_scale=2, train_wall=11, gb_free=2.8, wall=87074 2021-06-19 18:50:10 | INFO | train_inner | epoch 003: 1644 / 3002 loss=2.624, ppl=6.16, wps=5860, ups=0.09, wpb=64831, bsz=128, num_updates=7599, lr=9.99472e-05, gnorm=3.547, loss_scale=2, train_wall=11, gb_free=2.8, wall=87085 2021-06-19 18:50:21 | INFO | train_inner | epoch 003: 1645 / 3002 loss=2.664, ppl=6.34, wps=5894.4, ups=0.09, wpb=64833, bsz=128, num_updates=7600, lr=9.99472e-05, gnorm=8.556, loss_scale=2, train_wall=11, gb_free=2.8, wall=87096 2021-06-19 18:50:33 | INFO | train_inner | epoch 003: 1646 / 3002 loss=2.527, ppl=5.76, wps=5829.8, ups=0.09, wpb=64775, bsz=128, num_updates=7601, lr=9.99472e-05, gnorm=2.164, loss_scale=2, train_wall=11, gb_free=2.8, wall=87107 2021-06-19 18:50:44 | INFO | train_inner | epoch 003: 1647 / 3002 loss=2.708, ppl=6.54, wps=5819.7, ups=0.09, wpb=64866, bsz=128, num_updates=7602, lr=9.99472e-05, gnorm=2.167, loss_scale=2, train_wall=11, gb_free=2.8, wall=87118 2021-06-19 18:50:55 | INFO | train_inner | epoch 003: 1648 / 3002 loss=2.637, ppl=6.22, wps=5909.3, ups=0.09, wpb=64784, bsz=128, num_updates=7603, lr=9.99472e-05, gnorm=2.108, loss_scale=2, train_wall=11, gb_free=2.8, wall=87129 2021-06-19 18:51:06 | INFO | train_inner | epoch 003: 1649 / 3002 loss=2.65, ppl=6.28, wps=5825.2, ups=0.09, wpb=64830, bsz=128, num_updates=7604, lr=9.99472e-05, gnorm=2.419, loss_scale=2, train_wall=11, gb_free=2.8, wall=87140 2021-06-19 18:51:17 | INFO | train_inner | epoch 003: 1650 / 3002 loss=2.623, ppl=6.16, wps=5811.4, ups=0.09, wpb=64857, bsz=128, num_updates=7605, lr=9.99472e-05, gnorm=2.233, loss_scale=2, train_wall=11, gb_free=2.8, wall=87151 2021-06-19 18:51:28 | INFO | train_inner | epoch 003: 1651 / 3002 loss=2.624, ppl=6.16, wps=5833.8, ups=0.09, wpb=64905, bsz=128, num_updates=7606, lr=9.99471e-05, gnorm=4.276, loss_scale=2, train_wall=11, gb_free=2.8, wall=87162 2021-06-19 18:51:39 | INFO | train_inner | epoch 003: 1652 / 3002 loss=2.71, ppl=6.54, wps=5757.6, ups=0.09, wpb=64788, bsz=128, num_updates=7607, lr=9.99471e-05, gnorm=2.198, loss_scale=2, train_wall=11, gb_free=2.8, wall=87174 2021-06-19 18:51:51 | INFO | train_inner | epoch 003: 1653 / 3002 loss=2.688, ppl=6.45, wps=5789.3, ups=0.09, wpb=64791, bsz=128, num_updates=7608, lr=9.99471e-05, gnorm=2.681, loss_scale=2, train_wall=11, gb_free=2.8, wall=87185 2021-06-19 18:52:02 | INFO | train_inner | epoch 003: 1654 / 3002 loss=2.584, ppl=6, wps=5829.3, ups=0.09, wpb=64786, bsz=128, num_updates=7609, lr=9.99471e-05, gnorm=2.846, loss_scale=2, train_wall=11, gb_free=2.8, wall=87196 2021-06-19 18:52:13 | INFO | train_inner | epoch 003: 1655 / 3002 loss=2.713, ppl=6.56, wps=5908.2, ups=0.09, wpb=64845, bsz=128, num_updates=7610, lr=9.99471e-05, gnorm=2.157, loss_scale=2, train_wall=10, gb_free=2.8, wall=87207 2021-06-19 18:52:24 | INFO | train_inner | epoch 003: 1656 / 3002 loss=2.701, ppl=6.5, wps=5844.4, ups=0.09, wpb=64891, bsz=128, num_updates=7611, lr=9.99471e-05, gnorm=8.015, loss_scale=2, train_wall=11, gb_free=2.8, wall=87218 2021-06-19 18:52:35 | INFO | train_inner | epoch 003: 1657 / 3002 loss=2.534, ppl=5.79, wps=5810.8, ups=0.09, wpb=64769, bsz=128, num_updates=7612, lr=9.99471e-05, gnorm=2.301, loss_scale=2, train_wall=11, gb_free=2.8, wall=87229 2021-06-19 18:52:46 | INFO | train_inner | epoch 003: 1658 / 3002 loss=2.518, ppl=5.73, wps=5786.6, ups=0.09, wpb=64897, bsz=128, num_updates=7613, lr=9.99471e-05, gnorm=4.319, loss_scale=2, train_wall=11, gb_free=2.8, wall=87240 2021-06-19 18:52:57 | INFO | train_inner | epoch 003: 1659 / 3002 loss=2.652, ppl=6.29, wps=5861, ups=0.09, wpb=64838, bsz=128, num_updates=7614, lr=9.99471e-05, gnorm=2.149, loss_scale=2, train_wall=11, gb_free=2.8, wall=87252 2021-06-19 18:53:09 | INFO | train_inner | epoch 003: 1660 / 3002 loss=2.628, ppl=6.18, wps=5699.4, ups=0.09, wpb=64851, bsz=128, num_updates=7615, lr=9.99471e-05, gnorm=2.171, loss_scale=2, train_wall=11, gb_free=2.8, wall=87263 2021-06-19 18:53:20 | INFO | train_inner | epoch 003: 1661 / 3002 loss=2.566, ppl=5.92, wps=5843.1, ups=0.09, wpb=64836, bsz=128, num_updates=7616, lr=9.99471e-05, gnorm=2.449, loss_scale=2, train_wall=11, gb_free=2.8, wall=87274 2021-06-19 18:53:31 | INFO | train_inner | epoch 003: 1662 / 3002 loss=2.571, ppl=5.94, wps=5903.3, ups=0.09, wpb=64898, bsz=128, num_updates=7617, lr=9.99471e-05, gnorm=18.783, loss_scale=2, train_wall=11, gb_free=2.8, wall=87285 2021-06-19 18:53:42 | INFO | train_inner | epoch 003: 1663 / 3002 loss=2.628, ppl=6.18, wps=5734.5, ups=0.09, wpb=64799, bsz=128, num_updates=7618, lr=9.99471e-05, gnorm=2.285, loss_scale=2, train_wall=11, gb_free=2.8, wall=87296 2021-06-19 18:53:53 | INFO | train_inner | epoch 003: 1664 / 3002 loss=2.597, ppl=6.05, wps=5835, ups=0.09, wpb=64847, bsz=128, num_updates=7619, lr=9.9947e-05, gnorm=2.841, loss_scale=2, train_wall=11, gb_free=2.8, wall=87307 2021-06-19 18:54:04 | INFO | train_inner | epoch 003: 1665 / 3002 loss=2.619, ppl=6.14, wps=5847.3, ups=0.09, wpb=64881, bsz=128, num_updates=7620, lr=9.9947e-05, gnorm=2.271, loss_scale=2, train_wall=11, gb_free=2.8, wall=87319 2021-06-19 18:54:15 | INFO | train_inner | epoch 003: 1666 / 3002 loss=2.667, ppl=6.35, wps=6002.2, ups=0.09, wpb=64844, bsz=128, num_updates=7621, lr=9.9947e-05, gnorm=2.487, loss_scale=2, train_wall=10, gb_free=2.8, wall=87329 2021-06-19 18:54:26 | INFO | train_inner | epoch 003: 1667 / 3002 loss=2.487, ppl=5.61, wps=5925.7, ups=0.09, wpb=64890, bsz=128, num_updates=7622, lr=9.9947e-05, gnorm=2.466, loss_scale=2, train_wall=10, gb_free=2.8, wall=87340 2021-06-19 18:54:37 | INFO | train_inner | epoch 003: 1668 / 3002 loss=2.631, ppl=6.19, wps=5969.8, ups=0.09, wpb=64795, bsz=128, num_updates=7623, lr=9.9947e-05, gnorm=2.179, loss_scale=2, train_wall=10, gb_free=2.8, wall=87351 2021-06-19 18:54:48 | INFO | train_inner | epoch 003: 1669 / 3002 loss=2.692, ppl=6.46, wps=5948.3, ups=0.09, wpb=64794, bsz=128, num_updates=7624, lr=9.9947e-05, gnorm=2.266, loss_scale=2, train_wall=10, gb_free=2.8, wall=87362 2021-06-19 18:54:59 | INFO | train_inner | epoch 003: 1670 / 3002 loss=2.612, ppl=6.11, wps=5850.9, ups=0.09, wpb=64826, bsz=128, num_updates=7625, lr=9.9947e-05, gnorm=2.129, loss_scale=2, train_wall=11, gb_free=2.8, wall=87373 2021-06-19 18:55:10 | INFO | train_inner | epoch 003: 1671 / 3002 loss=2.867, ppl=7.3, wps=5717.7, ups=0.09, wpb=64838, bsz=128, num_updates=7626, lr=9.9947e-05, gnorm=2.212, loss_scale=2, train_wall=11, gb_free=2.8, wall=87384 2021-06-19 18:55:21 | INFO | train_inner | epoch 003: 1672 / 3002 loss=2.683, ppl=6.42, wps=5874, ups=0.09, wpb=64819, bsz=128, num_updates=7627, lr=9.9947e-05, gnorm=2.224, loss_scale=2, train_wall=11, gb_free=2.8, wall=87395 2021-06-19 18:55:32 | INFO | train_inner | epoch 003: 1673 / 3002 loss=2.694, ppl=6.47, wps=5892.7, ups=0.09, wpb=64897, bsz=128, num_updates=7628, lr=9.9947e-05, gnorm=3.795, loss_scale=2, train_wall=11, gb_free=2.8, wall=87406 2021-06-19 18:55:43 | INFO | train_inner | epoch 003: 1674 / 3002 loss=2.682, ppl=6.42, wps=5839.5, ups=0.09, wpb=64807, bsz=128, num_updates=7629, lr=9.9947e-05, gnorm=2.271, loss_scale=2, train_wall=11, gb_free=2.8, wall=87418 2021-06-19 18:55:54 | INFO | train_inner | epoch 003: 1675 / 3002 loss=2.742, ppl=6.69, wps=5898.6, ups=0.09, wpb=64851, bsz=128, num_updates=7630, lr=9.9947e-05, gnorm=2.335, loss_scale=2, train_wall=11, gb_free=2.8, wall=87429 2021-06-19 18:56:05 | INFO | train_inner | epoch 003: 1676 / 3002 loss=2.678, ppl=6.4, wps=5805.2, ups=0.09, wpb=64813, bsz=128, num_updates=7631, lr=9.99469e-05, gnorm=2.224, loss_scale=2, train_wall=11, gb_free=2.8, wall=87440 2021-06-19 18:56:16 | INFO | train_inner | epoch 003: 1677 / 3002 loss=2.715, ppl=6.57, wps=5848.2, ups=0.09, wpb=64706, bsz=128, num_updates=7632, lr=9.99469e-05, gnorm=2.786, loss_scale=2, train_wall=11, gb_free=2.8, wall=87451 2021-06-19 18:56:28 | INFO | train_inner | epoch 003: 1678 / 3002 loss=2.671, ppl=6.37, wps=5838, ups=0.09, wpb=64880, bsz=128, num_updates=7633, lr=9.99469e-05, gnorm=2.381, loss_scale=2, train_wall=11, gb_free=2.8, wall=87462 2021-06-19 18:56:39 | INFO | train_inner | epoch 003: 1679 / 3002 loss=2.589, ppl=6.02, wps=5918.3, ups=0.09, wpb=64805, bsz=128, num_updates=7634, lr=9.99469e-05, gnorm=2.242, loss_scale=2, train_wall=10, gb_free=2.8, wall=87473 2021-06-19 18:56:50 | INFO | train_inner | epoch 003: 1680 / 3002 loss=2.778, ppl=6.86, wps=5828.9, ups=0.09, wpb=64777, bsz=128, num_updates=7635, lr=9.99469e-05, gnorm=2.437, loss_scale=2, train_wall=11, gb_free=2.8, wall=87484 2021-06-19 18:57:01 | INFO | train_inner | epoch 003: 1681 / 3002 loss=2.587, ppl=6.01, wps=5812.6, ups=0.09, wpb=64822, bsz=128, num_updates=7636, lr=9.99469e-05, gnorm=2.209, loss_scale=2, train_wall=11, gb_free=2.8, wall=87495 2021-06-19 18:57:12 | INFO | train_inner | epoch 003: 1682 / 3002 loss=2.692, ppl=6.46, wps=5903.6, ups=0.09, wpb=64923, bsz=128, num_updates=7637, lr=9.99469e-05, gnorm=2.2, loss_scale=2, train_wall=11, gb_free=2.8, wall=87506 2021-06-19 18:57:23 | INFO | train_inner | epoch 003: 1683 / 3002 loss=2.533, ppl=5.79, wps=5899.9, ups=0.09, wpb=64909, bsz=128, num_updates=7638, lr=9.99469e-05, gnorm=3.948, loss_scale=2, train_wall=11, gb_free=2.8, wall=87517 2021-06-19 18:57:34 | INFO | train_inner | epoch 003: 1684 / 3002 loss=2.732, ppl=6.64, wps=5821.5, ups=0.09, wpb=64745, bsz=128, num_updates=7639, lr=9.99469e-05, gnorm=2.101, loss_scale=2, train_wall=11, gb_free=2.8, wall=87528 2021-06-19 18:57:45 | INFO | train_inner | epoch 003: 1685 / 3002 loss=2.549, ppl=5.85, wps=5856.4, ups=0.09, wpb=64793, bsz=128, num_updates=7640, lr=9.99469e-05, gnorm=2.103, loss_scale=2, train_wall=11, gb_free=2.8, wall=87539 2021-06-19 18:57:56 | INFO | train_inner | epoch 003: 1686 / 3002 loss=2.598, ppl=6.06, wps=5851.3, ups=0.09, wpb=64883, bsz=128, num_updates=7641, lr=9.99469e-05, gnorm=2.467, loss_scale=2, train_wall=11, gb_free=2.8, wall=87550 2021-06-19 18:58:07 | INFO | train_inner | epoch 003: 1687 / 3002 loss=2.678, ppl=6.4, wps=5873.2, ups=0.09, wpb=64714, bsz=128, num_updates=7642, lr=9.99469e-05, gnorm=2.217, loss_scale=2, train_wall=11, gb_free=2.8, wall=87561 2021-06-19 18:58:18 | INFO | train_inner | epoch 003: 1688 / 3002 loss=2.655, ppl=6.3, wps=5784.1, ups=0.09, wpb=64801, bsz=128, num_updates=7643, lr=9.99469e-05, gnorm=7.026, loss_scale=2, train_wall=11, gb_free=2.8, wall=87573 2021-06-19 18:58:29 | INFO | train_inner | epoch 003: 1689 / 3002 loss=2.538, ppl=5.81, wps=5843.6, ups=0.09, wpb=64865, bsz=128, num_updates=7644, lr=9.99468e-05, gnorm=2.161, loss_scale=2, train_wall=11, gb_free=2.8, wall=87584 2021-06-19 18:58:40 | INFO | train_inner | epoch 003: 1690 / 3002 loss=2.654, ppl=6.29, wps=5885.2, ups=0.09, wpb=64923, bsz=128, num_updates=7645, lr=9.99468e-05, gnorm=2.28, loss_scale=2, train_wall=11, gb_free=2.8, wall=87595 2021-06-19 18:58:52 | INFO | train_inner | epoch 003: 1691 / 3002 loss=2.747, ppl=6.71, wps=5744.6, ups=0.09, wpb=64761, bsz=128, num_updates=7646, lr=9.99468e-05, gnorm=5.714, loss_scale=2, train_wall=11, gb_free=2.8, wall=87606 2021-06-19 18:59:03 | INFO | train_inner | epoch 003: 1692 / 3002 loss=2.785, ppl=6.89, wps=5820.8, ups=0.09, wpb=64828, bsz=128, num_updates=7647, lr=9.99468e-05, gnorm=2.078, loss_scale=2, train_wall=11, gb_free=2.8, wall=87617 2021-06-19 18:59:14 | INFO | train_inner | epoch 003: 1693 / 3002 loss=2.674, ppl=6.38, wps=5888.4, ups=0.09, wpb=64862, bsz=128, num_updates=7648, lr=9.99468e-05, gnorm=3.773, loss_scale=2, train_wall=11, gb_free=2.8, wall=87628 2021-06-19 18:59:25 | INFO | train_inner | epoch 003: 1694 / 3002 loss=2.615, ppl=6.13, wps=5885.9, ups=0.09, wpb=64880, bsz=128, num_updates=7649, lr=9.99468e-05, gnorm=2.309, loss_scale=2, train_wall=11, gb_free=2.8, wall=87639 2021-06-19 18:59:36 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 2021-06-19 18:59:47 | INFO | train_inner | epoch 003: 1696 / 3002 loss=2.618, ppl=6.14, wps=2896.6, ups=0.04, wpb=64723, bsz=128, num_updates=7650, lr=9.99468e-05, gnorm=6.973, loss_scale=1, train_wall=21, gb_free=2.8, wall=87662 2021-06-19 18:59:58 | INFO | train_inner | epoch 003: 1697 / 3002 loss=2.637, ppl=6.22, wps=5836.4, ups=0.09, wpb=64872, bsz=128, num_updates=7651, lr=9.99468e-05, gnorm=2.387, loss_scale=1, train_wall=11, gb_free=2.8, wall=87673 2021-06-19 19:00:09 | INFO | train_inner | epoch 003: 1698 / 3002 loss=2.693, ppl=6.47, wps=5935.3, ups=0.09, wpb=64799, bsz=128, num_updates=7652, lr=9.99468e-05, gnorm=2.801, loss_scale=1, train_wall=10, gb_free=2.8, wall=87684 2021-06-19 19:00:20 | INFO | train_inner | epoch 003: 1699 / 3002 loss=2.537, ppl=5.81, wps=5876.3, ups=0.09, wpb=64822, bsz=128, num_updates=7653, lr=9.99468e-05, gnorm=2.359, loss_scale=1, train_wall=11, gb_free=2.8, wall=87695 2021-06-19 19:00:31 | INFO | train_inner | epoch 003: 1700 / 3002 loss=2.681, ppl=6.41, wps=5914.7, ups=0.09, wpb=64875, bsz=128, num_updates=7654, lr=9.99468e-05, gnorm=2.253, loss_scale=1, train_wall=11, gb_free=2.8, wall=87706 2021-06-19 19:00:42 | INFO | train_inner | epoch 003: 1701 / 3002 loss=2.598, ppl=6.06, wps=5829.9, ups=0.09, wpb=64812, bsz=128, num_updates=7655, lr=9.99468e-05, gnorm=2.201, loss_scale=1, train_wall=11, gb_free=2.8, wall=87717 2021-06-19 19:00:54 | INFO | train_inner | epoch 003: 1702 / 3002 loss=2.771, ppl=6.82, wps=5789.9, ups=0.09, wpb=64761, bsz=128, num_updates=7656, lr=9.99467e-05, gnorm=2.39, loss_scale=1, train_wall=11, gb_free=2.8, wall=87728 2021-06-19 19:01:05 | INFO | train_inner | epoch 003: 1703 / 3002 loss=2.694, ppl=6.47, wps=5830, ups=0.09, wpb=64922, bsz=128, num_updates=7657, lr=9.99467e-05, gnorm=2.448, loss_scale=1, train_wall=11, gb_free=2.8, wall=87739 2021-06-19 19:01:16 | INFO | train_inner | epoch 003: 1704 / 3002 loss=2.717, ppl=6.57, wps=5768.9, ups=0.09, wpb=64847, bsz=128, num_updates=7658, lr=9.99467e-05, gnorm=3.489, loss_scale=1, train_wall=11, gb_free=2.8, wall=87750 2021-06-19 19:01:27 | INFO | train_inner | epoch 003: 1705 / 3002 loss=2.611, ppl=6.11, wps=5668.9, ups=0.09, wpb=64767, bsz=128, num_updates=7659, lr=9.99467e-05, gnorm=2.304, loss_scale=1, train_wall=11, gb_free=2.8, wall=87762 2021-06-19 19:01:38 | INFO | train_inner | epoch 003: 1706 / 3002 loss=2.666, ppl=6.35, wps=5891, ups=0.09, wpb=64874, bsz=128, num_updates=7660, lr=9.99467e-05, gnorm=3.019, loss_scale=1, train_wall=11, gb_free=2.8, wall=87773 2021-06-19 19:01:49 | INFO | train_inner | epoch 003: 1707 / 3002 loss=2.808, ppl=7, wps=5898.1, ups=0.09, wpb=64885, bsz=128, num_updates=7661, lr=9.99467e-05, gnorm=2.551, loss_scale=1, train_wall=11, gb_free=2.8, wall=87784 2021-06-19 19:02:00 | INFO | train_inner | epoch 003: 1708 / 3002 loss=2.808, ppl=7, wps=5984.1, ups=0.09, wpb=64877, bsz=128, num_updates=7662, lr=9.99467e-05, gnorm=2.319, loss_scale=1, train_wall=10, gb_free=2.8, wall=87795 2021-06-19 19:02:11 | INFO | train_inner | epoch 003: 1709 / 3002 loss=2.535, ppl=5.79, wps=5859.2, ups=0.09, wpb=64814, bsz=128, num_updates=7663, lr=9.99467e-05, gnorm=2.238, loss_scale=1, train_wall=11, gb_free=2.8, wall=87806 2021-06-19 19:02:22 | INFO | train_inner | epoch 003: 1710 / 3002 loss=2.64, ppl=6.23, wps=5821.1, ups=0.09, wpb=64814, bsz=128, num_updates=7664, lr=9.99467e-05, gnorm=2.184, loss_scale=1, train_wall=11, gb_free=2.8, wall=87817 2021-06-19 19:02:34 | INFO | train_inner | epoch 003: 1711 / 3002 loss=2.711, ppl=6.55, wps=5761, ups=0.09, wpb=64880, bsz=128, num_updates=7665, lr=9.99467e-05, gnorm=3.185, loss_scale=1, train_wall=11, gb_free=2.8, wall=87828 2021-06-19 19:02:45 | INFO | train_inner | epoch 003: 1712 / 3002 loss=2.668, ppl=6.36, wps=5847.9, ups=0.09, wpb=64958, bsz=128, num_updates=7666, lr=9.99467e-05, gnorm=2.471, loss_scale=1, train_wall=11, gb_free=2.8, wall=87839 2021-06-19 19:02:56 | INFO | train_inner | epoch 003: 1713 / 3002 loss=2.501, ppl=5.66, wps=6005.5, ups=0.09, wpb=64851, bsz=128, num_updates=7667, lr=9.99467e-05, gnorm=2.541, loss_scale=1, train_wall=10, gb_free=2.8, wall=87850 2021-06-19 19:03:06 | INFO | train_inner | epoch 003: 1714 / 3002 loss=2.603, ppl=6.07, wps=6011.9, ups=0.09, wpb=64726, bsz=128, num_updates=7668, lr=9.99467e-05, gnorm=3.316, loss_scale=1, train_wall=10, gb_free=2.8, wall=87861 2021-06-19 19:03:17 | INFO | train_inner | epoch 003: 1715 / 3002 loss=2.54, ppl=5.82, wps=5855.9, ups=0.09, wpb=64805, bsz=128, num_updates=7669, lr=9.99466e-05, gnorm=2.213, loss_scale=1, train_wall=11, gb_free=2.8, wall=87872 2021-06-19 19:03:28 | INFO | train_inner | epoch 003: 1716 / 3002 loss=2.631, ppl=6.19, wps=5989.7, ups=0.09, wpb=64948, bsz=128, num_updates=7670, lr=9.99466e-05, gnorm=2.181, loss_scale=1, train_wall=10, gb_free=2.8, wall=87883 2021-06-19 19:03:39 | INFO | train_inner | epoch 003: 1717 / 3002 loss=2.648, ppl=6.27, wps=5861.8, ups=0.09, wpb=64797, bsz=128, num_updates=7671, lr=9.99466e-05, gnorm=2.47, loss_scale=1, train_wall=11, gb_free=2.8, wall=87894 2021-06-19 19:03:50 | INFO | train_inner | epoch 003: 1718 / 3002 loss=2.522, ppl=5.74, wps=5874.4, ups=0.09, wpb=64872, bsz=128, num_updates=7672, lr=9.99466e-05, gnorm=2.213, loss_scale=1, train_wall=11, gb_free=2.8, wall=87905 2021-06-19 19:04:02 | INFO | train_inner | epoch 003: 1719 / 3002 loss=2.673, ppl=6.38, wps=5803.6, ups=0.09, wpb=64927, bsz=128, num_updates=7673, lr=9.99466e-05, gnorm=2.196, loss_scale=1, train_wall=11, gb_free=2.8, wall=87916 2021-06-19 19:04:12 | INFO | train_inner | epoch 003: 1720 / 3002 loss=2.715, ppl=6.57, wps=5922.5, ups=0.09, wpb=64817, bsz=128, num_updates=7674, lr=9.99466e-05, gnorm=3.086, loss_scale=1, train_wall=10, gb_free=2.8, wall=87927 2021-06-19 19:04:24 | INFO | train_inner | epoch 003: 1721 / 3002 loss=2.68, ppl=6.41, wps=5786.2, ups=0.09, wpb=64840, bsz=128, num_updates=7675, lr=9.99466e-05, gnorm=2.646, loss_scale=1, train_wall=11, gb_free=2.8, wall=87938 2021-06-19 19:04:35 | INFO | train_inner | epoch 003: 1722 / 3002 loss=2.727, ppl=6.62, wps=5776.2, ups=0.09, wpb=64760, bsz=128, num_updates=7676, lr=9.99466e-05, gnorm=2.404, loss_scale=1, train_wall=11, gb_free=2.8, wall=87949 2021-06-19 19:04:46 | INFO | train_inner | epoch 003: 1723 / 3002 loss=2.588, ppl=6.01, wps=5759.7, ups=0.09, wpb=64867, bsz=128, num_updates=7677, lr=9.99466e-05, gnorm=2.153, loss_scale=1, train_wall=11, gb_free=2.8, wall=87961 2021-06-19 19:04:57 | INFO | train_inner | epoch 003: 1724 / 3002 loss=2.815, ppl=7.04, wps=5792, ups=0.09, wpb=64828, bsz=128, num_updates=7678, lr=9.99466e-05, gnorm=2.149, loss_scale=1, train_wall=11, gb_free=2.8, wall=87972 2021-06-19 19:05:08 | INFO | train_inner | epoch 003: 1725 / 3002 loss=2.616, ppl=6.13, wps=5984.3, ups=0.09, wpb=64856, bsz=128, num_updates=7679, lr=9.99466e-05, gnorm=3.459, loss_scale=1, train_wall=10, gb_free=2.8, wall=87983 2021-06-19 19:05:19 | INFO | train_inner | epoch 003: 1726 / 3002 loss=2.588, ppl=6.01, wps=5884.4, ups=0.09, wpb=64889, bsz=128, num_updates=7680, lr=9.99466e-05, gnorm=2.157, loss_scale=1, train_wall=11, gb_free=2.8, wall=87994 2021-06-19 19:05:30 | INFO | train_inner | epoch 003: 1727 / 3002 loss=2.695, ppl=6.48, wps=6072.4, ups=0.09, wpb=64938, bsz=128, num_updates=7681, lr=9.99465e-05, gnorm=2.18, loss_scale=1, train_wall=10, gb_free=2.8, wall=88004 2021-06-19 19:05:41 | INFO | train_inner | epoch 003: 1728 / 3002 loss=2.577, ppl=5.97, wps=5899.1, ups=0.09, wpb=64812, bsz=128, num_updates=7682, lr=9.99465e-05, gnorm=2.136, loss_scale=1, train_wall=11, gb_free=2.8, wall=88015 2021-06-19 19:05:52 | INFO | train_inner | epoch 003: 1729 / 3002 loss=2.714, ppl=6.56, wps=5885.3, ups=0.09, wpb=64889, bsz=128, num_updates=7683, lr=9.99465e-05, gnorm=2.229, loss_scale=1, train_wall=11, gb_free=2.8, wall=88026 2021-06-19 19:06:03 | INFO | train_inner | epoch 003: 1730 / 3002 loss=2.744, ppl=6.7, wps=5810.9, ups=0.09, wpb=64827, bsz=128, num_updates=7684, lr=9.99465e-05, gnorm=5.538, loss_scale=1, train_wall=11, gb_free=2.8, wall=88037 2021-06-19 19:06:14 | INFO | train_inner | epoch 003: 1731 / 3002 loss=2.776, ppl=6.85, wps=5929.5, ups=0.09, wpb=64746, bsz=128, num_updates=7685, lr=9.99465e-05, gnorm=2.105, loss_scale=1, train_wall=10, gb_free=2.8, wall=88048 2021-06-19 19:06:25 | INFO | train_inner | epoch 003: 1732 / 3002 loss=2.663, ppl=6.34, wps=5771.4, ups=0.09, wpb=64799, bsz=128, num_updates=7686, lr=9.99465e-05, gnorm=2.145, loss_scale=1, train_wall=11, gb_free=2.8, wall=88060 2021-06-19 19:06:36 | INFO | train_inner | epoch 003: 1733 / 3002 loss=2.579, ppl=5.98, wps=5858.7, ups=0.09, wpb=64882, bsz=128, num_updates=7687, lr=9.99465e-05, gnorm=2.153, loss_scale=1, train_wall=11, gb_free=2.8, wall=88071 2021-06-19 19:06:47 | INFO | train_inner | epoch 003: 1734 / 3002 loss=2.833, ppl=7.13, wps=5889, ups=0.09, wpb=64800, bsz=128, num_updates=7688, lr=9.99465e-05, gnorm=2.224, loss_scale=1, train_wall=11, gb_free=2.8, wall=88082 2021-06-19 19:06:58 | INFO | train_inner | epoch 003: 1735 / 3002 loss=2.63, ppl=6.19, wps=5846.9, ups=0.09, wpb=64798, bsz=128, num_updates=7689, lr=9.99465e-05, gnorm=2.285, loss_scale=1, train_wall=11, gb_free=2.8, wall=88093 2021-06-19 19:07:09 | INFO | train_inner | epoch 003: 1736 / 3002 loss=2.552, ppl=5.86, wps=5980.2, ups=0.09, wpb=64764, bsz=128, num_updates=7690, lr=9.99465e-05, gnorm=2.314, loss_scale=1, train_wall=10, gb_free=2.8, wall=88104 2021-06-19 19:07:20 | INFO | train_inner | epoch 003: 1737 / 3002 loss=2.672, ppl=6.38, wps=5803.4, ups=0.09, wpb=64893, bsz=128, num_updates=7691, lr=9.99465e-05, gnorm=4.061, loss_scale=1, train_wall=11, gb_free=2.8, wall=88115 2021-06-19 19:07:32 | INFO | train_inner | epoch 003: 1738 / 3002 loss=2.744, ppl=6.7, wps=5830.6, ups=0.09, wpb=64786, bsz=128, num_updates=7692, lr=9.99465e-05, gnorm=2.156, loss_scale=1, train_wall=11, gb_free=2.8, wall=88126 2021-06-19 19:07:43 | INFO | train_inner | epoch 003: 1739 / 3002 loss=2.53, ppl=5.77, wps=5839.5, ups=0.09, wpb=64771, bsz=128, num_updates=7693, lr=9.99465e-05, gnorm=2.16, loss_scale=1, train_wall=11, gb_free=2.8, wall=88137 2021-06-19 19:07:54 | INFO | train_inner | epoch 003: 1740 / 3002 loss=2.548, ppl=5.85, wps=5688.3, ups=0.09, wpb=64825, bsz=128, num_updates=7694, lr=9.99464e-05, gnorm=2.19, loss_scale=1, train_wall=11, gb_free=2.8, wall=88148 2021-06-19 19:08:05 | INFO | train_inner | epoch 003: 1741 / 3002 loss=2.753, ppl=6.74, wps=5938.8, ups=0.09, wpb=64763, bsz=128, num_updates=7695, lr=9.99464e-05, gnorm=2.141, loss_scale=1, train_wall=10, gb_free=2.8, wall=88159 2021-06-19 19:08:16 | INFO | train_inner | epoch 003: 1742 / 3002 loss=2.748, ppl=6.72, wps=5834.4, ups=0.09, wpb=64743, bsz=128, num_updates=7696, lr=9.99464e-05, gnorm=2.184, loss_scale=1, train_wall=11, gb_free=2.8, wall=88170 2021-06-19 19:08:27 | INFO | train_inner | epoch 003: 1743 / 3002 loss=2.81, ppl=7.01, wps=5796.7, ups=0.09, wpb=64802, bsz=128, num_updates=7697, lr=9.99464e-05, gnorm=2.176, loss_scale=1, train_wall=11, gb_free=2.8, wall=88182 2021-06-19 19:08:38 | INFO | train_inner | epoch 003: 1744 / 3002 loss=2.581, ppl=5.98, wps=5993.3, ups=0.09, wpb=64912, bsz=128, num_updates=7698, lr=9.99464e-05, gnorm=2.035, loss_scale=1, train_wall=10, gb_free=2.8, wall=88192 2021-06-19 19:08:49 | INFO | train_inner | epoch 003: 1745 / 3002 loss=2.706, ppl=6.52, wps=5900.8, ups=0.09, wpb=64751, bsz=128, num_updates=7699, lr=9.99464e-05, gnorm=5.684, loss_scale=1, train_wall=11, gb_free=2.8, wall=88203 2021-06-19 19:09:00 | INFO | train_inner | epoch 003: 1746 / 3002 loss=2.715, ppl=6.57, wps=5891, ups=0.09, wpb=64863, bsz=128, num_updates=7700, lr=9.99464e-05, gnorm=2.245, loss_scale=1, train_wall=11, gb_free=2.8, wall=88214 2021-06-19 19:09:11 | INFO | train_inner | epoch 003: 1747 / 3002 loss=2.593, ppl=6.03, wps=5813.9, ups=0.09, wpb=64883, bsz=128, num_updates=7701, lr=9.99464e-05, gnorm=2.375, loss_scale=1, train_wall=11, gb_free=2.8, wall=88226 2021-06-19 19:09:22 | INFO | train_inner | epoch 003: 1748 / 3002 loss=2.571, ppl=5.94, wps=5967.4, ups=0.09, wpb=64889, bsz=128, num_updates=7702, lr=9.99464e-05, gnorm=3.187, loss_scale=1, train_wall=10, gb_free=2.8, wall=88236 2021-06-19 19:09:33 | INFO | train_inner | epoch 003: 1749 / 3002 loss=2.662, ppl=6.33, wps=5775.5, ups=0.09, wpb=64827, bsz=128, num_updates=7703, lr=9.99464e-05, gnorm=2.352, loss_scale=1, train_wall=11, gb_free=2.8, wall=88248 2021-06-19 19:09:44 | INFO | train_inner | epoch 003: 1750 / 3002 loss=2.7, ppl=6.5, wps=5865.2, ups=0.09, wpb=64880, bsz=128, num_updates=7704, lr=9.99464e-05, gnorm=2.302, loss_scale=1, train_wall=11, gb_free=2.8, wall=88259 2021-06-19 19:09:56 | INFO | train_inner | epoch 003: 1751 / 3002 loss=2.666, ppl=6.34, wps=5731.6, ups=0.09, wpb=64941, bsz=128, num_updates=7705, lr=9.99464e-05, gnorm=2.347, loss_scale=1, train_wall=11, gb_free=2.8, wall=88270 2021-06-19 19:10:07 | INFO | train_inner | epoch 003: 1752 / 3002 loss=2.782, ppl=6.88, wps=5803.9, ups=0.09, wpb=64782, bsz=128, num_updates=7706, lr=9.99463e-05, gnorm=3.387, loss_scale=1, train_wall=11, gb_free=2.8, wall=88281 2021-06-19 19:10:18 | INFO | train_inner | epoch 003: 1753 / 3002 loss=2.571, ppl=5.94, wps=5847.6, ups=0.09, wpb=64832, bsz=128, num_updates=7707, lr=9.99463e-05, gnorm=2.159, loss_scale=1, train_wall=11, gb_free=2.8, wall=88292 2021-06-19 19:10:29 | INFO | train_inner | epoch 003: 1754 / 3002 loss=2.583, ppl=5.99, wps=5823, ups=0.09, wpb=64828, bsz=128, num_updates=7708, lr=9.99463e-05, gnorm=2.289, loss_scale=1, train_wall=11, gb_free=2.8, wall=88303 2021-06-19 19:10:40 | INFO | train_inner | epoch 003: 1755 / 3002 loss=2.617, ppl=6.13, wps=6008, ups=0.09, wpb=64829, bsz=128, num_updates=7709, lr=9.99463e-05, gnorm=2.997, loss_scale=1, train_wall=10, gb_free=2.8, wall=88314 2021-06-19 19:10:51 | INFO | train_inner | epoch 003: 1756 / 3002 loss=2.599, ppl=6.06, wps=5877.8, ups=0.09, wpb=64915, bsz=128, num_updates=7710, lr=9.99463e-05, gnorm=2.122, loss_scale=1, train_wall=11, gb_free=2.8, wall=88325 2021-06-19 19:11:02 | INFO | train_inner | epoch 003: 1757 / 3002 loss=2.56, ppl=5.9, wps=5775.5, ups=0.09, wpb=64770, bsz=128, num_updates=7711, lr=9.99463e-05, gnorm=2.657, loss_scale=1, train_wall=11, gb_free=2.8, wall=88336 2021-06-19 19:11:13 | INFO | train_inner | epoch 003: 1758 / 3002 loss=2.466, ppl=5.53, wps=5769.3, ups=0.09, wpb=64889, bsz=128, num_updates=7712, lr=9.99463e-05, gnorm=2.347, loss_scale=1, train_wall=11, gb_free=2.8, wall=88348 2021-06-19 19:11:25 | INFO | train_inner | epoch 003: 1759 / 3002 loss=2.665, ppl=6.34, wps=5793.4, ups=0.09, wpb=64787, bsz=128, num_updates=7713, lr=9.99463e-05, gnorm=3.741, loss_scale=1, train_wall=11, gb_free=2.8, wall=88359 2021-06-19 19:11:36 | INFO | train_inner | epoch 003: 1760 / 3002 loss=2.816, ppl=7.04, wps=5834.6, ups=0.09, wpb=64756, bsz=128, num_updates=7714, lr=9.99463e-05, gnorm=2.289, loss_scale=1, train_wall=11, gb_free=2.8, wall=88370 2021-06-19 19:11:47 | INFO | train_inner | epoch 003: 1761 / 3002 loss=2.716, ppl=6.57, wps=5709.9, ups=0.09, wpb=64844, bsz=128, num_updates=7715, lr=9.99463e-05, gnorm=2.425, loss_scale=1, train_wall=11, gb_free=2.8, wall=88381 2021-06-19 19:11:58 | INFO | train_inner | epoch 003: 1762 / 3002 loss=2.48, ppl=5.58, wps=5895.4, ups=0.09, wpb=64738, bsz=128, num_updates=7716, lr=9.99463e-05, gnorm=2.242, loss_scale=1, train_wall=11, gb_free=2.8, wall=88392 2021-06-19 19:12:09 | INFO | train_inner | epoch 003: 1763 / 3002 loss=2.734, ppl=6.65, wps=5956.3, ups=0.09, wpb=64877, bsz=128, num_updates=7717, lr=9.99463e-05, gnorm=2.343, loss_scale=1, train_wall=10, gb_free=2.8, wall=88403 2021-06-19 19:12:20 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 2021-06-19 19:12:31 | INFO | train_inner | epoch 003: 1765 / 3002 loss=2.757, ppl=6.76, wps=2924, ups=0.05, wpb=64761, bsz=128, num_updates=7718, lr=9.99463e-05, gnorm=2.227, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=88425 2021-06-19 19:12:42 | INFO | train_inner | epoch 003: 1766 / 3002 loss=2.673, ppl=6.38, wps=5733.3, ups=0.09, wpb=64818, bsz=128, num_updates=7719, lr=9.99462e-05, gnorm=2.106, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88437 2021-06-19 19:12:54 | INFO | train_inner | epoch 003: 1767 / 3002 loss=2.517, ppl=5.72, wps=5767.7, ups=0.09, wpb=64864, bsz=128, num_updates=7720, lr=9.99462e-05, gnorm=2.37, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88448 2021-06-19 19:13:04 | INFO | train_inner | epoch 003: 1768 / 3002 loss=2.695, ppl=6.47, wps=5966.3, ups=0.09, wpb=64896, bsz=128, num_updates=7721, lr=9.99462e-05, gnorm=2.958, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=88459 2021-06-19 19:13:15 | INFO | train_inner | epoch 003: 1769 / 3002 loss=2.563, ppl=5.91, wps=5923.6, ups=0.09, wpb=64850, bsz=128, num_updates=7722, lr=9.99462e-05, gnorm=2.491, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=88470 2021-06-19 19:13:27 | INFO | train_inner | epoch 003: 1770 / 3002 loss=2.604, ppl=6.08, wps=5783.8, ups=0.09, wpb=64839, bsz=128, num_updates=7723, lr=9.99462e-05, gnorm=2.213, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88481 2021-06-19 19:13:38 | INFO | train_inner | epoch 003: 1771 / 3002 loss=2.676, ppl=6.39, wps=5874.8, ups=0.09, wpb=64889, bsz=128, num_updates=7724, lr=9.99462e-05, gnorm=2.137, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88492 2021-06-19 19:13:49 | INFO | train_inner | epoch 003: 1772 / 3002 loss=2.557, ppl=5.88, wps=5919.6, ups=0.09, wpb=64771, bsz=128, num_updates=7725, lr=9.99462e-05, gnorm=2.098, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=88503 2021-06-19 19:14:00 | INFO | train_inner | epoch 003: 1773 / 3002 loss=2.739, ppl=6.68, wps=5776.2, ups=0.09, wpb=64833, bsz=128, num_updates=7726, lr=9.99462e-05, gnorm=3.379, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88514 2021-06-19 19:14:11 | INFO | train_inner | epoch 003: 1774 / 3002 loss=2.471, ppl=5.55, wps=5847.6, ups=0.09, wpb=64844, bsz=128, num_updates=7727, lr=9.99462e-05, gnorm=2.218, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88525 2021-06-19 19:14:22 | INFO | train_inner | epoch 003: 1775 / 3002 loss=2.683, ppl=6.42, wps=5823, ups=0.09, wpb=64837, bsz=128, num_updates=7728, lr=9.99462e-05, gnorm=3.16, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88536 2021-06-19 19:14:33 | INFO | train_inner | epoch 003: 1776 / 3002 loss=2.727, ppl=6.62, wps=5905.8, ups=0.09, wpb=64867, bsz=128, num_updates=7729, lr=9.99462e-05, gnorm=2.163, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88547 2021-06-19 19:14:44 | INFO | train_inner | epoch 003: 1777 / 3002 loss=2.464, ppl=5.52, wps=5760.7, ups=0.09, wpb=64819, bsz=128, num_updates=7730, lr=9.99462e-05, gnorm=2.331, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88559 2021-06-19 19:14:55 | INFO | train_inner | epoch 003: 1778 / 3002 loss=2.737, ppl=6.67, wps=5935.7, ups=0.09, wpb=64923, bsz=128, num_updates=7731, lr=9.99461e-05, gnorm=2.247, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=88570 2021-06-19 19:15:06 | INFO | train_inner | epoch 003: 1779 / 3002 loss=2.591, ppl=6.03, wps=5847, ups=0.09, wpb=64857, bsz=128, num_updates=7732, lr=9.99461e-05, gnorm=2.14, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88581 2021-06-19 19:15:17 | INFO | train_inner | epoch 003: 1780 / 3002 loss=2.607, ppl=6.09, wps=6079.9, ups=0.09, wpb=64861, bsz=128, num_updates=7733, lr=9.99461e-05, gnorm=2.4, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=88591 2021-06-19 19:15:28 | INFO | train_inner | epoch 003: 1781 / 3002 loss=2.71, ppl=6.54, wps=5815.8, ups=0.09, wpb=64787, bsz=128, num_updates=7734, lr=9.99461e-05, gnorm=2.244, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88602 2021-06-19 19:15:39 | INFO | train_inner | epoch 003: 1782 / 3002 loss=2.688, ppl=6.45, wps=5742.5, ups=0.09, wpb=64853, bsz=128, num_updates=7735, lr=9.99461e-05, gnorm=2.269, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88614 2021-06-19 19:15:51 | INFO | train_inner | epoch 003: 1783 / 3002 loss=2.684, ppl=6.43, wps=5788.9, ups=0.09, wpb=64833, bsz=128, num_updates=7736, lr=9.99461e-05, gnorm=2.231, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88625 2021-06-19 19:16:02 | INFO | train_inner | epoch 003: 1784 / 3002 loss=2.633, ppl=6.21, wps=5749.4, ups=0.09, wpb=64779, bsz=128, num_updates=7737, lr=9.99461e-05, gnorm=2.156, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88636 2021-06-19 19:16:13 | INFO | train_inner | epoch 003: 1785 / 3002 loss=2.618, ppl=6.14, wps=5784.8, ups=0.09, wpb=64851, bsz=128, num_updates=7738, lr=9.99461e-05, gnorm=2.191, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88647 2021-06-19 19:16:24 | INFO | train_inner | epoch 003: 1786 / 3002 loss=2.597, ppl=6.05, wps=5790.9, ups=0.09, wpb=64853, bsz=128, num_updates=7739, lr=9.99461e-05, gnorm=2.079, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88659 2021-06-19 19:16:36 | INFO | train_inner | epoch 003: 1787 / 3002 loss=2.57, ppl=5.94, wps=5730.8, ups=0.09, wpb=64716, bsz=128, num_updates=7740, lr=9.99461e-05, gnorm=2.237, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88670 2021-06-19 19:16:47 | INFO | train_inner | epoch 003: 1788 / 3002 loss=2.673, ppl=6.38, wps=5869.7, ups=0.09, wpb=64846, bsz=128, num_updates=7741, lr=9.99461e-05, gnorm=2.304, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88681 2021-06-19 19:16:58 | INFO | train_inner | epoch 003: 1789 / 3002 loss=2.697, ppl=6.49, wps=5866.4, ups=0.09, wpb=64837, bsz=128, num_updates=7742, lr=9.99461e-05, gnorm=2.247, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88692 2021-06-19 19:17:09 | INFO | train_inner | epoch 003: 1790 / 3002 loss=2.669, ppl=6.36, wps=5886, ups=0.09, wpb=64863, bsz=128, num_updates=7743, lr=9.99461e-05, gnorm=2.273, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88703 2021-06-19 19:17:20 | INFO | train_inner | epoch 003: 1791 / 3002 loss=2.535, ppl=5.8, wps=5806.2, ups=0.09, wpb=64886, bsz=128, num_updates=7744, lr=9.9946e-05, gnorm=2.108, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88714 2021-06-19 19:17:31 | INFO | train_inner | epoch 003: 1792 / 3002 loss=2.569, ppl=5.93, wps=5835.4, ups=0.09, wpb=64757, bsz=128, num_updates=7745, lr=9.9946e-05, gnorm=2.061, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88725 2021-06-19 19:17:42 | INFO | train_inner | epoch 003: 1793 / 3002 loss=2.714, ppl=6.56, wps=5897.2, ups=0.09, wpb=64842, bsz=128, num_updates=7746, lr=9.9946e-05, gnorm=2.193, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88736 2021-06-19 19:17:53 | INFO | train_inner | epoch 003: 1794 / 3002 loss=2.627, ppl=6.18, wps=5860.9, ups=0.09, wpb=64849, bsz=128, num_updates=7747, lr=9.9946e-05, gnorm=2.269, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88747 2021-06-19 19:18:04 | INFO | train_inner | epoch 003: 1795 / 3002 loss=2.697, ppl=6.48, wps=5905.5, ups=0.09, wpb=64781, bsz=128, num_updates=7748, lr=9.9946e-05, gnorm=2.497, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=88758 2021-06-19 19:18:15 | INFO | train_inner | epoch 003: 1796 / 3002 loss=2.469, ppl=5.54, wps=5804, ups=0.09, wpb=64803, bsz=128, num_updates=7749, lr=9.9946e-05, gnorm=2.285, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88769 2021-06-19 19:18:26 | INFO | train_inner | epoch 003: 1797 / 3002 loss=2.584, ppl=5.99, wps=5733.3, ups=0.09, wpb=64824, bsz=128, num_updates=7750, lr=9.9946e-05, gnorm=6.936, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88781 2021-06-19 19:18:37 | INFO | train_inner | epoch 003: 1798 / 3002 loss=2.659, ppl=6.31, wps=5875.7, ups=0.09, wpb=64822, bsz=128, num_updates=7751, lr=9.9946e-05, gnorm=2.163, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88792 2021-06-19 19:18:49 | INFO | train_inner | epoch 003: 1799 / 3002 loss=2.632, ppl=6.2, wps=5821.8, ups=0.09, wpb=64829, bsz=128, num_updates=7752, lr=9.9946e-05, gnorm=6.507, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88803 2021-06-19 19:19:00 | INFO | train_inner | epoch 003: 1800 / 3002 loss=2.637, ppl=6.22, wps=5868.9, ups=0.09, wpb=64893, bsz=128, num_updates=7753, lr=9.9946e-05, gnorm=2.157, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88814 2021-06-19 19:19:11 | INFO | train_inner | epoch 003: 1801 / 3002 loss=2.675, ppl=6.38, wps=5801.8, ups=0.09, wpb=64777, bsz=128, num_updates=7754, lr=9.9946e-05, gnorm=2.325, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88825 2021-06-19 19:19:22 | INFO | train_inner | epoch 003: 1802 / 3002 loss=2.605, ppl=6.08, wps=5831.2, ups=0.09, wpb=64843, bsz=128, num_updates=7755, lr=9.9946e-05, gnorm=2.027, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88836 2021-06-19 19:19:33 | INFO | train_inner | epoch 003: 1803 / 3002 loss=2.797, ppl=6.95, wps=5856.4, ups=0.09, wpb=64877, bsz=128, num_updates=7756, lr=9.99459e-05, gnorm=2.162, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88847 2021-06-19 19:19:44 | INFO | train_inner | epoch 003: 1804 / 3002 loss=2.501, ppl=5.66, wps=5820.3, ups=0.09, wpb=64857, bsz=128, num_updates=7757, lr=9.99459e-05, gnorm=2.072, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88859 2021-06-19 19:19:55 | INFO | train_inner | epoch 003: 1805 / 3002 loss=2.592, ppl=6.03, wps=5770.9, ups=0.09, wpb=64817, bsz=128, num_updates=7758, lr=9.99459e-05, gnorm=2.83, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88870 2021-06-19 19:20:07 | INFO | train_inner | epoch 003: 1806 / 3002 loss=2.697, ppl=6.48, wps=5851.9, ups=0.09, wpb=64815, bsz=128, num_updates=7759, lr=9.99459e-05, gnorm=2.297, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88881 2021-06-19 19:20:18 | INFO | train_inner | epoch 003: 1807 / 3002 loss=2.698, ppl=6.49, wps=5810.5, ups=0.09, wpb=64875, bsz=128, num_updates=7760, lr=9.99459e-05, gnorm=6.609, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88892 2021-06-19 19:20:29 | INFO | train_inner | epoch 003: 1808 / 3002 loss=2.673, ppl=6.38, wps=5883.9, ups=0.09, wpb=64837, bsz=128, num_updates=7761, lr=9.99459e-05, gnorm=2.192, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88903 2021-06-19 19:20:40 | INFO | train_inner | epoch 003: 1809 / 3002 loss=2.63, ppl=6.19, wps=5910.9, ups=0.09, wpb=64838, bsz=128, num_updates=7762, lr=9.99459e-05, gnorm=2.197, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88914 2021-06-19 19:20:51 | INFO | train_inner | epoch 003: 1810 / 3002 loss=2.71, ppl=6.54, wps=5832.4, ups=0.09, wpb=64864, bsz=128, num_updates=7763, lr=9.99459e-05, gnorm=2.104, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88925 2021-06-19 19:21:02 | INFO | train_inner | epoch 003: 1811 / 3002 loss=2.557, ppl=5.89, wps=5790.7, ups=0.09, wpb=64750, bsz=128, num_updates=7764, lr=9.99459e-05, gnorm=2.161, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88936 2021-06-19 19:21:13 | INFO | train_inner | epoch 003: 1812 / 3002 loss=2.815, ppl=7.04, wps=5851.1, ups=0.09, wpb=64881, bsz=128, num_updates=7765, lr=9.99459e-05, gnorm=2.16, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88947 2021-06-19 19:21:24 | INFO | train_inner | epoch 003: 1813 / 3002 loss=2.614, ppl=6.12, wps=5894.8, ups=0.09, wpb=64931, bsz=128, num_updates=7766, lr=9.99459e-05, gnorm=2.187, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88958 2021-06-19 19:21:35 | INFO | train_inner | epoch 003: 1814 / 3002 loss=2.586, ppl=6, wps=5912.7, ups=0.09, wpb=64830, bsz=128, num_updates=7767, lr=9.99459e-05, gnorm=9.168, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=88969 2021-06-19 19:21:46 | INFO | train_inner | epoch 003: 1815 / 3002 loss=2.632, ppl=6.2, wps=5862.3, ups=0.09, wpb=64854, bsz=128, num_updates=7768, lr=9.99459e-05, gnorm=2.932, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88980 2021-06-19 19:21:57 | INFO | train_inner | epoch 003: 1816 / 3002 loss=2.718, ppl=6.58, wps=6002, ups=0.09, wpb=64818, bsz=128, num_updates=7769, lr=9.99458e-05, gnorm=4.272, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=88991 2021-06-19 19:22:08 | INFO | train_inner | epoch 003: 1817 / 3002 loss=2.768, ppl=6.81, wps=5711.8, ups=0.09, wpb=64803, bsz=128, num_updates=7770, lr=9.99458e-05, gnorm=2.368, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89003 2021-06-19 19:22:19 | INFO | train_inner | epoch 003: 1818 / 3002 loss=2.594, ppl=6.04, wps=5884.7, ups=0.09, wpb=64927, bsz=128, num_updates=7771, lr=9.99458e-05, gnorm=2.14, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89014 2021-06-19 19:22:30 | INFO | train_inner | epoch 003: 1819 / 3002 loss=2.713, ppl=6.56, wps=5820.1, ups=0.09, wpb=64847, bsz=128, num_updates=7772, lr=9.99458e-05, gnorm=2.258, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89025 2021-06-19 19:22:42 | INFO | train_inner | epoch 003: 1820 / 3002 loss=2.633, ppl=6.2, wps=5833, ups=0.09, wpb=64890, bsz=128, num_updates=7773, lr=9.99458e-05, gnorm=2.256, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89036 2021-06-19 19:22:52 | INFO | train_inner | epoch 003: 1821 / 3002 loss=2.605, ppl=6.08, wps=6002, ups=0.09, wpb=64917, bsz=128, num_updates=7774, lr=9.99458e-05, gnorm=2.173, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89047 2021-06-19 19:23:03 | INFO | train_inner | epoch 003: 1822 / 3002 loss=2.606, ppl=6.09, wps=5850.6, ups=0.09, wpb=64742, bsz=128, num_updates=7775, lr=9.99458e-05, gnorm=2.182, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89058 2021-06-19 19:23:15 | INFO | train_inner | epoch 003: 1823 / 3002 loss=2.624, ppl=6.16, wps=5746.2, ups=0.09, wpb=64838, bsz=128, num_updates=7776, lr=9.99458e-05, gnorm=2.134, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89069 2021-06-19 19:23:26 | INFO | train_inner | epoch 003: 1824 / 3002 loss=2.751, ppl=6.73, wps=5893.1, ups=0.09, wpb=64886, bsz=128, num_updates=7777, lr=9.99458e-05, gnorm=2.344, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89080 2021-06-19 19:23:37 | INFO | train_inner | epoch 003: 1825 / 3002 loss=2.704, ppl=6.52, wps=5894.6, ups=0.09, wpb=64787, bsz=128, num_updates=7778, lr=9.99458e-05, gnorm=2.134, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89091 2021-06-19 19:23:48 | INFO | train_inner | epoch 003: 1826 / 3002 loss=2.701, ppl=6.5, wps=5793.7, ups=0.09, wpb=64841, bsz=128, num_updates=7779, lr=9.99458e-05, gnorm=2.191, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89102 2021-06-19 19:23:59 | INFO | train_inner | epoch 003: 1827 / 3002 loss=2.582, ppl=5.99, wps=5982.7, ups=0.09, wpb=64827, bsz=128, num_updates=7780, lr=9.99458e-05, gnorm=2.316, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89113 2021-06-19 19:24:10 | INFO | train_inner | epoch 003: 1828 / 3002 loss=2.69, ppl=6.45, wps=5729.4, ups=0.09, wpb=64805, bsz=128, num_updates=7781, lr=9.99457e-05, gnorm=2.09, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89124 2021-06-19 19:24:21 | INFO | train_inner | epoch 003: 1829 / 3002 loss=2.527, ppl=5.76, wps=5885.8, ups=0.09, wpb=64909, bsz=128, num_updates=7782, lr=9.99457e-05, gnorm=2.196, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89135 2021-06-19 19:24:32 | INFO | train_inner | epoch 003: 1830 / 3002 loss=2.619, ppl=6.14, wps=5803.3, ups=0.09, wpb=64882, bsz=128, num_updates=7783, lr=9.99457e-05, gnorm=2.181, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89147 2021-06-19 19:24:43 | INFO | train_inner | epoch 003: 1831 / 3002 loss=2.612, ppl=6.11, wps=5926, ups=0.09, wpb=64842, bsz=128, num_updates=7784, lr=9.99457e-05, gnorm=2.23, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89158 2021-06-19 19:24:54 | INFO | train_inner | epoch 003: 1832 / 3002 loss=2.74, ppl=6.68, wps=5923.9, ups=0.09, wpb=64790, bsz=128, num_updates=7785, lr=9.99457e-05, gnorm=2.217, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89168 2021-06-19 19:25:05 | INFO | train_inner | epoch 003: 1833 / 3002 loss=2.476, ppl=5.56, wps=5766.4, ups=0.09, wpb=64905, bsz=128, num_updates=7786, lr=9.99457e-05, gnorm=2.157, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89180 2021-06-19 19:25:17 | INFO | train_inner | epoch 003: 1834 / 3002 loss=2.677, ppl=6.39, wps=5778.7, ups=0.09, wpb=64793, bsz=128, num_updates=7787, lr=9.99457e-05, gnorm=2.13, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89191 2021-06-19 19:25:28 | INFO | train_inner | epoch 003: 1835 / 3002 loss=2.619, ppl=6.14, wps=5881.1, ups=0.09, wpb=64912, bsz=128, num_updates=7788, lr=9.99457e-05, gnorm=2.168, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89202 2021-06-19 19:25:39 | INFO | train_inner | epoch 003: 1836 / 3002 loss=2.61, ppl=6.11, wps=5807.7, ups=0.09, wpb=64867, bsz=128, num_updates=7789, lr=9.99457e-05, gnorm=2.166, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89213 2021-06-19 19:25:50 | INFO | train_inner | epoch 003: 1837 / 3002 loss=2.657, ppl=6.31, wps=5910.7, ups=0.09, wpb=64812, bsz=128, num_updates=7790, lr=9.99457e-05, gnorm=2.339, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89224 2021-06-19 19:26:01 | INFO | train_inner | epoch 003: 1838 / 3002 loss=2.601, ppl=6.07, wps=5873, ups=0.09, wpb=64767, bsz=128, num_updates=7791, lr=9.99457e-05, gnorm=2.118, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89235 2021-06-19 19:26:12 | INFO | train_inner | epoch 003: 1839 / 3002 loss=2.616, ppl=6.13, wps=5800.1, ups=0.09, wpb=64801, bsz=128, num_updates=7792, lr=9.99457e-05, gnorm=2.111, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89246 2021-06-19 19:26:23 | INFO | train_inner | epoch 003: 1840 / 3002 loss=2.58, ppl=5.98, wps=5939.1, ups=0.09, wpb=64817, bsz=128, num_updates=7793, lr=9.99457e-05, gnorm=2.149, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89257 2021-06-19 19:26:34 | INFO | train_inner | epoch 003: 1841 / 3002 loss=2.655, ppl=6.3, wps=5760.6, ups=0.09, wpb=64756, bsz=128, num_updates=7794, lr=9.99456e-05, gnorm=2.229, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89268 2021-06-19 19:26:45 | INFO | train_inner | epoch 003: 1842 / 3002 loss=2.498, ppl=5.65, wps=5813.7, ups=0.09, wpb=64806, bsz=128, num_updates=7795, lr=9.99456e-05, gnorm=2.141, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89280 2021-06-19 19:26:57 | INFO | train_inner | epoch 003: 1843 / 3002 loss=2.612, ppl=6.11, wps=5743.7, ups=0.09, wpb=64912, bsz=128, num_updates=7796, lr=9.99456e-05, gnorm=2.051, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89291 2021-06-19 19:27:08 | INFO | train_inner | epoch 003: 1844 / 3002 loss=2.616, ppl=6.13, wps=5795.7, ups=0.09, wpb=64729, bsz=128, num_updates=7797, lr=9.99456e-05, gnorm=2.21, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89302 2021-06-19 19:27:19 | INFO | train_inner | epoch 003: 1845 / 3002 loss=2.61, ppl=6.11, wps=5748.8, ups=0.09, wpb=64837, bsz=128, num_updates=7798, lr=9.99456e-05, gnorm=2.162, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89313 2021-06-19 19:27:30 | INFO | train_inner | epoch 003: 1846 / 3002 loss=2.755, ppl=6.75, wps=5814.4, ups=0.09, wpb=64730, bsz=128, num_updates=7799, lr=9.99456e-05, gnorm=2.331, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89325 2021-06-19 19:27:41 | INFO | train_inner | epoch 003: 1847 / 3002 loss=2.644, ppl=6.25, wps=5913.3, ups=0.09, wpb=64878, bsz=128, num_updates=7800, lr=9.99456e-05, gnorm=2.217, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89335 2021-06-19 19:27:52 | INFO | train_inner | epoch 003: 1848 / 3002 loss=2.546, ppl=5.84, wps=5861.9, ups=0.09, wpb=64902, bsz=128, num_updates=7801, lr=9.99456e-05, gnorm=5.917, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89347 2021-06-19 19:28:03 | INFO | train_inner | epoch 003: 1849 / 3002 loss=2.583, ppl=5.99, wps=5953.4, ups=0.09, wpb=64794, bsz=128, num_updates=7802, lr=9.99456e-05, gnorm=2.802, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89357 2021-06-19 19:28:14 | INFO | train_inner | epoch 003: 1850 / 3002 loss=2.505, ppl=5.68, wps=5748, ups=0.09, wpb=64788, bsz=128, num_updates=7803, lr=9.99456e-05, gnorm=2.652, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89369 2021-06-19 19:28:25 | INFO | train_inner | epoch 003: 1851 / 3002 loss=2.509, ppl=5.69, wps=5853.2, ups=0.09, wpb=64817, bsz=128, num_updates=7804, lr=9.99456e-05, gnorm=2.675, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89380 2021-06-19 19:28:36 | INFO | train_inner | epoch 003: 1852 / 3002 loss=2.494, ppl=5.63, wps=5899, ups=0.09, wpb=64795, bsz=128, num_updates=7805, lr=9.99456e-05, gnorm=2.204, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89391 2021-06-19 19:28:47 | INFO | train_inner | epoch 003: 1853 / 3002 loss=2.596, ppl=6.05, wps=5902.1, ups=0.09, wpb=64856, bsz=128, num_updates=7806, lr=9.99455e-05, gnorm=2.103, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89402 2021-06-19 19:28:58 | INFO | train_inner | epoch 003: 1854 / 3002 loss=2.607, ppl=6.09, wps=5897.3, ups=0.09, wpb=64840, bsz=128, num_updates=7807, lr=9.99455e-05, gnorm=2.091, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89413 2021-06-19 19:29:09 | INFO | train_inner | epoch 003: 1855 / 3002 loss=2.578, ppl=5.97, wps=5877.5, ups=0.09, wpb=64893, bsz=128, num_updates=7808, lr=9.99455e-05, gnorm=2, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89424 2021-06-19 19:29:21 | INFO | train_inner | epoch 003: 1856 / 3002 loss=2.701, ppl=6.5, wps=5857.2, ups=0.09, wpb=64895, bsz=128, num_updates=7809, lr=9.99455e-05, gnorm=2.062, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89435 2021-06-19 19:29:32 | INFO | train_inner | epoch 003: 1857 / 3002 loss=2.624, ppl=6.16, wps=5893.7, ups=0.09, wpb=64794, bsz=128, num_updates=7810, lr=9.99455e-05, gnorm=2.139, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89446 2021-06-19 19:29:43 | INFO | train_inner | epoch 003: 1858 / 3002 loss=2.55, ppl=5.85, wps=5841.2, ups=0.09, wpb=64826, bsz=128, num_updates=7811, lr=9.99455e-05, gnorm=2.227, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89457 2021-06-19 19:29:54 | INFO | train_inner | epoch 003: 1859 / 3002 loss=2.621, ppl=6.15, wps=5893.1, ups=0.09, wpb=64851, bsz=128, num_updates=7812, lr=9.99455e-05, gnorm=2.206, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89468 2021-06-19 19:30:05 | INFO | train_inner | epoch 003: 1860 / 3002 loss=2.364, ppl=5.15, wps=5920.9, ups=0.09, wpb=64821, bsz=128, num_updates=7813, lr=9.99455e-05, gnorm=2.122, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89479 2021-06-19 19:30:16 | INFO | train_inner | epoch 003: 1861 / 3002 loss=2.608, ppl=6.1, wps=5927, ups=0.09, wpb=64843, bsz=128, num_updates=7814, lr=9.99455e-05, gnorm=7.6, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89490 2021-06-19 19:30:27 | INFO | train_inner | epoch 003: 1862 / 3002 loss=2.679, ppl=6.4, wps=5716.6, ups=0.09, wpb=64852, bsz=128, num_updates=7815, lr=9.99455e-05, gnorm=2.119, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89501 2021-06-19 19:30:38 | INFO | train_inner | epoch 003: 1863 / 3002 loss=2.582, ppl=5.99, wps=5985.8, ups=0.09, wpb=64864, bsz=128, num_updates=7816, lr=9.99455e-05, gnorm=2.793, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89512 2021-06-19 19:30:49 | INFO | train_inner | epoch 003: 1864 / 3002 loss=2.495, ppl=5.64, wps=5781.9, ups=0.09, wpb=64824, bsz=128, num_updates=7817, lr=9.99455e-05, gnorm=2.058, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89523 2021-06-19 19:31:00 | INFO | train_inner | epoch 003: 1865 / 3002 loss=2.644, ppl=6.25, wps=5848, ups=0.09, wpb=64826, bsz=128, num_updates=7818, lr=9.99455e-05, gnorm=2.284, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89534 2021-06-19 19:31:11 | INFO | train_inner | epoch 003: 1866 / 3002 loss=2.638, ppl=6.23, wps=5948.1, ups=0.09, wpb=64838, bsz=128, num_updates=7819, lr=9.99454e-05, gnorm=2.141, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89545 2021-06-19 19:31:22 | INFO | train_inner | epoch 003: 1867 / 3002 loss=2.673, ppl=6.38, wps=5906.8, ups=0.09, wpb=64884, bsz=128, num_updates=7820, lr=9.99454e-05, gnorm=2.093, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89556 2021-06-19 19:31:33 | INFO | train_inner | epoch 003: 1868 / 3002 loss=2.495, ppl=5.64, wps=5776.8, ups=0.09, wpb=64859, bsz=128, num_updates=7821, lr=9.99454e-05, gnorm=2.067, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89567 2021-06-19 19:31:44 | INFO | train_inner | epoch 003: 1869 / 3002 loss=2.722, ppl=6.6, wps=5867.6, ups=0.09, wpb=64713, bsz=128, num_updates=7822, lr=9.99454e-05, gnorm=2.169, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89578 2021-06-19 19:31:55 | INFO | train_inner | epoch 003: 1870 / 3002 loss=2.499, ppl=5.65, wps=5876.5, ups=0.09, wpb=64908, bsz=128, num_updates=7823, lr=9.99454e-05, gnorm=2.18, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89590 2021-06-19 19:32:06 | INFO | train_inner | epoch 003: 1871 / 3002 loss=2.581, ppl=5.98, wps=5853.7, ups=0.09, wpb=64955, bsz=128, num_updates=7824, lr=9.99454e-05, gnorm=2.172, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89601 2021-06-19 19:32:17 | INFO | train_inner | epoch 003: 1872 / 3002 loss=2.495, ppl=5.64, wps=5840.2, ups=0.09, wpb=64855, bsz=128, num_updates=7825, lr=9.99454e-05, gnorm=2.122, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89612 2021-06-19 19:32:28 | INFO | train_inner | epoch 003: 1873 / 3002 loss=2.814, ppl=7.03, wps=5911, ups=0.09, wpb=64875, bsz=128, num_updates=7826, lr=9.99454e-05, gnorm=2.317, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89623 2021-06-19 19:32:39 | INFO | train_inner | epoch 003: 1874 / 3002 loss=2.65, ppl=6.28, wps=5838, ups=0.09, wpb=64890, bsz=128, num_updates=7827, lr=9.99454e-05, gnorm=2.117, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89634 2021-06-19 19:32:51 | INFO | train_inner | epoch 003: 1875 / 3002 loss=2.543, ppl=5.83, wps=5754.1, ups=0.09, wpb=64822, bsz=128, num_updates=7828, lr=9.99454e-05, gnorm=2.17, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89645 2021-06-19 19:33:02 | INFO | train_inner | epoch 003: 1876 / 3002 loss=2.748, ppl=6.72, wps=5982.2, ups=0.09, wpb=64933, bsz=128, num_updates=7829, lr=9.99454e-05, gnorm=2.098, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89656 2021-06-19 19:33:13 | INFO | train_inner | epoch 003: 1877 / 3002 loss=2.582, ppl=5.99, wps=5858.7, ups=0.09, wpb=64856, bsz=128, num_updates=7830, lr=9.99454e-05, gnorm=2.579, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89667 2021-06-19 19:33:24 | INFO | train_inner | epoch 003: 1878 / 3002 loss=2.554, ppl=5.87, wps=5838.5, ups=0.09, wpb=64769, bsz=128, num_updates=7831, lr=9.99453e-05, gnorm=2.061, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89678 2021-06-19 19:33:35 | INFO | train_inner | epoch 003: 1879 / 3002 loss=2.465, ppl=5.52, wps=5939.2, ups=0.09, wpb=64745, bsz=128, num_updates=7832, lr=9.99453e-05, gnorm=2.128, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89689 2021-06-19 19:33:46 | INFO | train_inner | epoch 003: 1880 / 3002 loss=2.559, ppl=5.89, wps=5956.3, ups=0.09, wpb=64919, bsz=128, num_updates=7833, lr=9.99453e-05, gnorm=3.718, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89700 2021-06-19 19:33:57 | INFO | train_inner | epoch 003: 1881 / 3002 loss=2.642, ppl=6.24, wps=5797.4, ups=0.09, wpb=64763, bsz=128, num_updates=7834, lr=9.99453e-05, gnorm=2.115, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89711 2021-06-19 19:34:08 | INFO | train_inner | epoch 003: 1882 / 3002 loss=2.654, ppl=6.29, wps=5737.8, ups=0.09, wpb=64802, bsz=128, num_updates=7835, lr=9.99453e-05, gnorm=2.225, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89722 2021-06-19 19:34:19 | INFO | train_inner | epoch 003: 1883 / 3002 loss=2.631, ppl=6.2, wps=5886.4, ups=0.09, wpb=64827, bsz=128, num_updates=7836, lr=9.99453e-05, gnorm=2.398, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89733 2021-06-19 19:34:30 | INFO | train_inner | epoch 003: 1884 / 3002 loss=2.675, ppl=6.39, wps=5730.2, ups=0.09, wpb=64779, bsz=128, num_updates=7837, lr=9.99453e-05, gnorm=2.445, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89745 2021-06-19 19:34:41 | INFO | train_inner | epoch 003: 1885 / 3002 loss=2.711, ppl=6.55, wps=5873.8, ups=0.09, wpb=64852, bsz=128, num_updates=7838, lr=9.99453e-05, gnorm=7.802, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89756 2021-06-19 19:34:53 | INFO | train_inner | epoch 003: 1886 / 3002 loss=2.525, ppl=5.76, wps=5723.7, ups=0.09, wpb=64820, bsz=128, num_updates=7839, lr=9.99453e-05, gnorm=2.276, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89767 2021-06-19 19:35:04 | INFO | train_inner | epoch 003: 1887 / 3002 loss=2.813, ppl=7.03, wps=5888.3, ups=0.09, wpb=64802, bsz=128, num_updates=7840, lr=9.99453e-05, gnorm=2.104, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89778 2021-06-19 19:35:15 | INFO | train_inner | epoch 003: 1888 / 3002 loss=2.552, ppl=5.86, wps=5791.4, ups=0.09, wpb=64853, bsz=128, num_updates=7841, lr=9.99453e-05, gnorm=6.057, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89789 2021-06-19 19:35:26 | INFO | train_inner | epoch 003: 1889 / 3002 loss=2.768, ppl=6.81, wps=5972.7, ups=0.09, wpb=64865, bsz=128, num_updates=7842, lr=9.99453e-05, gnorm=2.321, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89800 2021-06-19 19:35:37 | INFO | train_inner | epoch 003: 1890 / 3002 loss=2.641, ppl=6.24, wps=5883.9, ups=0.09, wpb=64932, bsz=128, num_updates=7843, lr=9.99453e-05, gnorm=3.36, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89811 2021-06-19 19:35:48 | INFO | train_inner | epoch 003: 1891 / 3002 loss=2.722, ppl=6.6, wps=5733.6, ups=0.09, wpb=64860, bsz=128, num_updates=7844, lr=9.99452e-05, gnorm=2.122, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89822 2021-06-19 19:35:59 | INFO | train_inner | epoch 003: 1892 / 3002 loss=2.516, ppl=5.72, wps=5787.5, ups=0.09, wpb=64840, bsz=128, num_updates=7845, lr=9.99452e-05, gnorm=2.158, loss_scale=1, train_wall=11, gb_free=2.8, wall=89834 2021-06-19 19:36:10 | INFO | train_inner | epoch 003: 1893 / 3002 loss=2.666, ppl=6.35, wps=5817.2, ups=0.09, wpb=64787, bsz=128, num_updates=7846, lr=9.99452e-05, gnorm=2.143, loss_scale=1, train_wall=11, gb_free=2.8, wall=89845 2021-06-19 19:36:22 | INFO | train_inner | epoch 003: 1894 / 3002 loss=2.485, ppl=5.6, wps=5830, ups=0.09, wpb=64850, bsz=128, num_updates=7847, lr=9.99452e-05, gnorm=2.094, loss_scale=1, train_wall=11, gb_free=2.8, wall=89856 2021-06-19 19:36:33 | INFO | train_inner | epoch 003: 1895 / 3002 loss=2.76, ppl=6.78, wps=5822.2, ups=0.09, wpb=64851, bsz=128, num_updates=7848, lr=9.99452e-05, gnorm=2.132, loss_scale=1, train_wall=11, gb_free=2.8, wall=89867 2021-06-19 19:36:44 | INFO | train_inner | epoch 003: 1896 / 3002 loss=2.662, ppl=6.33, wps=5878.6, ups=0.09, wpb=64845, bsz=128, num_updates=7849, lr=9.99452e-05, gnorm=2.034, loss_scale=1, train_wall=11, gb_free=2.8, wall=89878 2021-06-19 19:36:55 | INFO | train_inner | epoch 003: 1897 / 3002 loss=2.502, ppl=5.66, wps=5998.6, ups=0.09, wpb=64840, bsz=128, num_updates=7850, lr=9.99452e-05, gnorm=2.101, loss_scale=1, train_wall=10, gb_free=2.8, wall=89889 2021-06-19 19:37:06 | INFO | train_inner | epoch 003: 1898 / 3002 loss=2.647, ppl=6.27, wps=5844.9, ups=0.09, wpb=64775, bsz=128, num_updates=7851, lr=9.99452e-05, gnorm=9.615, loss_scale=1, train_wall=11, gb_free=2.8, wall=89900 2021-06-19 19:37:17 | INFO | train_inner | epoch 003: 1899 / 3002 loss=2.582, ppl=5.99, wps=5841.9, ups=0.09, wpb=64903, bsz=128, num_updates=7852, lr=9.99452e-05, gnorm=2.559, loss_scale=1, train_wall=11, gb_free=2.8, wall=89911 2021-06-19 19:37:28 | INFO | train_inner | epoch 003: 1900 / 3002 loss=2.639, ppl=6.23, wps=5694.9, ups=0.09, wpb=64781, bsz=128, num_updates=7853, lr=9.99452e-05, gnorm=2.181, loss_scale=1, train_wall=11, gb_free=2.8, wall=89922 2021-06-19 19:37:39 | INFO | train_inner | epoch 003: 1901 / 3002 loss=2.705, ppl=6.52, wps=5845.4, ups=0.09, wpb=64826, bsz=128, num_updates=7854, lr=9.99452e-05, gnorm=2.13, loss_scale=1, train_wall=11, gb_free=2.8, wall=89934 2021-06-19 19:37:50 | INFO | train_inner | epoch 003: 1902 / 3002 loss=2.551, ppl=5.86, wps=5749.9, ups=0.09, wpb=64753, bsz=128, num_updates=7855, lr=9.99452e-05, gnorm=2.052, loss_scale=1, train_wall=11, gb_free=2.8, wall=89945 2021-06-19 19:38:02 | INFO | train_inner | epoch 003: 1903 / 3002 loss=2.561, ppl=5.9, wps=5754.5, ups=0.09, wpb=64845, bsz=128, num_updates=7856, lr=9.99451e-05, gnorm=2.133, loss_scale=1, train_wall=11, gb_free=2.8, wall=89956 2021-06-19 19:38:13 | INFO | train_inner | epoch 003: 1904 / 3002 loss=2.709, ppl=6.54, wps=5948, ups=0.09, wpb=64863, bsz=128, num_updates=7857, lr=9.99451e-05, gnorm=2.211, loss_scale=1, train_wall=10, gb_free=2.8, wall=89967 2021-06-19 19:38:24 | INFO | train_inner | epoch 003: 1905 / 3002 loss=2.562, ppl=5.91, wps=5729.4, ups=0.09, wpb=64818, bsz=128, num_updates=7858, lr=9.99451e-05, gnorm=2.133, loss_scale=1, train_wall=11, gb_free=2.8, wall=89978 2021-06-19 19:38:35 | INFO | train_inner | epoch 003: 1906 / 3002 loss=2.707, ppl=6.53, wps=5770.8, ups=0.09, wpb=64773, bsz=128, num_updates=7859, lr=9.99451e-05, gnorm=2.137, loss_scale=1, train_wall=11, gb_free=2.8, wall=89990 2021-06-19 19:38:46 | INFO | train_inner | epoch 003: 1907 / 3002 loss=2.582, ppl=5.99, wps=5899.9, ups=0.09, wpb=64804, bsz=128, num_updates=7860, lr=9.99451e-05, gnorm=2.101, loss_scale=1, train_wall=11, gb_free=2.8, wall=90001 2021-06-19 19:38:57 | INFO | train_inner | epoch 003: 1908 / 3002 loss=2.631, ppl=6.19, wps=5786.5, ups=0.09, wpb=64790, bsz=128, num_updates=7861, lr=9.99451e-05, gnorm=2.133, loss_scale=1, train_wall=11, gb_free=2.8, wall=90012 2021-06-19 19:39:09 | INFO | train_inner | epoch 003: 1909 / 3002 loss=2.559, ppl=5.89, wps=5774.4, ups=0.09, wpb=64826, bsz=128, num_updates=7862, lr=9.99451e-05, gnorm=2.123, loss_scale=1, train_wall=11, gb_free=2.8, wall=90023 2021-06-19 19:39:20 | INFO | train_inner | epoch 003: 1910 / 3002 loss=2.602, ppl=6.07, wps=5823.4, ups=0.09, wpb=64835, bsz=128, num_updates=7863, lr=9.99451e-05, gnorm=2.126, loss_scale=1, train_wall=11, gb_free=2.8, wall=90034 2021-06-19 19:39:31 | INFO | train_inner | epoch 003: 1911 / 3002 loss=2.669, ppl=6.36, wps=5874.1, ups=0.09, wpb=64812, bsz=128, num_updates=7864, lr=9.99451e-05, gnorm=2.05, loss_scale=1, train_wall=11, gb_free=2.8, wall=90045 2021-06-19 19:39:42 | INFO | train_inner | epoch 003: 1912 / 3002 loss=2.59, ppl=6.02, wps=5937.3, ups=0.09, wpb=64853, bsz=128, num_updates=7865, lr=9.99451e-05, gnorm=2.189, loss_scale=1, train_wall=10, gb_free=2.8, wall=90056 2021-06-19 19:39:53 | INFO | train_inner | epoch 003: 1913 / 3002 loss=2.745, ppl=6.7, wps=5863.5, ups=0.09, wpb=64828, bsz=128, num_updates=7866, lr=9.99451e-05, gnorm=3.818, loss_scale=1, train_wall=11, gb_free=2.8, wall=90067 2021-06-19 19:40:04 | INFO | train_inner | epoch 003: 1914 / 3002 loss=2.699, ppl=6.49, wps=5892.1, ups=0.09, wpb=64799, bsz=128, num_updates=7867, lr=9.99451e-05, gnorm=2.279, loss_scale=1, train_wall=11, gb_free=2.8, wall=90078 2021-06-19 19:40:15 | INFO | train_inner | epoch 003: 1915 / 3002 loss=2.631, ppl=6.2, wps=5821, ups=0.09, wpb=64809, bsz=128, num_updates=7868, lr=9.99451e-05, gnorm=2.348, loss_scale=1, train_wall=11, gb_free=2.8, wall=90089 2021-06-19 19:40:26 | INFO | train_inner | epoch 003: 1916 / 3002 loss=2.714, ppl=6.56, wps=5848, ups=0.09, wpb=64831, bsz=128, num_updates=7869, lr=9.9945e-05, gnorm=2.221, loss_scale=1, train_wall=11, gb_free=2.8, wall=90100 2021-06-19 19:40:37 | INFO | train_inner | epoch 003: 1917 / 3002 loss=2.631, ppl=6.19, wps=5855.4, ups=0.09, wpb=64851, bsz=128, num_updates=7870, lr=9.9945e-05, gnorm=2.158, loss_scale=1, train_wall=11, gb_free=2.8, wall=90111 2021-06-19 19:40:48 | INFO | train_inner | epoch 003: 1918 / 3002 loss=2.542, ppl=5.82, wps=5812.1, ups=0.09, wpb=64887, bsz=128, num_updates=7871, lr=9.9945e-05, gnorm=2.642, loss_scale=1, train_wall=11, gb_free=2.8, wall=90123 2021-06-19 19:40:59 | INFO | train_inner | epoch 003: 1919 / 3002 loss=2.717, ppl=6.58, wps=5872.4, ups=0.09, wpb=64817, bsz=128, num_updates=7872, lr=9.9945e-05, gnorm=2.209, loss_scale=1, train_wall=11, gb_free=2.8, wall=90134 2021-06-19 19:41:10 | INFO | train_inner | epoch 003: 1920 / 3002 loss=2.498, ppl=5.65, wps=5955.9, ups=0.09, wpb=64776, bsz=128, num_updates=7873, lr=9.9945e-05, gnorm=2.528, loss_scale=1, train_wall=10, gb_free=2.8, wall=90144 2021-06-19 19:41:21 | INFO | train_inner | epoch 003: 1921 / 3002 loss=2.855, ppl=7.23, wps=5853.8, ups=0.09, wpb=64822, bsz=128, num_updates=7874, lr=9.9945e-05, gnorm=2.144, loss_scale=1, train_wall=11, gb_free=2.8, wall=90156 2021-06-19 19:41:32 | INFO | train_inner | epoch 003: 1922 / 3002 loss=2.595, ppl=6.04, wps=5889.2, ups=0.09, wpb=64823, bsz=128, num_updates=7875, lr=9.9945e-05, gnorm=2.175, loss_scale=1, train_wall=11, gb_free=2.8, wall=90167 2021-06-19 19:41:43 | INFO | train_inner | epoch 003: 1923 / 3002 loss=2.69, ppl=6.46, wps=5814.8, ups=0.09, wpb=64878, bsz=128, num_updates=7876, lr=9.9945e-05, gnorm=2.233, loss_scale=1, train_wall=11, gb_free=2.8, wall=90178 2021-06-19 19:41:55 | INFO | train_inner | epoch 003: 1924 / 3002 loss=2.539, ppl=5.81, wps=5800.3, ups=0.09, wpb=64871, bsz=128, num_updates=7877, lr=9.9945e-05, gnorm=2.667, loss_scale=1, train_wall=11, gb_free=2.8, wall=90189 2021-06-19 19:42:06 | INFO | train_inner | epoch 003: 1925 / 3002 loss=2.81, ppl=7.01, wps=5805.7, ups=0.09, wpb=64799, bsz=128, num_updates=7878, lr=9.9945e-05, gnorm=2.172, loss_scale=1, train_wall=11, gb_free=2.8, wall=90200 2021-06-19 19:42:17 | INFO | train_inner | epoch 003: 1926 / 3002 loss=2.567, ppl=5.92, wps=5818.5, ups=0.09, wpb=64743, bsz=128, num_updates=7879, lr=9.9945e-05, gnorm=2.361, loss_scale=1, train_wall=11, gb_free=2.8, wall=90211 2021-06-19 19:42:28 | INFO | train_inner | epoch 003: 1927 / 3002 loss=2.732, ppl=6.64, wps=5903.7, ups=0.09, wpb=64831, bsz=128, num_updates=7880, lr=9.9945e-05, gnorm=6.456, loss_scale=1, train_wall=11, gb_free=2.8, wall=90222 2021-06-19 19:42:39 | INFO | train_inner | epoch 003: 1928 / 3002 loss=2.593, ppl=6.03, wps=5827.6, ups=0.09, wpb=64907, bsz=128, num_updates=7881, lr=9.99449e-05, gnorm=2.14, loss_scale=1, train_wall=11, gb_free=2.8, wall=90233 2021-06-19 19:42:50 | INFO | train_inner | epoch 003: 1929 / 3002 loss=2.65, ppl=6.28, wps=5921, ups=0.09, wpb=64845, bsz=128, num_updates=7882, lr=9.99449e-05, gnorm=2.146, loss_scale=1, train_wall=10, gb_free=2.8, wall=90244 2021-06-19 19:43:01 | INFO | train_inner | epoch 003: 1930 / 3002 loss=2.747, ppl=6.71, wps=5772.5, ups=0.09, wpb=64815, bsz=128, num_updates=7883, lr=9.99449e-05, gnorm=2.497, loss_scale=1, train_wall=11, gb_free=2.8, wall=90255 2021-06-19 19:43:12 | INFO | train_inner | epoch 003: 1931 / 3002 loss=2.716, ppl=6.57, wps=5894.3, ups=0.09, wpb=64923, bsz=128, num_updates=7884, lr=9.99449e-05, gnorm=2.145, loss_scale=1, train_wall=11, gb_free=2.8, wall=90266 2021-06-19 19:43:23 | INFO | train_inner | epoch 003: 1932 / 3002 loss=2.5, ppl=5.66, wps=5810.9, ups=0.09, wpb=64766, bsz=128, num_updates=7885, lr=9.99449e-05, gnorm=2.194, loss_scale=1, train_wall=11, gb_free=2.8, wall=90278 2021-06-19 19:43:34 | INFO | train_inner | epoch 003: 1933 / 3002 loss=2.635, ppl=6.21, wps=5877.7, ups=0.09, wpb=64842, bsz=128, num_updates=7886, lr=9.99449e-05, gnorm=2.039, loss_scale=1, train_wall=11, gb_free=2.8, wall=90289 2021-06-19 19:43:46 | INFO | train_inner | epoch 003: 1934 / 3002 loss=2.673, ppl=6.38, wps=5771, ups=0.09, wpb=64818, bsz=128, num_updates=7887, lr=9.99449e-05, gnorm=2.055, loss_scale=1, train_wall=11, gb_free=2.8, wall=90300 2021-06-19 19:43:56 | INFO | train_inner | epoch 003: 1935 / 3002 loss=2.659, ppl=6.32, wps=6012.6, ups=0.09, wpb=64897, bsz=128, num_updates=7888, lr=9.99449e-05, gnorm=2.11, loss_scale=1, train_wall=10, gb_free=2.8, wall=90311 2021-06-19 19:44:07 | INFO | train_inner | epoch 003: 1936 / 3002 loss=2.479, ppl=5.57, wps=5914.1, ups=0.09, wpb=64770, bsz=128, num_updates=7889, lr=9.99449e-05, gnorm=2.287, loss_scale=1, train_wall=10, gb_free=2.8, wall=90322 2021-06-19 19:44:18 | INFO | train_inner | epoch 003: 1937 / 3002 loss=2.61, ppl=6.11, wps=5816.1, ups=0.09, wpb=64870, bsz=128, num_updates=7890, lr=9.99449e-05, gnorm=2.147, loss_scale=1, train_wall=11, gb_free=2.8, wall=90333 2021-06-19 19:44:30 | INFO | train_inner | epoch 003: 1938 / 3002 loss=2.6, ppl=6.06, wps=5786.4, ups=0.09, wpb=64905, bsz=128, num_updates=7891, lr=9.99449e-05, gnorm=2.137, loss_scale=1, train_wall=11, gb_free=2.8, wall=90344 2021-06-19 19:44:41 | INFO | train_inner | epoch 003: 1939 / 3002 loss=2.614, ppl=6.12, wps=5832.4, ups=0.09, wpb=64811, bsz=128, num_updates=7892, lr=9.99449e-05, gnorm=2.189, loss_scale=1, train_wall=11, gb_free=2.8, wall=90355 2021-06-19 19:44:52 | INFO | train_inner | epoch 003: 1940 / 3002 loss=2.6, ppl=6.06, wps=5888.5, ups=0.09, wpb=64772, bsz=128, num_updates=7893, lr=9.99449e-05, gnorm=6.39, loss_scale=1, train_wall=11, gb_free=2.8, wall=90366 2021-06-19 19:45:03 | INFO | train_inner | epoch 003: 1941 / 3002 loss=2.69, ppl=6.45, wps=5843.2, ups=0.09, wpb=64841, bsz=128, num_updates=7894, lr=9.99448e-05, gnorm=2.149, loss_scale=1, train_wall=11, gb_free=2.8, wall=90377 2021-06-19 19:45:14 | INFO | train_inner | epoch 003: 1942 / 3002 loss=2.624, ppl=6.16, wps=5885.3, ups=0.09, wpb=64762, bsz=128, num_updates=7895, lr=9.99448e-05, gnorm=2.242, loss_scale=1, train_wall=11, gb_free=2.8, wall=90388 2021-06-19 19:45:25 | INFO | train_inner | epoch 003: 1943 / 3002 loss=2.659, ppl=6.32, wps=5840.4, ups=0.09, wpb=64849, bsz=128, num_updates=7896, lr=9.99448e-05, gnorm=2.168, loss_scale=1, train_wall=11, gb_free=2.8, wall=90399 2021-06-19 19:45:36 | INFO | train_inner | epoch 003: 1944 / 3002 loss=2.605, ppl=6.08, wps=5894.2, ups=0.09, wpb=64771, bsz=128, num_updates=7897, lr=9.99448e-05, gnorm=2.156, loss_scale=1, train_wall=11, gb_free=2.8, wall=90410 2021-06-19 19:45:47 | INFO | train_inner | epoch 003: 1945 / 3002 loss=2.527, ppl=5.76, wps=5918, ups=0.09, wpb=64764, bsz=128, num_updates=7898, lr=9.99448e-05, gnorm=2.02, loss_scale=1, train_wall=10, gb_free=2.8, wall=90421 2021-06-19 19:45:58 | INFO | train_inner | epoch 003: 1946 / 3002 loss=2.747, ppl=6.71, wps=5758.8, ups=0.09, wpb=64774, bsz=128, num_updates=7899, lr=9.99448e-05, gnorm=2.162, loss_scale=1, train_wall=11, gb_free=2.8, wall=90433 2021-06-19 19:46:09 | INFO | train_inner | epoch 003: 1947 / 3002 loss=2.64, ppl=6.23, wps=5909.6, ups=0.09, wpb=64781, bsz=128, num_updates=7900, lr=9.99448e-05, gnorm=2.089, loss_scale=1, train_wall=10, gb_free=2.8, wall=90443 2021-06-19 19:46:20 | INFO | train_inner | epoch 003: 1948 / 3002 loss=2.645, ppl=6.25, wps=6025.3, ups=0.09, wpb=64930, bsz=128, num_updates=7901, lr=9.99448e-05, gnorm=2.142, loss_scale=1, train_wall=10, gb_free=2.8, wall=90454 2021-06-19 19:46:31 | INFO | train_inner | epoch 003: 1949 / 3002 loss=2.524, ppl=5.75, wps=5859.6, ups=0.09, wpb=64830, bsz=128, num_updates=7902, lr=9.99448e-05, gnorm=2.075, loss_scale=1, train_wall=11, gb_free=2.8, wall=90465 2021-06-19 19:46:42 | INFO | train_inner | epoch 003: 1950 / 3002 loss=2.655, ppl=6.3, wps=5864.1, ups=0.09, wpb=64794, bsz=128, num_updates=7903, lr=9.99448e-05, gnorm=2.158, loss_scale=1, train_wall=11, gb_free=2.8, wall=90476 2021-06-19 19:46:53 | INFO | train_inner | epoch 003: 1951 / 3002 loss=2.559, ppl=5.89, wps=5824.2, ups=0.09, wpb=64816, bsz=128, num_updates=7904, lr=9.99448e-05, gnorm=2.272, loss_scale=1, train_wall=11, gb_free=2.8, wall=90488 2021-06-19 19:47:04 | INFO | train_inner | epoch 003: 1952 / 3002 loss=2.429, ppl=5.38, wps=5794.5, ups=0.09, wpb=64852, bsz=128, num_updates=7905, lr=9.99448e-05, gnorm=2.071, loss_scale=1, train_wall=11, gb_free=2.8, wall=90499 2021-06-19 19:47:15 | INFO | train_inner | epoch 003: 1953 / 3002 loss=2.741, ppl=6.68, wps=5948.3, ups=0.09, wpb=64797, bsz=128, num_updates=7906, lr=9.99447e-05, gnorm=2.238, loss_scale=1, train_wall=10, gb_free=2.8, wall=90510 2021-06-19 19:47:26 | INFO | train_inner | epoch 003: 1954 / 3002 loss=2.644, ppl=6.25, wps=5768.4, ups=0.09, wpb=64833, bsz=128, num_updates=7907, lr=9.99447e-05, gnorm=2.186, loss_scale=1, train_wall=11, gb_free=2.8, wall=90521 2021-06-19 19:47:38 | INFO | train_inner | epoch 003: 1955 / 3002 loss=2.727, ppl=6.62, wps=5721.3, ups=0.09, wpb=64810, bsz=128, num_updates=7908, lr=9.99447e-05, gnorm=2.242, loss_scale=1, train_wall=11, gb_free=2.8, wall=90532 2021-06-19 19:47:49 | INFO | train_inner | epoch 003: 1956 / 3002 loss=2.649, ppl=6.27, wps=5958.4, ups=0.09, wpb=64780, bsz=128, num_updates=7909, lr=9.99447e-05, gnorm=3.367, loss_scale=1, train_wall=10, gb_free=2.8, wall=90543 2021-06-19 19:48:00 | INFO | train_inner | epoch 003: 1957 / 3002 loss=2.608, ppl=6.1, wps=5900.3, ups=0.09, wpb=64805, bsz=128, num_updates=7910, lr=9.99447e-05, gnorm=2.207, loss_scale=1, train_wall=11, gb_free=2.8, wall=90554 2021-06-19 19:48:11 | INFO | train_inner | epoch 003: 1958 / 3002 loss=2.682, ppl=6.42, wps=5920.4, ups=0.09, wpb=64877, bsz=128, num_updates=7911, lr=9.99447e-05, gnorm=2.202, loss_scale=1, train_wall=10, gb_free=2.8, wall=90565 2021-06-19 19:48:22 | INFO | train_inner | epoch 003: 1959 / 3002 loss=2.668, ppl=6.36, wps=5791.1, ups=0.09, wpb=64756, bsz=128, num_updates=7912, lr=9.99447e-05, gnorm=2.079, loss_scale=1, train_wall=11, gb_free=2.8, wall=90576 2021-06-19 19:48:33 | INFO | train_inner | epoch 003: 1960 / 3002 loss=2.705, ppl=6.52, wps=5853.9, ups=0.09, wpb=64887, bsz=128, num_updates=7913, lr=9.99447e-05, gnorm=2.051, loss_scale=1, train_wall=11, gb_free=2.8, wall=90587 2021-06-19 19:48:44 | INFO | train_inner | epoch 003: 1961 / 3002 loss=2.804, ppl=6.98, wps=5903.2, ups=0.09, wpb=64793, bsz=128, num_updates=7914, lr=9.99447e-05, gnorm=2.23, loss_scale=1, train_wall=11, gb_free=2.8, wall=90598 2021-06-19 19:48:55 | INFO | train_inner | epoch 003: 1962 / 3002 loss=2.643, ppl=6.25, wps=5873.7, ups=0.09, wpb=64914, bsz=128, num_updates=7915, lr=9.99447e-05, gnorm=2.129, loss_scale=1, train_wall=11, gb_free=2.8, wall=90609 2021-06-19 19:49:06 | INFO | train_inner | epoch 003: 1963 / 3002 loss=2.644, ppl=6.25, wps=5866.5, ups=0.09, wpb=64820, bsz=128, num_updates=7916, lr=9.99447e-05, gnorm=2.04, loss_scale=1, train_wall=11, gb_free=2.8, wall=90620 2021-06-19 19:49:17 | INFO | train_inner | epoch 003: 1964 / 3002 loss=2.478, ppl=5.57, wps=5854.1, ups=0.09, wpb=64822, bsz=128, num_updates=7917, lr=9.99447e-05, gnorm=2.103, loss_scale=1, train_wall=11, gb_free=2.8, wall=90631 2021-06-19 19:49:28 | INFO | train_inner | epoch 003: 1965 / 3002 loss=2.674, ppl=6.38, wps=5827, ups=0.09, wpb=64810, bsz=128, num_updates=7918, lr=9.99447e-05, gnorm=2.052, loss_scale=1, train_wall=11, gb_free=2.8, wall=90643 2021-06-19 19:49:39 | INFO | train_inner | epoch 003: 1966 / 3002 loss=2.592, ppl=6.03, wps=5838.5, ups=0.09, wpb=64887, bsz=128, num_updates=7919, lr=9.99446e-05, gnorm=2.09, loss_scale=1, train_wall=11, gb_free=2.8, wall=90654 2021-06-19 19:49:50 | INFO | train_inner | epoch 003: 1967 / 3002 loss=2.651, ppl=6.28, wps=5847.5, ups=0.09, wpb=64785, bsz=128, num_updates=7920, lr=9.99446e-05, gnorm=2.123, loss_scale=1, train_wall=11, gb_free=2.8, wall=90665 2021-06-19 19:50:02 | INFO | train_inner | epoch 003: 1968 / 3002 loss=2.6, ppl=6.06, wps=5795.7, ups=0.09, wpb=64911, bsz=128, num_updates=7921, lr=9.99446e-05, gnorm=5.107, loss_scale=1, train_wall=11, gb_free=2.8, wall=90676 2021-06-19 19:50:13 | INFO | train_inner | epoch 003: 1969 / 3002 loss=2.757, ppl=6.76, wps=5880.3, ups=0.09, wpb=64821, bsz=128, num_updates=7922, lr=9.99446e-05, gnorm=2.378, loss_scale=1, train_wall=11, gb_free=2.8, wall=90687 2021-06-19 19:50:24 | INFO | train_inner | epoch 003: 1970 / 3002 loss=2.695, ppl=6.47, wps=5869.9, ups=0.09, wpb=64583, bsz=128, num_updates=7923, lr=9.99446e-05, gnorm=2.175, loss_scale=1, train_wall=11, gb_free=2.8, wall=90698 2021-06-19 19:50:35 | INFO | train_inner | epoch 003: 1971 / 3002 loss=2.558, ppl=5.89, wps=5832.2, ups=0.09, wpb=64796, bsz=128, num_updates=7924, lr=9.99446e-05, gnorm=2.099, loss_scale=1, train_wall=11, gb_free=2.8, wall=90709 2021-06-19 19:50:46 | INFO | train_inner | epoch 003: 1972 / 3002 loss=2.657, ppl=6.31, wps=5872.9, ups=0.09, wpb=64892, bsz=128, num_updates=7925, lr=9.99446e-05, gnorm=2.06, loss_scale=1, train_wall=11, gb_free=2.8, wall=90720 2021-06-19 19:50:57 | INFO | train_inner | epoch 003: 1973 / 3002 loss=2.625, ppl=6.17, wps=5816.1, ups=0.09, wpb=64792, bsz=128, num_updates=7926, lr=9.99446e-05, gnorm=2.789, loss_scale=1, train_wall=11, gb_free=2.8, wall=90731 2021-06-19 19:51:08 | INFO | train_inner | epoch 003: 1974 / 3002 loss=2.49, ppl=5.62, wps=5716.4, ups=0.09, wpb=64859, bsz=128, num_updates=7927, lr=9.99446e-05, gnorm=2.099, loss_scale=1, train_wall=11, gb_free=2.8, wall=90743 2021-06-19 19:51:19 | INFO | train_inner | epoch 003: 1975 / 3002 loss=2.577, ppl=5.97, wps=5885.6, ups=0.09, wpb=64953, bsz=128, num_updates=7928, lr=9.99446e-05, gnorm=2.087, loss_scale=1, train_wall=11, gb_free=2.8, wall=90754 2021-06-19 19:51:30 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 2021-06-19 19:51:41 | INFO | train_inner | epoch 003: 1977 / 3002 loss=2.67, ppl=6.36, wps=2940.9, ups=0.05, wpb=64793, bsz=128, num_updates=7929, lr=9.99446e-05, gnorm=2.102, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=90776 2021-06-19 19:51:53 | INFO | train_inner | epoch 003: 1978 / 3002 loss=2.71, ppl=6.54, wps=5715.9, ups=0.09, wpb=64839, bsz=128, num_updates=7930, lr=9.99446e-05, gnorm=2.149, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90787 2021-06-19 19:52:04 | INFO | train_inner | epoch 003: 1979 / 3002 loss=2.637, ppl=6.22, wps=5811, ups=0.09, wpb=64859, bsz=128, num_updates=7931, lr=9.99445e-05, gnorm=2.101, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90798 2021-06-19 19:52:15 | INFO | train_inner | epoch 003: 1980 / 3002 loss=2.649, ppl=6.27, wps=5775.3, ups=0.09, wpb=64744, bsz=128, num_updates=7932, lr=9.99445e-05, gnorm=6.516, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90809 2021-06-19 19:52:26 | INFO | train_inner | epoch 003: 1981 / 3002 loss=2.698, ppl=6.49, wps=5848.4, ups=0.09, wpb=64891, bsz=128, num_updates=7933, lr=9.99445e-05, gnorm=2.192, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90820 2021-06-19 19:52:37 | INFO | train_inner | epoch 003: 1982 / 3002 loss=2.399, ppl=5.27, wps=5975.6, ups=0.09, wpb=64778, bsz=128, num_updates=7934, lr=9.99445e-05, gnorm=2.1, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=90831 2021-06-19 19:52:48 | INFO | train_inner | epoch 003: 1983 / 3002 loss=2.554, ppl=5.87, wps=5894.8, ups=0.09, wpb=64905, bsz=128, num_updates=7935, lr=9.99445e-05, gnorm=2.115, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90842 2021-06-19 19:52:59 | INFO | train_inner | epoch 003: 1984 / 3002 loss=2.634, ppl=6.21, wps=5783.3, ups=0.09, wpb=64834, bsz=128, num_updates=7936, lr=9.99445e-05, gnorm=2.127, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90854 2021-06-19 19:53:10 | INFO | train_inner | epoch 003: 1985 / 3002 loss=2.603, ppl=6.08, wps=5819.9, ups=0.09, wpb=64779, bsz=128, num_updates=7937, lr=9.99445e-05, gnorm=2.433, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90865 2021-06-19 19:53:21 | INFO | train_inner | epoch 003: 1986 / 3002 loss=2.624, ppl=6.17, wps=5957.3, ups=0.09, wpb=64933, bsz=128, num_updates=7938, lr=9.99445e-05, gnorm=2.093, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=90876 2021-06-19 19:53:32 | INFO | train_inner | epoch 003: 1987 / 3002 loss=2.579, ppl=5.97, wps=5831.8, ups=0.09, wpb=64776, bsz=128, num_updates=7939, lr=9.99445e-05, gnorm=2.127, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90887 2021-06-19 19:53:43 | INFO | train_inner | epoch 003: 1988 / 3002 loss=2.833, ppl=7.12, wps=5872.3, ups=0.09, wpb=64806, bsz=128, num_updates=7940, lr=9.99445e-05, gnorm=2.052, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90898 2021-06-19 19:53:54 | INFO | train_inner | epoch 003: 1989 / 3002 loss=2.66, ppl=6.32, wps=5857, ups=0.09, wpb=64848, bsz=128, num_updates=7941, lr=9.99445e-05, gnorm=2.111, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90909 2021-06-19 19:54:06 | INFO | train_inner | epoch 003: 1990 / 3002 loss=2.721, ppl=6.59, wps=5774.4, ups=0.09, wpb=64767, bsz=128, num_updates=7942, lr=9.99445e-05, gnorm=2.061, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90920 2021-06-19 19:54:17 | INFO | train_inner | epoch 003: 1991 / 3002 loss=2.616, ppl=6.13, wps=5758.8, ups=0.09, wpb=64804, bsz=128, num_updates=7943, lr=9.99445e-05, gnorm=2.444, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90931 2021-06-19 19:54:28 | INFO | train_inner | epoch 003: 1992 / 3002 loss=2.726, ppl=6.62, wps=5823.5, ups=0.09, wpb=64859, bsz=128, num_updates=7944, lr=9.99444e-05, gnorm=2.341, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90942 2021-06-19 19:54:39 | INFO | train_inner | epoch 003: 1993 / 3002 loss=2.656, ppl=6.3, wps=5827.9, ups=0.09, wpb=64823, bsz=128, num_updates=7945, lr=9.99444e-05, gnorm=2.119, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90953 2021-06-19 19:54:50 | INFO | train_inner | epoch 003: 1994 / 3002 loss=2.632, ppl=6.2, wps=5916.8, ups=0.09, wpb=64940, bsz=128, num_updates=7946, lr=9.99444e-05, gnorm=2.142, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90964 2021-06-19 19:55:01 | INFO | train_inner | epoch 003: 1995 / 3002 loss=2.783, ppl=6.88, wps=5846.8, ups=0.09, wpb=64791, bsz=128, num_updates=7947, lr=9.99444e-05, gnorm=2.253, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90976 2021-06-19 19:55:12 | INFO | train_inner | epoch 003: 1996 / 3002 loss=2.601, ppl=6.07, wps=5804.6, ups=0.09, wpb=64830, bsz=128, num_updates=7948, lr=9.99444e-05, gnorm=2.091, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90987 2021-06-19 19:55:24 | INFO | train_inner | epoch 003: 1997 / 3002 loss=2.713, ppl=6.56, wps=5729.6, ups=0.09, wpb=64831, bsz=128, num_updates=7949, lr=9.99444e-05, gnorm=2.118, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90998 2021-06-19 19:55:35 | INFO | train_inner | epoch 003: 1998 / 3002 loss=2.753, ppl=6.74, wps=5922, ups=0.09, wpb=64830, bsz=128, num_updates=7950, lr=9.99444e-05, gnorm=2.255, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91009 2021-06-19 19:55:46 | INFO | train_inner | epoch 003: 1999 / 3002 loss=2.849, ppl=7.21, wps=5846, ups=0.09, wpb=64767, bsz=128, num_updates=7951, lr=9.99444e-05, gnorm=2.456, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91020 2021-06-19 19:55:57 | INFO | train_inner | epoch 003: 2000 / 3002 loss=2.612, ppl=6.12, wps=5864.9, ups=0.09, wpb=64737, bsz=128, num_updates=7952, lr=9.99444e-05, gnorm=2.124, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91031 2021-06-19 19:56:08 | INFO | train_inner | epoch 003: 2001 / 3002 loss=2.516, ppl=5.72, wps=5907, ups=0.09, wpb=64835, bsz=128, num_updates=7953, lr=9.99444e-05, gnorm=2.055, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91042 2021-06-19 19:56:19 | INFO | train_inner | epoch 003: 2002 / 3002 loss=2.615, ppl=6.13, wps=5831.2, ups=0.09, wpb=64870, bsz=128, num_updates=7954, lr=9.99444e-05, gnorm=2.055, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91053 2021-06-19 19:56:30 | INFO | train_inner | epoch 003: 2003 / 3002 loss=2.899, ppl=7.46, wps=5860.5, ups=0.09, wpb=64701, bsz=128, num_updates=7955, lr=9.99444e-05, gnorm=2.142, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91064 2021-06-19 19:56:41 | INFO | train_inner | epoch 003: 2004 / 3002 loss=2.748, ppl=6.72, wps=5852.7, ups=0.09, wpb=64805, bsz=128, num_updates=7956, lr=9.99443e-05, gnorm=2.11, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91075 2021-06-19 19:56:52 | INFO | train_inner | epoch 003: 2005 / 3002 loss=2.565, ppl=5.92, wps=5866.6, ups=0.09, wpb=64809, bsz=128, num_updates=7957, lr=9.99443e-05, gnorm=2.061, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91086 2021-06-19 19:57:03 | INFO | train_inner | epoch 003: 2006 / 3002 loss=2.523, ppl=5.75, wps=5907.9, ups=0.09, wpb=64853, bsz=128, num_updates=7958, lr=9.99443e-05, gnorm=91.847, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91097 2021-06-19 19:57:14 | INFO | train_inner | epoch 003: 2007 / 3002 loss=2.5, ppl=5.66, wps=5894.6, ups=0.09, wpb=64839, bsz=128, num_updates=7959, lr=9.99443e-05, gnorm=2.072, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91108 2021-06-19 19:57:25 | INFO | train_inner | epoch 003: 2008 / 3002 loss=2.631, ppl=6.19, wps=5857.7, ups=0.09, wpb=64819, bsz=128, num_updates=7960, lr=9.99443e-05, gnorm=2.15, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91119 2021-06-19 19:57:36 | INFO | train_inner | epoch 003: 2009 / 3002 loss=2.763, ppl=6.79, wps=6010.6, ups=0.09, wpb=64921, bsz=128, num_updates=7961, lr=9.99443e-05, gnorm=2.142, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91130 2021-06-19 19:57:47 | INFO | train_inner | epoch 003: 2010 / 3002 loss=2.57, ppl=5.94, wps=5781.7, ups=0.09, wpb=64858, bsz=128, num_updates=7962, lr=9.99443e-05, gnorm=2.263, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91141 2021-06-19 19:57:58 | INFO | train_inner | epoch 003: 2011 / 3002 loss=2.6, ppl=6.06, wps=5889.8, ups=0.09, wpb=64853, bsz=128, num_updates=7963, lr=9.99443e-05, gnorm=2.288, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91152 2021-06-19 19:58:09 | INFO | train_inner | epoch 003: 2012 / 3002 loss=2.651, ppl=6.28, wps=5691.3, ups=0.09, wpb=64830, bsz=128, num_updates=7964, lr=9.99443e-05, gnorm=2.134, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91164 2021-06-19 19:58:21 | INFO | train_inner | epoch 003: 2013 / 3002 loss=2.85, ppl=7.21, wps=5789.6, ups=0.09, wpb=64799, bsz=128, num_updates=7965, lr=9.99443e-05, gnorm=3.09, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91175 2021-06-19 19:58:32 | INFO | train_inner | epoch 003: 2014 / 3002 loss=2.651, ppl=6.28, wps=5863.8, ups=0.09, wpb=64843, bsz=128, num_updates=7966, lr=9.99443e-05, gnorm=2.193, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91186 2021-06-19 19:58:43 | INFO | train_inner | epoch 003: 2015 / 3002 loss=2.704, ppl=6.52, wps=5844.9, ups=0.09, wpb=64782, bsz=128, num_updates=7967, lr=9.99443e-05, gnorm=2.042, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91197 2021-06-19 19:58:54 | INFO | train_inner | epoch 003: 2016 / 3002 loss=2.747, ppl=6.71, wps=5911.1, ups=0.09, wpb=64882, bsz=128, num_updates=7968, lr=9.99443e-05, gnorm=7.11, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91208 2021-06-19 19:59:05 | INFO | train_inner | epoch 003: 2017 / 3002 loss=2.668, ppl=6.36, wps=5838.9, ups=0.09, wpb=64868, bsz=128, num_updates=7969, lr=9.99442e-05, gnorm=2.147, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91219 2021-06-19 19:59:16 | INFO | train_inner | epoch 003: 2018 / 3002 loss=2.639, ppl=6.23, wps=5851.6, ups=0.09, wpb=64811, bsz=128, num_updates=7970, lr=9.99442e-05, gnorm=2.196, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91230 2021-06-19 19:59:27 | INFO | train_inner | epoch 003: 2019 / 3002 loss=2.534, ppl=5.79, wps=5898.2, ups=0.09, wpb=64888, bsz=128, num_updates=7971, lr=9.99442e-05, gnorm=5.823, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91241 2021-06-19 19:59:38 | INFO | train_inner | epoch 003: 2020 / 3002 loss=2.64, ppl=6.23, wps=5951.3, ups=0.09, wpb=64863, bsz=128, num_updates=7972, lr=9.99442e-05, gnorm=2.382, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91252 2021-06-19 19:59:49 | INFO | train_inner | epoch 003: 2021 / 3002 loss=2.746, ppl=6.71, wps=5867.5, ups=0.09, wpb=64795, bsz=128, num_updates=7973, lr=9.99442e-05, gnorm=2.237, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91263 2021-06-19 20:00:00 | INFO | train_inner | epoch 003: 2022 / 3002 loss=2.523, ppl=5.75, wps=5851.8, ups=0.09, wpb=64781, bsz=128, num_updates=7974, lr=9.99442e-05, gnorm=2.203, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91274 2021-06-19 20:00:11 | INFO | train_inner | epoch 003: 2023 / 3002 loss=2.832, ppl=7.12, wps=5930.5, ups=0.09, wpb=64759, bsz=128, num_updates=7975, lr=9.99442e-05, gnorm=2.178, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91285 2021-06-19 20:00:22 | INFO | train_inner | epoch 003: 2024 / 3002 loss=2.617, ppl=6.13, wps=5863.7, ups=0.09, wpb=64811, bsz=128, num_updates=7976, lr=9.99442e-05, gnorm=2.125, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91296 2021-06-19 20:00:33 | INFO | train_inner | epoch 003: 2025 / 3002 loss=2.473, ppl=5.55, wps=5792.4, ups=0.09, wpb=64876, bsz=128, num_updates=7977, lr=9.99442e-05, gnorm=2.252, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91308 2021-06-19 20:00:44 | INFO | train_inner | epoch 003: 2026 / 3002 loss=2.647, ppl=6.26, wps=5836.4, ups=0.09, wpb=64858, bsz=128, num_updates=7978, lr=9.99442e-05, gnorm=2.252, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91319 2021-06-19 20:00:55 | INFO | train_inner | epoch 003: 2027 / 3002 loss=2.639, ppl=6.23, wps=5859, ups=0.09, wpb=64823, bsz=128, num_updates=7979, lr=9.99442e-05, gnorm=2.387, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91330 2021-06-19 20:01:06 | INFO | train_inner | epoch 003: 2028 / 3002 loss=2.651, ppl=6.28, wps=5866.8, ups=0.09, wpb=64837, bsz=128, num_updates=7980, lr=9.99442e-05, gnorm=2.307, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91341 2021-06-19 20:01:17 | INFO | train_inner | epoch 003: 2029 / 3002 loss=2.689, ppl=6.45, wps=5899.5, ups=0.09, wpb=64804, bsz=128, num_updates=7981, lr=9.99441e-05, gnorm=2.269, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91352 2021-06-19 20:01:28 | INFO | train_inner | epoch 003: 2030 / 3002 loss=2.856, ppl=7.24, wps=5887.1, ups=0.09, wpb=64909, bsz=128, num_updates=7982, lr=9.99441e-05, gnorm=2.816, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91363 2021-06-19 20:01:39 | INFO | train_inner | epoch 003: 2031 / 3002 loss=2.726, ppl=6.62, wps=5896.3, ups=0.09, wpb=64768, bsz=128, num_updates=7983, lr=9.99441e-05, gnorm=2.176, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91374 2021-06-19 20:01:51 | INFO | train_inner | epoch 003: 2032 / 3002 loss=2.768, ppl=6.81, wps=5753.2, ups=0.09, wpb=64793, bsz=128, num_updates=7984, lr=9.99441e-05, gnorm=2.275, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91385 2021-06-19 20:02:02 | INFO | train_inner | epoch 003: 2033 / 3002 loss=2.608, ppl=6.1, wps=5876.1, ups=0.09, wpb=64840, bsz=128, num_updates=7985, lr=9.99441e-05, gnorm=2.167, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91396 2021-06-19 20:02:13 | INFO | train_inner | epoch 003: 2034 / 3002 loss=2.562, ppl=5.91, wps=5972.3, ups=0.09, wpb=64819, bsz=128, num_updates=7986, lr=9.99441e-05, gnorm=2.161, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91407 2021-06-19 20:02:24 | INFO | train_inner | epoch 003: 2035 / 3002 loss=2.621, ppl=6.15, wps=5890.4, ups=0.09, wpb=64888, bsz=128, num_updates=7987, lr=9.99441e-05, gnorm=2.36, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91418 2021-06-19 20:02:35 | INFO | train_inner | epoch 003: 2036 / 3002 loss=2.608, ppl=6.1, wps=5837.9, ups=0.09, wpb=64818, bsz=128, num_updates=7988, lr=9.99441e-05, gnorm=2.131, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91429 2021-06-19 20:02:46 | INFO | train_inner | epoch 003: 2037 / 3002 loss=2.599, ppl=6.06, wps=5837.3, ups=0.09, wpb=64841, bsz=128, num_updates=7989, lr=9.99441e-05, gnorm=2.114, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91440 2021-06-19 20:02:57 | INFO | train_inner | epoch 003: 2038 / 3002 loss=2.534, ppl=5.79, wps=5735.7, ups=0.09, wpb=64766, bsz=128, num_updates=7990, lr=9.99441e-05, gnorm=2.121, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91451 2021-06-19 20:03:08 | INFO | train_inner | epoch 003: 2039 / 3002 loss=2.643, ppl=6.25, wps=5777.2, ups=0.09, wpb=64797, bsz=128, num_updates=7991, lr=9.99441e-05, gnorm=2.682, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91463 2021-06-19 20:03:19 | INFO | train_inner | epoch 003: 2040 / 3002 loss=2.617, ppl=6.13, wps=5962.3, ups=0.09, wpb=64823, bsz=128, num_updates=7992, lr=9.99441e-05, gnorm=2.098, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91474 2021-06-19 20:03:30 | INFO | train_inner | epoch 003: 2041 / 3002 loss=2.608, ppl=6.1, wps=5963.3, ups=0.09, wpb=64833, bsz=128, num_updates=7993, lr=9.99441e-05, gnorm=2.741, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91484 2021-06-19 20:03:41 | INFO | train_inner | epoch 003: 2042 / 3002 loss=2.617, ppl=6.13, wps=5963.7, ups=0.09, wpb=64797, bsz=128, num_updates=7994, lr=9.9944e-05, gnorm=2.106, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91495 2021-06-19 20:03:52 | INFO | train_inner | epoch 003: 2043 / 3002 loss=2.64, ppl=6.23, wps=5792, ups=0.09, wpb=64860, bsz=128, num_updates=7995, lr=9.9944e-05, gnorm=2.085, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91506 2021-06-19 20:04:03 | INFO | train_inner | epoch 003: 2044 / 3002 loss=2.719, ppl=6.58, wps=5770.1, ups=0.09, wpb=64788, bsz=128, num_updates=7996, lr=9.9944e-05, gnorm=2.381, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91518 2021-06-19 20:04:14 | INFO | train_inner | epoch 003: 2045 / 3002 loss=2.557, ppl=5.88, wps=5865.8, ups=0.09, wpb=64863, bsz=128, num_updates=7997, lr=9.9944e-05, gnorm=2.109, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91529 2021-06-19 20:04:25 | INFO | train_inner | epoch 003: 2046 / 3002 loss=2.55, ppl=5.86, wps=5843, ups=0.09, wpb=64878, bsz=128, num_updates=7998, lr=9.9944e-05, gnorm=2.129, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91540 2021-06-19 20:04:37 | INFO | train_inner | epoch 003: 2047 / 3002 loss=2.526, ppl=5.76, wps=5801.7, ups=0.09, wpb=64836, bsz=128, num_updates=7999, lr=9.9944e-05, gnorm=2.044, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91551 2021-06-19 20:04:48 | INFO | train_inner | epoch 003: 2048 / 3002 loss=2.642, ppl=6.24, wps=5967.2, ups=0.09, wpb=64909, bsz=128, num_updates=8000, lr=9.9944e-05, gnorm=2.171, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91562 2021-06-19 20:04:58 | INFO | train_inner | epoch 003: 2049 / 3002 loss=2.75, ppl=6.73, wps=5970.6, ups=0.09, wpb=64810, bsz=128, num_updates=8001, lr=9.9944e-05, gnorm=2.108, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91573 2021-06-19 20:05:09 | INFO | train_inner | epoch 003: 2050 / 3002 loss=2.669, ppl=6.36, wps=5899, ups=0.09, wpb=64889, bsz=128, num_updates=8002, lr=9.9944e-05, gnorm=2.205, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91584 2021-06-19 20:05:20 | INFO | train_inner | epoch 003: 2051 / 3002 loss=2.74, ppl=6.68, wps=5930.2, ups=0.09, wpb=64786, bsz=128, num_updates=8003, lr=9.9944e-05, gnorm=2.678, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91595 2021-06-19 20:05:31 | INFO | train_inner | epoch 003: 2052 / 3002 loss=2.644, ppl=6.25, wps=5854.1, ups=0.09, wpb=64827, bsz=128, num_updates=8004, lr=9.9944e-05, gnorm=2.206, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91606 2021-06-19 20:05:42 | INFO | train_inner | epoch 003: 2053 / 3002 loss=2.545, ppl=5.84, wps=5875, ups=0.09, wpb=64817, bsz=128, num_updates=8005, lr=9.9944e-05, gnorm=2.066, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91617 2021-06-19 20:05:53 | INFO | train_inner | epoch 003: 2054 / 3002 loss=2.598, ppl=6.05, wps=5882, ups=0.09, wpb=64739, bsz=128, num_updates=8006, lr=9.99439e-05, gnorm=2.027, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91628 2021-06-19 20:06:05 | INFO | train_inner | epoch 003: 2055 / 3002 loss=2.748, ppl=6.72, wps=5747.6, ups=0.09, wpb=64757, bsz=128, num_updates=8007, lr=9.99439e-05, gnorm=2.017, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91639 2021-06-19 20:06:16 | INFO | train_inner | epoch 003: 2056 / 3002 loss=2.768, ppl=6.81, wps=5838.8, ups=0.09, wpb=64806, bsz=128, num_updates=8008, lr=9.99439e-05, gnorm=2.124, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91650 2021-06-19 20:06:27 | INFO | train_inner | epoch 003: 2057 / 3002 loss=2.639, ppl=6.23, wps=6003.3, ups=0.09, wpb=64908, bsz=128, num_updates=8009, lr=9.99439e-05, gnorm=2.065, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91661 2021-06-19 20:06:38 | INFO | train_inner | epoch 003: 2058 / 3002 loss=2.693, ppl=6.46, wps=5904.7, ups=0.09, wpb=64770, bsz=128, num_updates=8010, lr=9.99439e-05, gnorm=2.114, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91672 2021-06-19 20:06:49 | INFO | train_inner | epoch 003: 2059 / 3002 loss=2.549, ppl=5.85, wps=5835.3, ups=0.09, wpb=64899, bsz=128, num_updates=8011, lr=9.99439e-05, gnorm=2.573, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91683 2021-06-19 20:07:00 | INFO | train_inner | epoch 003: 2060 / 3002 loss=2.566, ppl=5.92, wps=5859.1, ups=0.09, wpb=64839, bsz=128, num_updates=8012, lr=9.99439e-05, gnorm=2.108, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91694 2021-06-19 20:07:11 | INFO | train_inner | epoch 003: 2061 / 3002 loss=2.51, ppl=5.7, wps=5822.1, ups=0.09, wpb=64859, bsz=128, num_updates=8013, lr=9.99439e-05, gnorm=2.018, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91705 2021-06-19 20:07:22 | INFO | train_inner | epoch 003: 2062 / 3002 loss=2.592, ppl=6.03, wps=5791.4, ups=0.09, wpb=64868, bsz=128, num_updates=8014, lr=9.99439e-05, gnorm=2.069, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91716 2021-06-19 20:07:33 | INFO | train_inner | epoch 003: 2063 / 3002 loss=2.741, ppl=6.68, wps=5865.7, ups=0.09, wpb=64795, bsz=128, num_updates=8015, lr=9.99439e-05, gnorm=2.159, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91728 2021-06-19 20:07:44 | INFO | train_inner | epoch 003: 2064 / 3002 loss=2.658, ppl=6.31, wps=5798.8, ups=0.09, wpb=64783, bsz=128, num_updates=8016, lr=9.99439e-05, gnorm=1.98, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91739 2021-06-19 20:07:55 | INFO | train_inner | epoch 003: 2065 / 3002 loss=2.771, ppl=6.83, wps=5864.9, ups=0.09, wpb=64733, bsz=128, num_updates=8017, lr=9.99439e-05, gnorm=4.33, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91750 2021-06-19 20:08:07 | INFO | train_inner | epoch 003: 2066 / 3002 loss=2.589, ppl=6.02, wps=5820.4, ups=0.09, wpb=64744, bsz=128, num_updates=8018, lr=9.99439e-05, gnorm=2.042, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91761 2021-06-19 20:08:18 | INFO | train_inner | epoch 003: 2067 / 3002 loss=2.517, ppl=5.72, wps=5783.8, ups=0.09, wpb=64800, bsz=128, num_updates=8019, lr=9.99438e-05, gnorm=2.12, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91772 2021-06-19 20:08:29 | INFO | train_inner | epoch 003: 2068 / 3002 loss=2.611, ppl=6.11, wps=5738.3, ups=0.09, wpb=64830, bsz=128, num_updates=8020, lr=9.99438e-05, gnorm=2.055, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91783 2021-06-19 20:08:40 | INFO | train_inner | epoch 003: 2069 / 3002 loss=2.72, ppl=6.59, wps=5779, ups=0.09, wpb=64773, bsz=128, num_updates=8021, lr=9.99438e-05, gnorm=2.137, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91795 2021-06-19 20:08:51 | INFO | train_inner | epoch 003: 2070 / 3002 loss=2.424, ppl=5.37, wps=5792.9, ups=0.09, wpb=64811, bsz=128, num_updates=8022, lr=9.99438e-05, gnorm=1.973, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91806 2021-06-19 20:09:03 | INFO | train_inner | epoch 003: 2071 / 3002 loss=2.746, ppl=6.71, wps=5751.1, ups=0.09, wpb=64850, bsz=128, num_updates=8023, lr=9.99438e-05, gnorm=2.07, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91817 2021-06-19 20:09:14 | INFO | train_inner | epoch 003: 2072 / 3002 loss=2.731, ppl=6.64, wps=5883, ups=0.09, wpb=64830, bsz=128, num_updates=8024, lr=9.99438e-05, gnorm=2.237, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91828 2021-06-19 20:09:25 | INFO | train_inner | epoch 003: 2073 / 3002 loss=2.467, ppl=5.53, wps=5972.3, ups=0.09, wpb=64839, bsz=128, num_updates=8025, lr=9.99438e-05, gnorm=2.049, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91839 2021-06-19 20:09:36 | INFO | train_inner | epoch 003: 2074 / 3002 loss=2.465, ppl=5.52, wps=5772.6, ups=0.09, wpb=64755, bsz=128, num_updates=8026, lr=9.99438e-05, gnorm=2.013, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91850 2021-06-19 20:09:47 | INFO | train_inner | epoch 003: 2075 / 3002 loss=2.765, ppl=6.8, wps=5866.5, ups=0.09, wpb=64796, bsz=128, num_updates=8027, lr=9.99438e-05, gnorm=2.122, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91861 2021-06-19 20:09:58 | INFO | train_inner | epoch 003: 2076 / 3002 loss=2.633, ppl=6.21, wps=5745.8, ups=0.09, wpb=64788, bsz=128, num_updates=8028, lr=9.99438e-05, gnorm=2.154, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91872 2021-06-19 20:10:09 | INFO | train_inner | epoch 003: 2077 / 3002 loss=2.627, ppl=6.18, wps=5787.6, ups=0.09, wpb=64859, bsz=128, num_updates=8029, lr=9.99438e-05, gnorm=2.099, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91884 2021-06-19 20:10:20 | INFO | train_inner | epoch 003: 2078 / 3002 loss=2.517, ppl=5.73, wps=5897.3, ups=0.09, wpb=64861, bsz=128, num_updates=8030, lr=9.99438e-05, gnorm=2.106, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91895 2021-06-19 20:10:31 | INFO | train_inner | epoch 003: 2079 / 3002 loss=2.414, ppl=5.33, wps=5901.8, ups=0.09, wpb=64881, bsz=128, num_updates=8031, lr=9.99437e-05, gnorm=1.972, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91906 2021-06-19 20:10:42 | INFO | train_inner | epoch 003: 2080 / 3002 loss=2.64, ppl=6.23, wps=5918.3, ups=0.09, wpb=64859, bsz=128, num_updates=8032, lr=9.99437e-05, gnorm=2.059, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91917 2021-06-19 20:10:53 | INFO | train_inner | epoch 003: 2081 / 3002 loss=2.491, ppl=5.62, wps=5923.4, ups=0.09, wpb=64894, bsz=128, num_updates=8033, lr=9.99437e-05, gnorm=2.241, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91928 2021-06-19 20:11:04 | INFO | train_inner | epoch 003: 2082 / 3002 loss=2.63, ppl=6.19, wps=5855.7, ups=0.09, wpb=64884, bsz=128, num_updates=8034, lr=9.99437e-05, gnorm=2.048, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91939 2021-06-19 20:11:15 | INFO | train_inner | epoch 003: 2083 / 3002 loss=2.576, ppl=5.96, wps=5921.3, ups=0.09, wpb=64870, bsz=128, num_updates=8035, lr=9.99437e-05, gnorm=2.025, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91950 2021-06-19 20:11:26 | INFO | train_inner | epoch 003: 2084 / 3002 loss=2.514, ppl=5.71, wps=5812.9, ups=0.09, wpb=64802, bsz=128, num_updates=8036, lr=9.99437e-05, gnorm=2.125, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91961 2021-06-19 20:11:37 | INFO | train_inner | epoch 003: 2085 / 3002 loss=2.631, ppl=6.19, wps=5918.1, ups=0.09, wpb=64878, bsz=128, num_updates=8037, lr=9.99437e-05, gnorm=2.065, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91972 2021-06-19 20:11:49 | INFO | train_inner | epoch 003: 2086 / 3002 loss=2.568, ppl=5.93, wps=5795.7, ups=0.09, wpb=64759, bsz=128, num_updates=8038, lr=9.99437e-05, gnorm=2.004, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91983 2021-06-19 20:12:00 | INFO | train_inner | epoch 003: 2087 / 3002 loss=2.657, ppl=6.31, wps=5869.1, ups=0.09, wpb=64756, bsz=128, num_updates=8039, lr=9.99437e-05, gnorm=2.139, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91994 2021-06-19 20:12:11 | INFO | train_inner | epoch 003: 2088 / 3002 loss=2.597, ppl=6.05, wps=5930.5, ups=0.09, wpb=64908, bsz=128, num_updates=8040, lr=9.99437e-05, gnorm=2.101, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92005 2021-06-19 20:12:22 | INFO | train_inner | epoch 003: 2089 / 3002 loss=2.671, ppl=6.37, wps=5865.9, ups=0.09, wpb=64801, bsz=128, num_updates=8041, lr=9.99437e-05, gnorm=2.026, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92016 2021-06-19 20:12:33 | INFO | train_inner | epoch 003: 2090 / 3002 loss=2.426, ppl=5.37, wps=5787.1, ups=0.09, wpb=64851, bsz=128, num_updates=8042, lr=9.99437e-05, gnorm=2.126, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92027 2021-06-19 20:12:44 | INFO | train_inner | epoch 003: 2091 / 3002 loss=2.732, ppl=6.65, wps=5879.2, ups=0.09, wpb=64867, bsz=128, num_updates=8043, lr=9.99437e-05, gnorm=2.276, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92038 2021-06-19 20:12:55 | INFO | train_inner | epoch 003: 2092 / 3002 loss=2.527, ppl=5.76, wps=5880.9, ups=0.09, wpb=64857, bsz=128, num_updates=8044, lr=9.99436e-05, gnorm=2.005, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92049 2021-06-19 20:13:06 | INFO | train_inner | epoch 003: 2093 / 3002 loss=2.614, ppl=6.12, wps=5909.9, ups=0.09, wpb=64862, bsz=128, num_updates=8045, lr=9.99436e-05, gnorm=2.138, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92060 2021-06-19 20:13:17 | INFO | train_inner | epoch 003: 2094 / 3002 loss=2.47, ppl=5.54, wps=5769.2, ups=0.09, wpb=64839, bsz=128, num_updates=8046, lr=9.99436e-05, gnorm=2.043, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92071 2021-06-19 20:13:28 | INFO | train_inner | epoch 003: 2095 / 3002 loss=2.581, ppl=5.98, wps=5789.9, ups=0.09, wpb=64783, bsz=128, num_updates=8047, lr=9.99436e-05, gnorm=5.256, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92083 2021-06-19 20:13:40 | INFO | train_inner | epoch 003: 2096 / 3002 loss=2.646, ppl=6.26, wps=5744.5, ups=0.09, wpb=64832, bsz=128, num_updates=8048, lr=9.99436e-05, gnorm=2.066, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92094 2021-06-19 20:13:51 | INFO | train_inner | epoch 003: 2097 / 3002 loss=2.557, ppl=5.88, wps=5818.2, ups=0.09, wpb=64848, bsz=128, num_updates=8049, lr=9.99436e-05, gnorm=2.217, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92105 2021-06-19 20:14:02 | INFO | train_inner | epoch 003: 2098 / 3002 loss=2.596, ppl=6.05, wps=5847.5, ups=0.09, wpb=64834, bsz=128, num_updates=8050, lr=9.99436e-05, gnorm=2.046, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92116 2021-06-19 20:14:13 | INFO | train_inner | epoch 003: 2099 / 3002 loss=2.611, ppl=6.11, wps=5772.2, ups=0.09, wpb=64844, bsz=128, num_updates=8051, lr=9.99436e-05, gnorm=2.088, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92127 2021-06-19 20:14:24 | INFO | train_inner | epoch 003: 2100 / 3002 loss=2.537, ppl=5.8, wps=5876.5, ups=0.09, wpb=64911, bsz=128, num_updates=8052, lr=9.99436e-05, gnorm=2.148, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92138 2021-06-19 20:14:35 | INFO | train_inner | epoch 003: 2101 / 3002 loss=2.548, ppl=5.85, wps=5862.5, ups=0.09, wpb=64849, bsz=128, num_updates=8053, lr=9.99436e-05, gnorm=2.473, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92149 2021-06-19 20:14:46 | INFO | train_inner | epoch 003: 2102 / 3002 loss=2.674, ppl=6.38, wps=5859.3, ups=0.09, wpb=64754, bsz=128, num_updates=8054, lr=9.99436e-05, gnorm=2.042, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92160 2021-06-19 20:14:57 | INFO | train_inner | epoch 003: 2103 / 3002 loss=2.65, ppl=6.27, wps=5817.2, ups=0.09, wpb=64821, bsz=128, num_updates=8055, lr=9.99436e-05, gnorm=2.766, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92172 2021-06-19 20:15:08 | INFO | train_inner | epoch 003: 2104 / 3002 loss=2.675, ppl=6.39, wps=5916.2, ups=0.09, wpb=64859, bsz=128, num_updates=8056, lr=9.99435e-05, gnorm=2.066, loss_scale=1, train_wall=11, gb_free=2.8, wall=92183 2021-06-19 20:15:19 | INFO | train_inner | epoch 003: 2105 / 3002 loss=2.629, ppl=6.19, wps=5936, ups=0.09, wpb=64804, bsz=128, num_updates=8057, lr=9.99435e-05, gnorm=4.902, loss_scale=1, train_wall=10, gb_free=2.8, wall=92193 2021-06-19 20:15:30 | INFO | train_inner | epoch 003: 2106 / 3002 loss=2.825, ppl=7.09, wps=5820.6, ups=0.09, wpb=64810, bsz=128, num_updates=8058, lr=9.99435e-05, gnorm=2.262, loss_scale=1, train_wall=11, gb_free=2.8, wall=92205 2021-06-19 20:15:41 | INFO | train_inner | epoch 003: 2107 / 3002 loss=2.457, ppl=5.49, wps=5792.7, ups=0.09, wpb=64830, bsz=128, num_updates=8059, lr=9.99435e-05, gnorm=2.172, loss_scale=1, train_wall=11, gb_free=2.8, wall=92216 2021-06-19 20:15:53 | INFO | train_inner | epoch 003: 2108 / 3002 loss=2.576, ppl=5.96, wps=5888.1, ups=0.09, wpb=64875, bsz=128, num_updates=8060, lr=9.99435e-05, gnorm=2.359, loss_scale=1, train_wall=11, gb_free=2.8, wall=92227 2021-06-19 20:16:04 | INFO | train_inner | epoch 003: 2109 / 3002 loss=2.651, ppl=6.28, wps=5870.4, ups=0.09, wpb=64838, bsz=128, num_updates=8061, lr=9.99435e-05, gnorm=2.307, loss_scale=1, train_wall=11, gb_free=2.8, wall=92238 2021-06-19 20:16:15 | INFO | train_inner | epoch 003: 2110 / 3002 loss=2.723, ppl=6.6, wps=5841.8, ups=0.09, wpb=64818, bsz=128, num_updates=8062, lr=9.99435e-05, gnorm=6.586, loss_scale=1, train_wall=11, gb_free=2.8, wall=92249 2021-06-19 20:16:26 | INFO | train_inner | epoch 003: 2111 / 3002 loss=2.734, ppl=6.65, wps=5835.9, ups=0.09, wpb=64686, bsz=128, num_updates=8063, lr=9.99435e-05, gnorm=2.27, loss_scale=1, train_wall=11, gb_free=2.8, wall=92260 2021-06-19 20:16:37 | INFO | train_inner | epoch 003: 2112 / 3002 loss=2.593, ppl=6.03, wps=5927.6, ups=0.09, wpb=64896, bsz=128, num_updates=8064, lr=9.99435e-05, gnorm=2.256, loss_scale=1, train_wall=10, gb_free=2.8, wall=92271 2021-06-19 20:16:48 | INFO | train_inner | epoch 003: 2113 / 3002 loss=2.698, ppl=6.49, wps=5927, ups=0.09, wpb=64880, bsz=128, num_updates=8065, lr=9.99435e-05, gnorm=2.336, loss_scale=1, train_wall=11, gb_free=2.8, wall=92282 2021-06-19 20:16:59 | INFO | train_inner | epoch 003: 2114 / 3002 loss=2.647, ppl=6.27, wps=5789.5, ups=0.09, wpb=64806, bsz=128, num_updates=8066, lr=9.99435e-05, gnorm=2.222, loss_scale=1, train_wall=11, gb_free=2.8, wall=92293 2021-06-19 20:17:10 | INFO | train_inner | epoch 003: 2115 / 3002 loss=2.514, ppl=5.71, wps=5799.7, ups=0.09, wpb=64846, bsz=128, num_updates=8067, lr=9.99435e-05, gnorm=2.137, loss_scale=1, train_wall=11, gb_free=2.8, wall=92304 2021-06-19 20:17:21 | INFO | train_inner | epoch 003: 2116 / 3002 loss=2.586, ppl=6.01, wps=5903.8, ups=0.09, wpb=64792, bsz=128, num_updates=8068, lr=9.99435e-05, gnorm=11.013, loss_scale=1, train_wall=11, gb_free=2.8, wall=92315 2021-06-19 20:17:32 | INFO | train_inner | epoch 003: 2117 / 3002 loss=2.65, ppl=6.27, wps=5766.3, ups=0.09, wpb=64815, bsz=128, num_updates=8069, lr=9.99434e-05, gnorm=3.09, loss_scale=1, train_wall=11, gb_free=2.8, wall=92327 2021-06-19 20:17:43 | INFO | train_inner | epoch 003: 2118 / 3002 loss=2.64, ppl=6.23, wps=5833.9, ups=0.09, wpb=64768, bsz=128, num_updates=8070, lr=9.99434e-05, gnorm=4.879, loss_scale=1, train_wall=11, gb_free=2.8, wall=92338 2021-06-19 20:17:54 | INFO | train_inner | epoch 003: 2119 / 3002 loss=2.704, ppl=6.52, wps=5838.8, ups=0.09, wpb=64871, bsz=128, num_updates=8071, lr=9.99434e-05, gnorm=2.218, loss_scale=1, train_wall=11, gb_free=2.8, wall=92349 2021-06-19 20:18:06 | INFO | train_inner | epoch 003: 2120 / 3002 loss=2.635, ppl=6.21, wps=5761.3, ups=0.09, wpb=64879, bsz=128, num_updates=8072, lr=9.99434e-05, gnorm=2.437, loss_scale=1, train_wall=11, gb_free=2.8, wall=92360 2021-06-19 20:18:17 | INFO | train_inner | epoch 003: 2121 / 3002 loss=2.717, ppl=6.57, wps=5749.9, ups=0.09, wpb=64816, bsz=128, num_updates=8073, lr=9.99434e-05, gnorm=2.538, loss_scale=1, train_wall=11, gb_free=2.8, wall=92371 2021-06-19 20:18:28 | INFO | train_inner | epoch 003: 2122 / 3002 loss=2.637, ppl=6.22, wps=5762.5, ups=0.09, wpb=64881, bsz=128, num_updates=8074, lr=9.99434e-05, gnorm=2.292, loss_scale=1, train_wall=11, gb_free=2.8, wall=92383 2021-06-19 20:18:39 | INFO | train_inner | epoch 003: 2123 / 3002 loss=2.718, ppl=6.58, wps=5876.9, ups=0.09, wpb=64873, bsz=128, num_updates=8075, lr=9.99434e-05, gnorm=3.279, loss_scale=1, train_wall=11, gb_free=2.8, wall=92394 2021-06-19 20:18:51 | INFO | train_inner | epoch 003: 2124 / 3002 loss=2.701, ppl=6.5, wps=5749.3, ups=0.09, wpb=64807, bsz=128, num_updates=8076, lr=9.99434e-05, gnorm=3.349, loss_scale=1, train_wall=11, gb_free=2.8, wall=92405 2021-06-19 20:19:02 | INFO | train_inner | epoch 003: 2125 / 3002 loss=2.525, ppl=5.76, wps=5736.1, ups=0.09, wpb=64856, bsz=128, num_updates=8077, lr=9.99434e-05, gnorm=2.171, loss_scale=1, train_wall=11, gb_free=2.8, wall=92416 2021-06-19 20:19:13 | INFO | train_inner | epoch 003: 2126 / 3002 loss=2.65, ppl=6.27, wps=5833.7, ups=0.09, wpb=64833, bsz=128, num_updates=8078, lr=9.99434e-05, gnorm=2.162, loss_scale=1, train_wall=11, gb_free=2.8, wall=92427 2021-06-19 20:19:24 | INFO | train_inner | epoch 003: 2127 / 3002 loss=2.749, ppl=6.72, wps=5811.3, ups=0.09, wpb=64834, bsz=128, num_updates=8079, lr=9.99434e-05, gnorm=2.186, loss_scale=1, train_wall=11, gb_free=2.8, wall=92438 2021-06-19 20:19:35 | INFO | train_inner | epoch 003: 2128 / 3002 loss=2.602, ppl=6.07, wps=5839.2, ups=0.09, wpb=64887, bsz=128, num_updates=8080, lr=9.99434e-05, gnorm=2.207, loss_scale=1, train_wall=11, gb_free=2.8, wall=92450 2021-06-19 20:19:46 | INFO | train_inner | epoch 003: 2129 / 3002 loss=2.753, ppl=6.74, wps=5875.3, ups=0.09, wpb=64817, bsz=128, num_updates=8081, lr=9.99433e-05, gnorm=2.12, loss_scale=1, train_wall=11, gb_free=2.8, wall=92461 2021-06-19 20:19:57 | INFO | train_inner | epoch 003: 2130 / 3002 loss=2.655, ppl=6.3, wps=5939.7, ups=0.09, wpb=64846, bsz=128, num_updates=8082, lr=9.99433e-05, gnorm=2.153, loss_scale=1, train_wall=10, gb_free=2.8, wall=92472 2021-06-19 20:20:08 | INFO | train_inner | epoch 003: 2131 / 3002 loss=2.625, ppl=6.17, wps=5914.5, ups=0.09, wpb=64895, bsz=128, num_updates=8083, lr=9.99433e-05, gnorm=2.345, loss_scale=1, train_wall=10, gb_free=2.8, wall=92482 2021-06-19 20:20:19 | INFO | train_inner | epoch 003: 2132 / 3002 loss=2.736, ppl=6.66, wps=5737.3, ups=0.09, wpb=64761, bsz=128, num_updates=8084, lr=9.99433e-05, gnorm=2.297, loss_scale=1, train_wall=11, gb_free=2.8, wall=92494 2021-06-19 20:20:31 | INFO | train_inner | epoch 003: 2133 / 3002 loss=2.635, ppl=6.21, wps=5725.7, ups=0.09, wpb=64863, bsz=128, num_updates=8085, lr=9.99433e-05, gnorm=2.261, loss_scale=1, train_wall=11, gb_free=2.8, wall=92505 2021-06-19 20:20:42 | INFO | train_inner | epoch 003: 2134 / 3002 loss=2.564, ppl=5.91, wps=5912.8, ups=0.09, wpb=64849, bsz=128, num_updates=8086, lr=9.99433e-05, gnorm=2.139, loss_scale=1, train_wall=11, gb_free=2.8, wall=92516 2021-06-19 20:20:53 | INFO | train_inner | epoch 003: 2135 / 3002 loss=2.551, ppl=5.86, wps=5732.3, ups=0.09, wpb=64834, bsz=128, num_updates=8087, lr=9.99433e-05, gnorm=2.125, loss_scale=1, train_wall=11, gb_free=2.8, wall=92527 2021-06-19 20:21:04 | INFO | train_inner | epoch 003: 2136 / 3002 loss=2.707, ppl=6.53, wps=5816.4, ups=0.09, wpb=64768, bsz=128, num_updates=8088, lr=9.99433e-05, gnorm=2.294, loss_scale=1, train_wall=11, gb_free=2.8, wall=92539 2021-06-19 20:21:15 | INFO | train_inner | epoch 003: 2137 / 3002 loss=2.653, ppl=6.29, wps=5791.3, ups=0.09, wpb=64766, bsz=128, num_updates=8089, lr=9.99433e-05, gnorm=2.534, loss_scale=1, train_wall=11, gb_free=2.8, wall=92550 2021-06-19 20:21:26 | INFO | train_inner | epoch 003: 2138 / 3002 loss=2.74, ppl=6.68, wps=5854.4, ups=0.09, wpb=64875, bsz=128, num_updates=8090, lr=9.99433e-05, gnorm=2.182, loss_scale=1, train_wall=11, gb_free=2.8, wall=92561 2021-06-19 20:21:38 | INFO | train_inner | epoch 003: 2139 / 3002 loss=2.617, ppl=6.14, wps=5730, ups=0.09, wpb=64822, bsz=128, num_updates=8091, lr=9.99433e-05, gnorm=2.097, loss_scale=1, train_wall=11, gb_free=2.8, wall=92572 2021-06-19 20:21:49 | INFO | train_inner | epoch 003: 2140 / 3002 loss=2.797, ppl=6.95, wps=5762.8, ups=0.09, wpb=64686, bsz=128, num_updates=8092, lr=9.99433e-05, gnorm=3.153, loss_scale=1, train_wall=11, gb_free=2.8, wall=92583 2021-06-19 20:22:00 | INFO | train_inner | epoch 003: 2141 / 3002 loss=2.699, ppl=6.49, wps=5797.2, ups=0.09, wpb=64786, bsz=128, num_updates=8093, lr=9.99433e-05, gnorm=2.205, loss_scale=1, train_wall=11, gb_free=2.8, wall=92594 2021-06-19 20:22:11 | INFO | train_inner | epoch 003: 2142 / 3002 loss=2.482, ppl=5.59, wps=5865.2, ups=0.09, wpb=64882, bsz=128, num_updates=8094, lr=9.99432e-05, gnorm=2.205, loss_scale=1, train_wall=11, gb_free=2.8, wall=92606 2021-06-19 20:22:22 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 2021-06-19 20:22:34 | INFO | train_inner | epoch 003: 2144 / 3002 loss=2.632, ppl=6.2, wps=2907.8, ups=0.04, wpb=64805, bsz=128, num_updates=8095, lr=9.99432e-05, gnorm=2.351, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=92628 2021-06-19 20:22:45 | INFO | train_inner | epoch 003: 2145 / 3002 loss=2.756, ppl=6.76, wps=5861.3, ups=0.09, wpb=64833, bsz=128, num_updates=8096, lr=9.99432e-05, gnorm=2.383, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92639 2021-06-19 20:22:56 | INFO | train_inner | epoch 003: 2146 / 3002 loss=2.623, ppl=6.16, wps=5774.3, ups=0.09, wpb=64865, bsz=128, num_updates=8097, lr=9.99432e-05, gnorm=2.054, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92650 2021-06-19 20:23:07 | INFO | train_inner | epoch 003: 2147 / 3002 loss=2.618, ppl=6.14, wps=5812.4, ups=0.09, wpb=64774, bsz=128, num_updates=8098, lr=9.99432e-05, gnorm=2.138, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92661 2021-06-19 20:23:18 | INFO | train_inner | epoch 003: 2148 / 3002 loss=2.541, ppl=5.82, wps=5863.9, ups=0.09, wpb=64884, bsz=128, num_updates=8099, lr=9.99432e-05, gnorm=2.045, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92672 2021-06-19 20:23:29 | INFO | train_inner | epoch 003: 2149 / 3002 loss=2.648, ppl=6.27, wps=5790.6, ups=0.09, wpb=64755, bsz=128, num_updates=8100, lr=9.99432e-05, gnorm=2.147, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92684 2021-06-19 20:23:40 | INFO | train_inner | epoch 003: 2150 / 3002 loss=2.598, ppl=6.05, wps=5896.3, ups=0.09, wpb=64811, bsz=128, num_updates=8101, lr=9.99432e-05, gnorm=2.11, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92695 2021-06-19 20:23:51 | INFO | train_inner | epoch 003: 2151 / 3002 loss=2.716, ppl=6.57, wps=5789.4, ups=0.09, wpb=64782, bsz=128, num_updates=8102, lr=9.99432e-05, gnorm=2.251, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92706 2021-06-19 20:24:03 | INFO | train_inner | epoch 003: 2152 / 3002 loss=2.845, ppl=7.18, wps=5805, ups=0.09, wpb=64747, bsz=128, num_updates=8103, lr=9.99432e-05, gnorm=2.116, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92717 2021-06-19 20:24:14 | INFO | train_inner | epoch 003: 2153 / 3002 loss=2.541, ppl=5.82, wps=5903.5, ups=0.09, wpb=64792, bsz=128, num_updates=8104, lr=9.99432e-05, gnorm=2.418, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92728 2021-06-19 20:24:25 | INFO | train_inner | epoch 003: 2154 / 3002 loss=2.476, ppl=5.56, wps=5895.6, ups=0.09, wpb=64980, bsz=128, num_updates=8105, lr=9.99432e-05, gnorm=2.143, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92739 2021-06-19 20:24:36 | INFO | train_inner | epoch 003: 2155 / 3002 loss=2.581, ppl=5.99, wps=5828.6, ups=0.09, wpb=64824, bsz=128, num_updates=8106, lr=9.99431e-05, gnorm=2.151, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92750 2021-06-19 20:24:47 | INFO | train_inner | epoch 003: 2156 / 3002 loss=2.647, ppl=6.26, wps=5742.1, ups=0.09, wpb=64784, bsz=128, num_updates=8107, lr=9.99431e-05, gnorm=2.055, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92761 2021-06-19 20:24:58 | INFO | train_inner | epoch 003: 2157 / 3002 loss=2.792, ppl=6.93, wps=5922.8, ups=0.09, wpb=64854, bsz=128, num_updates=8108, lr=9.99431e-05, gnorm=2.217, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=92772 2021-06-19 20:25:09 | INFO | train_inner | epoch 003: 2158 / 3002 loss=2.52, ppl=5.74, wps=5838.8, ups=0.09, wpb=64765, bsz=128, num_updates=8109, lr=9.99431e-05, gnorm=2.087, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92783 2021-06-19 20:25:20 | INFO | train_inner | epoch 003: 2159 / 3002 loss=2.606, ppl=6.09, wps=5794.4, ups=0.09, wpb=64857, bsz=128, num_updates=8110, lr=9.99431e-05, gnorm=2.487, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92795 2021-06-19 20:25:31 | INFO | train_inner | epoch 003: 2160 / 3002 loss=2.636, ppl=6.22, wps=5828.9, ups=0.09, wpb=64898, bsz=128, num_updates=8111, lr=9.99431e-05, gnorm=2.113, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92806 2021-06-19 20:25:42 | INFO | train_inner | epoch 003: 2161 / 3002 loss=2.57, ppl=5.94, wps=5935.5, ups=0.09, wpb=64741, bsz=128, num_updates=8112, lr=9.99431e-05, gnorm=2.136, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=92817 2021-06-19 20:25:53 | INFO | train_inner | epoch 003: 2162 / 3002 loss=2.551, ppl=5.86, wps=5886.2, ups=0.09, wpb=64890, bsz=128, num_updates=8113, lr=9.99431e-05, gnorm=2.144, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92828 2021-06-19 20:26:04 | INFO | train_inner | epoch 003: 2163 / 3002 loss=2.717, ppl=6.57, wps=5983.3, ups=0.09, wpb=64873, bsz=128, num_updates=8114, lr=9.99431e-05, gnorm=2.158, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=92838 2021-06-19 20:26:15 | INFO | train_inner | epoch 003: 2164 / 3002 loss=2.498, ppl=5.65, wps=5883.9, ups=0.09, wpb=64768, bsz=128, num_updates=8115, lr=9.99431e-05, gnorm=2.24, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92849 2021-06-19 20:26:26 | INFO | train_inner | epoch 003: 2165 / 3002 loss=2.6, ppl=6.06, wps=5900.2, ups=0.09, wpb=64797, bsz=128, num_updates=8116, lr=9.99431e-05, gnorm=39.753, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92860 2021-06-19 20:26:37 | INFO | train_inner | epoch 003: 2166 / 3002 loss=2.655, ppl=6.3, wps=5906.7, ups=0.09, wpb=64727, bsz=128, num_updates=8117, lr=9.99431e-05, gnorm=2.665, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92871 2021-06-19 20:26:48 | INFO | train_inner | epoch 003: 2167 / 3002 loss=2.607, ppl=6.09, wps=5835.9, ups=0.09, wpb=64854, bsz=128, num_updates=8118, lr=9.99431e-05, gnorm=2.074, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92882 2021-06-19 20:26:59 | INFO | train_inner | epoch 003: 2168 / 3002 loss=2.543, ppl=5.83, wps=5967, ups=0.09, wpb=64819, bsz=128, num_updates=8119, lr=9.9943e-05, gnorm=2.19, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=92893 2021-06-19 20:27:10 | INFO | train_inner | epoch 003: 2169 / 3002 loss=2.604, ppl=6.08, wps=5904.9, ups=0.09, wpb=64868, bsz=128, num_updates=8120, lr=9.9943e-05, gnorm=2.138, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92904 2021-06-19 20:27:21 | INFO | train_inner | epoch 003: 2170 / 3002 loss=2.668, ppl=6.36, wps=5856.3, ups=0.09, wpb=64830, bsz=128, num_updates=8121, lr=9.9943e-05, gnorm=2.151, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92915 2021-06-19 20:27:32 | INFO | train_inner | epoch 003: 2171 / 3002 loss=2.613, ppl=6.12, wps=5807.7, ups=0.09, wpb=64879, bsz=128, num_updates=8122, lr=9.9943e-05, gnorm=2.24, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92927 2021-06-19 20:27:43 | INFO | train_inner | epoch 003: 2172 / 3002 loss=2.717, ppl=6.58, wps=5809.5, ups=0.09, wpb=64849, bsz=128, num_updates=8123, lr=9.9943e-05, gnorm=2.197, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92938 2021-06-19 20:27:55 | INFO | train_inner | epoch 003: 2173 / 3002 loss=2.619, ppl=6.14, wps=5817.9, ups=0.09, wpb=64864, bsz=128, num_updates=8124, lr=9.9943e-05, gnorm=3.018, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92949 2021-06-19 20:28:06 | INFO | train_inner | epoch 003: 2174 / 3002 loss=2.683, ppl=6.42, wps=5727.7, ups=0.09, wpb=64847, bsz=128, num_updates=8125, lr=9.9943e-05, gnorm=2.329, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92960 2021-06-19 20:28:17 | INFO | train_inner | epoch 003: 2175 / 3002 loss=2.53, ppl=5.78, wps=5706.5, ups=0.09, wpb=64784, bsz=128, num_updates=8126, lr=9.9943e-05, gnorm=8.218, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92972 2021-06-19 20:28:28 | INFO | train_inner | epoch 003: 2176 / 3002 loss=2.621, ppl=6.15, wps=5928.3, ups=0.09, wpb=64840, bsz=128, num_updates=8127, lr=9.9943e-05, gnorm=8.159, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=92982 2021-06-19 20:28:39 | INFO | train_inner | epoch 003: 2177 / 3002 loss=2.647, ppl=6.26, wps=5853, ups=0.09, wpb=64874, bsz=128, num_updates=8128, lr=9.9943e-05, gnorm=2.158, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92994 2021-06-19 20:28:50 | INFO | train_inner | epoch 003: 2178 / 3002 loss=2.685, ppl=6.43, wps=5886, ups=0.09, wpb=64789, bsz=128, num_updates=8129, lr=9.9943e-05, gnorm=2.2, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93005 2021-06-19 20:29:01 | INFO | train_inner | epoch 003: 2179 / 3002 loss=2.514, ppl=5.71, wps=5779.7, ups=0.09, wpb=64843, bsz=128, num_updates=8130, lr=9.9943e-05, gnorm=2.066, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93016 2021-06-19 20:29:13 | INFO | train_inner | epoch 003: 2180 / 3002 loss=2.674, ppl=6.38, wps=5815.2, ups=0.09, wpb=64851, bsz=128, num_updates=8131, lr=9.99429e-05, gnorm=2.119, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93027 2021-06-19 20:29:23 | INFO | train_inner | epoch 003: 2181 / 3002 loss=2.591, ppl=6.03, wps=5981.1, ups=0.09, wpb=64923, bsz=128, num_updates=8132, lr=9.99429e-05, gnorm=2.239, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93038 2021-06-19 20:29:35 | INFO | train_inner | epoch 003: 2182 / 3002 loss=2.602, ppl=6.07, wps=5767.6, ups=0.09, wpb=64824, bsz=128, num_updates=8133, lr=9.99429e-05, gnorm=2.167, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93049 2021-06-19 20:29:46 | INFO | train_inner | epoch 003: 2183 / 3002 loss=2.656, ppl=6.3, wps=5799.5, ups=0.09, wpb=64771, bsz=128, num_updates=8134, lr=9.99429e-05, gnorm=2.198, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93060 2021-06-19 20:29:57 | INFO | train_inner | epoch 003: 2184 / 3002 loss=2.839, ppl=7.15, wps=5838.5, ups=0.09, wpb=64860, bsz=128, num_updates=8135, lr=9.99429e-05, gnorm=2.096, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93071 2021-06-19 20:30:08 | INFO | train_inner | epoch 003: 2185 / 3002 loss=2.816, ppl=7.04, wps=5902.8, ups=0.09, wpb=64868, bsz=128, num_updates=8136, lr=9.99429e-05, gnorm=2.254, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93082 2021-06-19 20:30:19 | INFO | train_inner | epoch 003: 2186 / 3002 loss=2.697, ppl=6.48, wps=5807, ups=0.09, wpb=64861, bsz=128, num_updates=8137, lr=9.99429e-05, gnorm=2.203, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93093 2021-06-19 20:30:30 | INFO | train_inner | epoch 003: 2187 / 3002 loss=2.74, ppl=6.68, wps=5829.4, ups=0.09, wpb=64809, bsz=128, num_updates=8138, lr=9.99429e-05, gnorm=2.159, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93105 2021-06-19 20:30:41 | INFO | train_inner | epoch 003: 2188 / 3002 loss=2.605, ppl=6.09, wps=5877.8, ups=0.09, wpb=64849, bsz=128, num_updates=8139, lr=9.99429e-05, gnorm=2.159, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93116 2021-06-19 20:30:52 | INFO | train_inner | epoch 003: 2189 / 3002 loss=2.42, ppl=5.35, wps=5873.3, ups=0.09, wpb=64800, bsz=128, num_updates=8140, lr=9.99429e-05, gnorm=2.21, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93127 2021-06-19 20:31:03 | INFO | train_inner | epoch 003: 2190 / 3002 loss=2.586, ppl=6, wps=5875.4, ups=0.09, wpb=64836, bsz=128, num_updates=8141, lr=9.99429e-05, gnorm=2.123, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93138 2021-06-19 20:31:14 | INFO | train_inner | epoch 003: 2191 / 3002 loss=2.594, ppl=6.04, wps=5852, ups=0.09, wpb=64818, bsz=128, num_updates=8142, lr=9.99429e-05, gnorm=2.137, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93149 2021-06-19 20:31:25 | INFO | train_inner | epoch 003: 2192 / 3002 loss=2.537, ppl=5.8, wps=5878.7, ups=0.09, wpb=64837, bsz=128, num_updates=8143, lr=9.99429e-05, gnorm=2.325, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93160 2021-06-19 20:31:36 | INFO | train_inner | epoch 003: 2193 / 3002 loss=2.459, ppl=5.5, wps=5913.8, ups=0.09, wpb=64805, bsz=128, num_updates=8144, lr=9.99428e-05, gnorm=2.364, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93171 2021-06-19 20:31:48 | INFO | train_inner | epoch 003: 2194 / 3002 loss=2.656, ppl=6.3, wps=5817.6, ups=0.09, wpb=64832, bsz=128, num_updates=8145, lr=9.99428e-05, gnorm=2.265, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93182 2021-06-19 20:31:59 | INFO | train_inner | epoch 003: 2195 / 3002 loss=2.612, ppl=6.11, wps=5858.7, ups=0.09, wpb=64823, bsz=128, num_updates=8146, lr=9.99428e-05, gnorm=17.744, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93193 2021-06-19 20:32:10 | INFO | train_inner | epoch 003: 2196 / 3002 loss=2.579, ppl=5.98, wps=5760, ups=0.09, wpb=64876, bsz=128, num_updates=8147, lr=9.99428e-05, gnorm=2.449, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93204 2021-06-19 20:32:21 | INFO | train_inner | epoch 003: 2197 / 3002 loss=2.527, ppl=5.76, wps=5834.7, ups=0.09, wpb=64762, bsz=128, num_updates=8148, lr=9.99428e-05, gnorm=2.046, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93215 2021-06-19 20:32:32 | INFO | train_inner | epoch 003: 2198 / 3002 loss=2.654, ppl=6.29, wps=5854, ups=0.09, wpb=64797, bsz=128, num_updates=8149, lr=9.99428e-05, gnorm=2.071, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93226 2021-06-19 20:32:43 | INFO | train_inner | epoch 003: 2199 / 3002 loss=2.61, ppl=6.11, wps=5853.9, ups=0.09, wpb=64808, bsz=128, num_updates=8150, lr=9.99428e-05, gnorm=2.172, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93237 2021-06-19 20:32:54 | INFO | train_inner | epoch 003: 2200 / 3002 loss=2.654, ppl=6.29, wps=5877.3, ups=0.09, wpb=64798, bsz=128, num_updates=8151, lr=9.99428e-05, gnorm=2.067, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93249 2021-06-19 20:33:05 | INFO | train_inner | epoch 003: 2201 / 3002 loss=2.777, ppl=6.85, wps=5896.2, ups=0.09, wpb=64813, bsz=128, num_updates=8152, lr=9.99428e-05, gnorm=2.05, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93260 2021-06-19 20:33:16 | INFO | train_inner | epoch 003: 2202 / 3002 loss=2.682, ppl=6.42, wps=5850.6, ups=0.09, wpb=64819, bsz=128, num_updates=8153, lr=9.99428e-05, gnorm=2.156, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93271 2021-06-19 20:33:27 | INFO | train_inner | epoch 003: 2203 / 3002 loss=2.708, ppl=6.53, wps=5948.2, ups=0.09, wpb=64829, bsz=128, num_updates=8154, lr=9.99428e-05, gnorm=2.109, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93281 2021-06-19 20:33:38 | INFO | train_inner | epoch 003: 2204 / 3002 loss=2.647, ppl=6.26, wps=5871.1, ups=0.09, wpb=64804, bsz=128, num_updates=8155, lr=9.99428e-05, gnorm=2.118, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93293 2021-06-19 20:33:49 | INFO | train_inner | epoch 003: 2205 / 3002 loss=2.551, ppl=5.86, wps=5745.3, ups=0.09, wpb=64817, bsz=128, num_updates=8156, lr=9.99427e-05, gnorm=2.169, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93304 2021-06-19 20:34:01 | INFO | train_inner | epoch 003: 2206 / 3002 loss=2.665, ppl=6.34, wps=5847.1, ups=0.09, wpb=64875, bsz=128, num_updates=8157, lr=9.99427e-05, gnorm=2.084, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93315 2021-06-19 20:34:12 | INFO | train_inner | epoch 003: 2207 / 3002 loss=2.503, ppl=5.67, wps=5759.2, ups=0.09, wpb=64892, bsz=128, num_updates=8158, lr=9.99427e-05, gnorm=2.159, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93326 2021-06-19 20:34:23 | INFO | train_inner | epoch 003: 2208 / 3002 loss=2.648, ppl=6.27, wps=6052.9, ups=0.09, wpb=64975, bsz=128, num_updates=8159, lr=9.99427e-05, gnorm=2.262, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93337 2021-06-19 20:34:34 | INFO | train_inner | epoch 003: 2209 / 3002 loss=2.573, ppl=5.95, wps=5797.7, ups=0.09, wpb=64903, bsz=128, num_updates=8160, lr=9.99427e-05, gnorm=2.274, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93348 2021-06-19 20:34:45 | INFO | train_inner | epoch 003: 2210 / 3002 loss=2.549, ppl=5.85, wps=5863.8, ups=0.09, wpb=64785, bsz=128, num_updates=8161, lr=9.99427e-05, gnorm=4.683, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93359 2021-06-19 20:34:56 | INFO | train_inner | epoch 003: 2211 / 3002 loss=2.804, ppl=6.98, wps=5804.6, ups=0.09, wpb=64742, bsz=128, num_updates=8162, lr=9.99427e-05, gnorm=2.205, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93370 2021-06-19 20:35:07 | INFO | train_inner | epoch 003: 2212 / 3002 loss=2.695, ppl=6.47, wps=5894.5, ups=0.09, wpb=64898, bsz=128, num_updates=8163, lr=9.99427e-05, gnorm=2.231, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93381 2021-06-19 20:35:18 | INFO | train_inner | epoch 003: 2213 / 3002 loss=2.63, ppl=6.19, wps=5853.9, ups=0.09, wpb=64852, bsz=128, num_updates=8164, lr=9.99427e-05, gnorm=3.986, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93392 2021-06-19 20:35:29 | INFO | train_inner | epoch 003: 2214 / 3002 loss=2.775, ppl=6.85, wps=5875.8, ups=0.09, wpb=64863, bsz=128, num_updates=8165, lr=9.99427e-05, gnorm=2.133, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93403 2021-06-19 20:35:40 | INFO | train_inner | epoch 003: 2215 / 3002 loss=2.787, ppl=6.9, wps=5864.6, ups=0.09, wpb=64843, bsz=128, num_updates=8166, lr=9.99427e-05, gnorm=5.294, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93414 2021-06-19 20:35:51 | INFO | train_inner | epoch 003: 2216 / 3002 loss=2.719, ppl=6.59, wps=5979.9, ups=0.09, wpb=64937, bsz=128, num_updates=8167, lr=9.99427e-05, gnorm=2.177, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93425 2021-06-19 20:36:02 | INFO | train_inner | epoch 003: 2217 / 3002 loss=2.79, ppl=6.92, wps=5796.8, ups=0.09, wpb=64842, bsz=128, num_updates=8168, lr=9.99427e-05, gnorm=2.249, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93437 2021-06-19 20:36:13 | INFO | train_inner | epoch 003: 2218 / 3002 loss=2.844, ppl=7.18, wps=5861.1, ups=0.09, wpb=64763, bsz=128, num_updates=8169, lr=9.99426e-05, gnorm=2.251, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93448 2021-06-19 20:36:24 | INFO | train_inner | epoch 003: 2219 / 3002 loss=2.59, ppl=6.02, wps=5810.5, ups=0.09, wpb=64880, bsz=128, num_updates=8170, lr=9.99426e-05, gnorm=10.445, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93459 2021-06-19 20:36:35 | INFO | train_inner | epoch 003: 2220 / 3002 loss=2.563, ppl=5.91, wps=5935.7, ups=0.09, wpb=64862, bsz=128, num_updates=8171, lr=9.99426e-05, gnorm=2.156, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93470 2021-06-19 20:36:46 | INFO | train_inner | epoch 003: 2221 / 3002 loss=2.586, ppl=6, wps=5903.3, ups=0.09, wpb=64907, bsz=128, num_updates=8172, lr=9.99426e-05, gnorm=2.276, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93481 2021-06-19 20:36:57 | INFO | train_inner | epoch 003: 2222 / 3002 loss=2.668, ppl=6.36, wps=5836.4, ups=0.09, wpb=64820, bsz=128, num_updates=8173, lr=9.99426e-05, gnorm=2.239, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93492 2021-06-19 20:37:09 | INFO | train_inner | epoch 003: 2223 / 3002 loss=2.607, ppl=6.09, wps=5708.3, ups=0.09, wpb=64775, bsz=128, num_updates=8174, lr=9.99426e-05, gnorm=2.139, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93503 2021-06-19 20:37:20 | INFO | train_inner | epoch 003: 2224 / 3002 loss=2.594, ppl=6.04, wps=5818.9, ups=0.09, wpb=64867, bsz=128, num_updates=8175, lr=9.99426e-05, gnorm=2.332, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93514 2021-06-19 20:37:31 | INFO | train_inner | epoch 003: 2225 / 3002 loss=2.608, ppl=6.1, wps=5833.9, ups=0.09, wpb=64916, bsz=128, num_updates=8176, lr=9.99426e-05, gnorm=2.118, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93525 2021-06-19 20:37:42 | INFO | train_inner | epoch 003: 2226 / 3002 loss=2.692, ppl=6.46, wps=5729.6, ups=0.09, wpb=64829, bsz=128, num_updates=8177, lr=9.99426e-05, gnorm=2.435, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93537 2021-06-19 20:37:54 | INFO | train_inner | epoch 003: 2227 / 3002 loss=2.71, ppl=6.54, wps=5770.5, ups=0.09, wpb=64809, bsz=128, num_updates=8178, lr=9.99426e-05, gnorm=2.194, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93548 2021-06-19 20:38:05 | INFO | train_inner | epoch 003: 2228 / 3002 loss=2.588, ppl=6.01, wps=5849.1, ups=0.09, wpb=64830, bsz=128, num_updates=8179, lr=9.99426e-05, gnorm=2.504, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93559 2021-06-19 20:38:16 | INFO | train_inner | epoch 003: 2229 / 3002 loss=2.461, ppl=5.51, wps=5887.8, ups=0.09, wpb=64820, bsz=128, num_updates=8180, lr=9.99426e-05, gnorm=2.142, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93570 2021-06-19 20:38:27 | INFO | train_inner | epoch 003: 2230 / 3002 loss=2.712, ppl=6.55, wps=5836.1, ups=0.09, wpb=64473, bsz=128, num_updates=8181, lr=9.99425e-05, gnorm=2.122, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93581 2021-06-19 20:38:38 | INFO | train_inner | epoch 003: 2231 / 3002 loss=2.741, ppl=6.69, wps=5839, ups=0.09, wpb=64761, bsz=128, num_updates=8182, lr=9.99425e-05, gnorm=2.918, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93592 2021-06-19 20:38:49 | INFO | train_inner | epoch 003: 2232 / 3002 loss=2.676, ppl=6.39, wps=5826.4, ups=0.09, wpb=64792, bsz=128, num_updates=8183, lr=9.99425e-05, gnorm=2.324, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93603 2021-06-19 20:39:00 | INFO | train_inner | epoch 003: 2233 / 3002 loss=2.646, ppl=6.26, wps=5900.5, ups=0.09, wpb=64875, bsz=128, num_updates=8184, lr=9.99425e-05, gnorm=4.895, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93614 2021-06-19 20:39:11 | INFO | train_inner | epoch 003: 2234 / 3002 loss=2.723, ppl=6.6, wps=5706.2, ups=0.09, wpb=64821, bsz=128, num_updates=8185, lr=9.99425e-05, gnorm=2.249, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93626 2021-06-19 20:39:23 | INFO | train_inner | epoch 003: 2235 / 3002 loss=2.749, ppl=6.72, wps=5677.6, ups=0.09, wpb=64782, bsz=128, num_updates=8186, lr=9.99425e-05, gnorm=2.087, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93637 2021-06-19 20:39:34 | INFO | train_inner | epoch 003: 2236 / 3002 loss=2.654, ppl=6.29, wps=5928.2, ups=0.09, wpb=64812, bsz=128, num_updates=8187, lr=9.99425e-05, gnorm=1.99, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93648 2021-06-19 20:39:45 | INFO | train_inner | epoch 003: 2237 / 3002 loss=2.516, ppl=5.72, wps=5817.6, ups=0.09, wpb=64900, bsz=128, num_updates=8188, lr=9.99425e-05, gnorm=2.271, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93659 2021-06-19 20:39:56 | INFO | train_inner | epoch 003: 2238 / 3002 loss=2.551, ppl=5.86, wps=6052.9, ups=0.09, wpb=64904, bsz=128, num_updates=8189, lr=9.99425e-05, gnorm=2.173, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93670 2021-06-19 20:40:07 | INFO | train_inner | epoch 003: 2239 / 3002 loss=2.562, ppl=5.91, wps=5893.8, ups=0.09, wpb=64861, bsz=128, num_updates=8190, lr=9.99425e-05, gnorm=2.102, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93681 2021-06-19 20:40:18 | INFO | train_inner | epoch 003: 2240 / 3002 loss=2.637, ppl=6.22, wps=5768.1, ups=0.09, wpb=64785, bsz=128, num_updates=8191, lr=9.99425e-05, gnorm=2.281, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93692 2021-06-19 20:40:29 | INFO | train_inner | epoch 003: 2241 / 3002 loss=2.656, ppl=6.3, wps=5818.5, ups=0.09, wpb=64809, bsz=128, num_updates=8192, lr=9.99425e-05, gnorm=2.332, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93703 2021-06-19 20:40:40 | INFO | train_inner | epoch 003: 2242 / 3002 loss=2.656, ppl=6.3, wps=5790.6, ups=0.09, wpb=64877, bsz=128, num_updates=8193, lr=9.99425e-05, gnorm=2.211, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93714 2021-06-19 20:40:51 | INFO | train_inner | epoch 003: 2243 / 3002 loss=2.583, ppl=5.99, wps=5794.6, ups=0.09, wpb=64730, bsz=128, num_updates=8194, lr=9.99424e-05, gnorm=2.105, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93726 2021-06-19 20:41:02 | INFO | train_inner | epoch 003: 2244 / 3002 loss=2.759, ppl=6.77, wps=5855.2, ups=0.09, wpb=64858, bsz=128, num_updates=8195, lr=9.99424e-05, gnorm=2.215, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93737 2021-06-19 20:41:13 | INFO | train_inner | epoch 003: 2245 / 3002 loss=2.702, ppl=6.51, wps=5852.3, ups=0.09, wpb=64856, bsz=128, num_updates=8196, lr=9.99424e-05, gnorm=2.121, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93748 2021-06-19 20:41:25 | INFO | train_inner | epoch 003: 2246 / 3002 loss=2.707, ppl=6.53, wps=5742.5, ups=0.09, wpb=64814, bsz=128, num_updates=8197, lr=9.99424e-05, gnorm=2.37, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93759 2021-06-19 20:41:36 | INFO | train_inner | epoch 003: 2247 / 3002 loss=2.595, ppl=6.04, wps=5923.4, ups=0.09, wpb=64883, bsz=128, num_updates=8198, lr=9.99424e-05, gnorm=2.286, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93770 2021-06-19 20:41:47 | INFO | train_inner | epoch 003: 2248 / 3002 loss=2.64, ppl=6.23, wps=5976.2, ups=0.09, wpb=64862, bsz=128, num_updates=8199, lr=9.99424e-05, gnorm=2.171, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93781 2021-06-19 20:41:58 | INFO | train_inner | epoch 003: 2249 / 3002 loss=2.809, ppl=7.01, wps=5863.5, ups=0.09, wpb=64713, bsz=128, num_updates=8200, lr=9.99424e-05, gnorm=2.335, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93792 2021-06-19 20:42:09 | INFO | train_inner | epoch 003: 2250 / 3002 loss=2.581, ppl=5.99, wps=5914.1, ups=0.09, wpb=64752, bsz=128, num_updates=8201, lr=9.99424e-05, gnorm=2.264, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93803 2021-06-19 20:42:20 | INFO | train_inner | epoch 003: 2251 / 3002 loss=2.556, ppl=5.88, wps=5809.1, ups=0.09, wpb=64799, bsz=128, num_updates=8202, lr=9.99424e-05, gnorm=2.212, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93814 2021-06-19 20:42:31 | INFO | train_inner | epoch 003: 2252 / 3002 loss=2.647, ppl=6.26, wps=5918.6, ups=0.09, wpb=64805, bsz=128, num_updates=8203, lr=9.99424e-05, gnorm=2.146, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93825 2021-06-19 20:42:42 | INFO | train_inner | epoch 003: 2253 / 3002 loss=2.729, ppl=6.63, wps=5877.1, ups=0.09, wpb=64860, bsz=128, num_updates=8204, lr=9.99424e-05, gnorm=2.099, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93836 2021-06-19 20:42:53 | INFO | train_inner | epoch 003: 2254 / 3002 loss=2.696, ppl=6.48, wps=5718.6, ups=0.09, wpb=64765, bsz=128, num_updates=8205, lr=9.99424e-05, gnorm=2.185, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93847 2021-06-19 20:43:04 | INFO | train_inner | epoch 003: 2255 / 3002 loss=2.669, ppl=6.36, wps=5894.4, ups=0.09, wpb=64854, bsz=128, num_updates=8206, lr=9.99423e-05, gnorm=2.092, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93858 2021-06-19 20:43:15 | INFO | train_inner | epoch 003: 2256 / 3002 loss=2.621, ppl=6.15, wps=5948.4, ups=0.09, wpb=64862, bsz=128, num_updates=8207, lr=9.99423e-05, gnorm=2.329, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93869 2021-06-19 20:43:26 | INFO | train_inner | epoch 003: 2257 / 3002 loss=2.481, ppl=5.58, wps=5833.5, ups=0.09, wpb=64823, bsz=128, num_updates=8208, lr=9.99423e-05, gnorm=2.487, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93880 2021-06-19 20:43:37 | INFO | train_inner | epoch 003: 2258 / 3002 loss=2.709, ppl=6.54, wps=5871.8, ups=0.09, wpb=64882, bsz=128, num_updates=8209, lr=9.99423e-05, gnorm=6.786, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93891 2021-06-19 20:43:48 | INFO | train_inner | epoch 003: 2259 / 3002 loss=2.716, ppl=6.57, wps=5886, ups=0.09, wpb=64837, bsz=128, num_updates=8210, lr=9.99423e-05, gnorm=2.12, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93902 2021-06-19 20:43:59 | INFO | train_inner | epoch 003: 2260 / 3002 loss=2.69, ppl=6.45, wps=5791.9, ups=0.09, wpb=64765, bsz=128, num_updates=8211, lr=9.99423e-05, gnorm=2.11, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93914 2021-06-19 20:44:10 | INFO | train_inner | epoch 003: 2261 / 3002 loss=2.52, ppl=5.73, wps=5833.5, ups=0.09, wpb=64771, bsz=128, num_updates=8212, lr=9.99423e-05, gnorm=2.242, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93925 2021-06-19 20:44:22 | INFO | train_inner | epoch 003: 2262 / 3002 loss=2.689, ppl=6.45, wps=5834.7, ups=0.09, wpb=64937, bsz=128, num_updates=8213, lr=9.99423e-05, gnorm=2.246, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93936 2021-06-19 20:44:32 | INFO | train_inner | epoch 003: 2263 / 3002 loss=2.652, ppl=6.29, wps=5992.6, ups=0.09, wpb=64875, bsz=128, num_updates=8214, lr=9.99423e-05, gnorm=6.825, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93947 2021-06-19 20:44:43 | INFO | train_inner | epoch 003: 2264 / 3002 loss=2.666, ppl=6.34, wps=5826.2, ups=0.09, wpb=64919, bsz=128, num_updates=8215, lr=9.99423e-05, gnorm=2.279, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93958 2021-06-19 20:44:55 | INFO | train_inner | epoch 003: 2265 / 3002 loss=2.648, ppl=6.27, wps=5755.9, ups=0.09, wpb=64825, bsz=128, num_updates=8216, lr=9.99423e-05, gnorm=2.717, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93969 2021-06-19 20:45:06 | INFO | train_inner | epoch 003: 2266 / 3002 loss=2.733, ppl=6.65, wps=5824.7, ups=0.09, wpb=64841, bsz=128, num_updates=8217, lr=9.99423e-05, gnorm=2.854, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93980 2021-06-19 20:45:17 | INFO | train_inner | epoch 003: 2267 / 3002 loss=2.655, ppl=6.3, wps=6011.6, ups=0.09, wpb=64880, bsz=128, num_updates=8218, lr=9.99423e-05, gnorm=2.357, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93991 2021-06-19 20:45:28 | INFO | train_inner | epoch 003: 2268 / 3002 loss=2.693, ppl=6.46, wps=5878.8, ups=0.09, wpb=64822, bsz=128, num_updates=8219, lr=9.99422e-05, gnorm=2.5, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=94002 2021-06-19 20:45:39 | INFO | train_inner | epoch 003: 2269 / 3002 loss=2.911, ppl=7.52, wps=5769.8, ups=0.09, wpb=64856, bsz=128, num_updates=8220, lr=9.99422e-05, gnorm=4.351, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=94013 2021-06-19 20:45:50 | INFO | train_inner | epoch 003: 2270 / 3002 loss=2.657, ppl=6.31, wps=5840.3, ups=0.09, wpb=64775, bsz=128, num_updates=8221, lr=9.99422e-05, gnorm=2.23, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=94024 2021-06-19 20:46:01 | INFO | train_inner | epoch 003: 2271 / 3002 loss=2.702, ppl=6.51, wps=5805.1, ups=0.09, wpb=64781, bsz=128, num_updates=8222, lr=9.99422e-05, gnorm=2.218, loss_scale=1, train_wall=11, gb_free=2.8, wall=94036 2021-06-19 20:46:12 | INFO | train_inner | epoch 003: 2272 / 3002 loss=2.536, ppl=5.8, wps=5737, ups=0.09, wpb=64824, bsz=128, num_updates=8223, lr=9.99422e-05, gnorm=2.162, loss_scale=1, train_wall=11, gb_free=2.8, wall=94047 2021-06-19 20:46:24 | INFO | train_inner | epoch 003: 2273 / 3002 loss=2.634, ppl=6.21, wps=5828.6, ups=0.09, wpb=64806, bsz=128, num_updates=8224, lr=9.99422e-05, gnorm=2.196, loss_scale=1, train_wall=11, gb_free=2.8, wall=94058 2021-06-19 20:46:35 | INFO | train_inner | epoch 003: 2274 / 3002 loss=2.681, ppl=6.41, wps=5842.9, ups=0.09, wpb=64790, bsz=128, num_updates=8225, lr=9.99422e-05, gnorm=3.763, loss_scale=1, train_wall=11, gb_free=2.8, wall=94069 2021-06-19 20:46:46 | INFO | train_inner | epoch 003: 2275 / 3002 loss=2.62, ppl=6.15, wps=5861.8, ups=0.09, wpb=64824, bsz=128, num_updates=8226, lr=9.99422e-05, gnorm=2.484, loss_scale=1, train_wall=11, gb_free=2.8, wall=94080 2021-06-19 20:46:57 | INFO | train_inner | epoch 003: 2276 / 3002 loss=2.533, ppl=5.79, wps=5778, ups=0.09, wpb=64934, bsz=128, num_updates=8227, lr=9.99422e-05, gnorm=2.228, loss_scale=1, train_wall=11, gb_free=2.8, wall=94091 2021-06-19 20:47:08 | INFO | train_inner | epoch 003: 2277 / 3002 loss=2.473, ppl=5.55, wps=5811.5, ups=0.09, wpb=64806, bsz=128, num_updates=8228, lr=9.99422e-05, gnorm=2.427, loss_scale=1, train_wall=11, gb_free=2.8, wall=94102 2021-06-19 20:47:19 | INFO | train_inner | epoch 003: 2278 / 3002 loss=2.704, ppl=6.52, wps=5909.5, ups=0.09, wpb=64875, bsz=128, num_updates=8229, lr=9.99422e-05, gnorm=2.386, loss_scale=1, train_wall=11, gb_free=2.8, wall=94113 2021-06-19 20:47:30 | INFO | train_inner | epoch 003: 2279 / 3002 loss=2.647, ppl=6.26, wps=5717.1, ups=0.09, wpb=64855, bsz=128, num_updates=8230, lr=9.99422e-05, gnorm=2.215, loss_scale=1, train_wall=11, gb_free=2.8, wall=94125 2021-06-19 20:47:42 | INFO | train_inner | epoch 003: 2280 / 3002 loss=2.662, ppl=6.33, wps=5865.9, ups=0.09, wpb=64828, bsz=128, num_updates=8231, lr=9.99421e-05, gnorm=2.163, loss_scale=1, train_wall=11, gb_free=2.8, wall=94136 2021-06-19 20:47:53 | INFO | train_inner | epoch 003: 2281 / 3002 loss=2.554, ppl=5.87, wps=5767.3, ups=0.09, wpb=64931, bsz=128, num_updates=8232, lr=9.99421e-05, gnorm=6.642, loss_scale=1, train_wall=11, gb_free=2.8, wall=94147 2021-06-19 20:48:04 | INFO | train_inner | epoch 003: 2282 / 3002 loss=2.682, ppl=6.42, wps=5891.1, ups=0.09, wpb=64879, bsz=128, num_updates=8233, lr=9.99421e-05, gnorm=2.136, loss_scale=1, train_wall=11, gb_free=2.8, wall=94158 2021-06-19 20:48:15 | INFO | train_inner | epoch 003: 2283 / 3002 loss=2.45, ppl=5.47, wps=5908.5, ups=0.09, wpb=64879, bsz=128, num_updates=8234, lr=9.99421e-05, gnorm=2.088, loss_scale=1, train_wall=11, gb_free=2.8, wall=94169 2021-06-19 20:48:26 | INFO | train_inner | epoch 003: 2284 / 3002 loss=2.672, ppl=6.38, wps=5782.3, ups=0.09, wpb=64816, bsz=128, num_updates=8235, lr=9.99421e-05, gnorm=2.542, loss_scale=1, train_wall=11, gb_free=2.8, wall=94180 2021-06-19 20:48:37 | INFO | train_inner | epoch 003: 2285 / 3002 loss=2.716, ppl=6.57, wps=5976.3, ups=0.09, wpb=64761, bsz=128, num_updates=8236, lr=9.99421e-05, gnorm=2.13, loss_scale=1, train_wall=10, gb_free=2.8, wall=94191 2021-06-19 20:48:48 | INFO | train_inner | epoch 003: 2286 / 3002 loss=2.591, ppl=6.03, wps=5905.4, ups=0.09, wpb=64898, bsz=128, num_updates=8237, lr=9.99421e-05, gnorm=2.115, loss_scale=1, train_wall=11, gb_free=2.8, wall=94202 2021-06-19 20:48:59 | INFO | train_inner | epoch 003: 2287 / 3002 loss=2.61, ppl=6.11, wps=5797, ups=0.09, wpb=64902, bsz=128, num_updates=8238, lr=9.99421e-05, gnorm=2.788, loss_scale=1, train_wall=11, gb_free=2.8, wall=94213 2021-06-19 20:49:10 | INFO | train_inner | epoch 003: 2288 / 3002 loss=2.575, ppl=5.96, wps=5849.2, ups=0.09, wpb=64748, bsz=128, num_updates=8239, lr=9.99421e-05, gnorm=2.264, loss_scale=1, train_wall=11, gb_free=2.8, wall=94224 2021-06-19 20:49:21 | INFO | train_inner | epoch 003: 2289 / 3002 loss=2.711, ppl=6.55, wps=5844.1, ups=0.09, wpb=64838, bsz=128, num_updates=8240, lr=9.99421e-05, gnorm=2.622, loss_scale=1, train_wall=11, gb_free=2.8, wall=94235 2021-06-19 20:49:32 | INFO | train_inner | epoch 003: 2290 / 3002 loss=2.759, ppl=6.77, wps=5853.2, ups=0.09, wpb=64859, bsz=128, num_updates=8241, lr=9.99421e-05, gnorm=2.239, loss_scale=1, train_wall=11, gb_free=2.8, wall=94247 2021-06-19 20:49:43 | INFO | train_inner | epoch 003: 2291 / 3002 loss=2.667, ppl=6.35, wps=5812.4, ups=0.09, wpb=64810, bsz=128, num_updates=8242, lr=9.99421e-05, gnorm=2.163, loss_scale=1, train_wall=11, gb_free=2.8, wall=94258 2021-06-19 20:49:55 | INFO | train_inner | epoch 003: 2292 / 3002 loss=2.61, ppl=6.1, wps=5811.8, ups=0.09, wpb=64856, bsz=128, num_updates=8243, lr=9.99421e-05, gnorm=2.257, loss_scale=1, train_wall=11, gb_free=2.8, wall=94269 2021-06-19 20:50:06 | INFO | train_inner | epoch 003: 2293 / 3002 loss=2.569, ppl=5.94, wps=5769.5, ups=0.09, wpb=64910, bsz=128, num_updates=8244, lr=9.9942e-05, gnorm=2.115, loss_scale=1, train_wall=11, gb_free=2.8, wall=94280 2021-06-19 20:50:17 | INFO | train_inner | epoch 003: 2294 / 3002 loss=2.567, ppl=5.93, wps=5765, ups=0.09, wpb=64899, bsz=128, num_updates=8245, lr=9.9942e-05, gnorm=2.053, loss_scale=1, train_wall=11, gb_free=2.8, wall=94291 2021-06-19 20:50:28 | INFO | train_inner | epoch 003: 2295 / 3002 loss=2.682, ppl=6.42, wps=5862.2, ups=0.09, wpb=64844, bsz=128, num_updates=8246, lr=9.9942e-05, gnorm=2.111, loss_scale=1, train_wall=11, gb_free=2.8, wall=94302 2021-06-19 20:50:39 | INFO | train_inner | epoch 003: 2296 / 3002 loss=2.641, ppl=6.24, wps=5774.9, ups=0.09, wpb=64848, bsz=128, num_updates=8247, lr=9.9942e-05, gnorm=2.198, loss_scale=1, train_wall=11, gb_free=2.8, wall=94314 2021-06-19 20:50:51 | INFO | train_inner | epoch 003: 2297 / 3002 loss=2.627, ppl=6.18, wps=5801.7, ups=0.09, wpb=64775, bsz=128, num_updates=8248, lr=9.9942e-05, gnorm=2.217, loss_scale=1, train_wall=11, gb_free=2.8, wall=94325 2021-06-19 20:51:02 | INFO | train_inner | epoch 003: 2298 / 3002 loss=2.542, ppl=5.82, wps=5892.5, ups=0.09, wpb=64838, bsz=128, num_updates=8249, lr=9.9942e-05, gnorm=2.098, loss_scale=1, train_wall=11, gb_free=2.8, wall=94336 2021-06-19 20:51:12 | INFO | train_inner | epoch 003: 2299 / 3002 loss=2.62, ppl=6.15, wps=5947.7, ups=0.09, wpb=64894, bsz=128, num_updates=8250, lr=9.9942e-05, gnorm=2.116, loss_scale=1, train_wall=10, gb_free=2.8, wall=94347 2021-06-19 20:51:23 | INFO | train_inner | epoch 003: 2300 / 3002 loss=2.596, ppl=6.04, wps=5915.8, ups=0.09, wpb=64854, bsz=128, num_updates=8251, lr=9.9942e-05, gnorm=2.105, loss_scale=1, train_wall=11, gb_free=2.8, wall=94358 2021-06-19 20:51:35 | INFO | train_inner | epoch 003: 2301 / 3002 loss=2.552, ppl=5.86, wps=5721.6, ups=0.09, wpb=64755, bsz=128, num_updates=8252, lr=9.9942e-05, gnorm=2.746, loss_scale=1, train_wall=11, gb_free=2.8, wall=94369 2021-06-19 20:51:46 | INFO | train_inner | epoch 003: 2302 / 3002 loss=2.524, ppl=5.75, wps=5908.5, ups=0.09, wpb=64732, bsz=128, num_updates=8253, lr=9.9942e-05, gnorm=2.17, loss_scale=1, train_wall=11, gb_free=2.8, wall=94380 2021-06-19 20:51:56 | INFO | train_inner | epoch 003: 2303 / 3002 loss=2.54, ppl=5.81, wps=6009.3, ups=0.09, wpb=64872, bsz=128, num_updates=8254, lr=9.9942e-05, gnorm=2.216, loss_scale=1, train_wall=10, gb_free=2.8, wall=94391 2021-06-19 20:52:08 | INFO | train_inner | epoch 003: 2304 / 3002 loss=2.533, ppl=5.79, wps=5822.5, ups=0.09, wpb=64898, bsz=128, num_updates=8255, lr=9.9942e-05, gnorm=2.159, loss_scale=1, train_wall=11, gb_free=2.8, wall=94402 2021-06-19 20:52:19 | INFO | train_inner | epoch 003: 2305 / 3002 loss=2.633, ppl=6.2, wps=5806.8, ups=0.09, wpb=64764, bsz=128, num_updates=8256, lr=9.99419e-05, gnorm=2.31, loss_scale=1, train_wall=11, gb_free=2.8, wall=94413 2021-06-19 20:52:30 | INFO | train_inner | epoch 003: 2306 / 3002 loss=2.53, ppl=5.78, wps=5819.9, ups=0.09, wpb=64883, bsz=128, num_updates=8257, lr=9.99419e-05, gnorm=2.116, loss_scale=1, train_wall=11, gb_free=2.8, wall=94424 2021-06-19 20:52:41 | INFO | train_inner | epoch 003: 2307 / 3002 loss=2.686, ppl=6.44, wps=5911.2, ups=0.09, wpb=64848, bsz=128, num_updates=8258, lr=9.99419e-05, gnorm=2.185, loss_scale=1, train_wall=11, gb_free=2.8, wall=94435 2021-06-19 20:52:52 | INFO | train_inner | epoch 003: 2308 / 3002 loss=2.582, ppl=5.99, wps=5782.8, ups=0.09, wpb=64820, bsz=128, num_updates=8259, lr=9.99419e-05, gnorm=2.167, loss_scale=1, train_wall=11, gb_free=2.8, wall=94446 2021-06-19 20:53:03 | INFO | train_inner | epoch 003: 2309 / 3002 loss=2.646, ppl=6.26, wps=5781.9, ups=0.09, wpb=64823, bsz=128, num_updates=8260, lr=9.99419e-05, gnorm=2.15, loss_scale=1, train_wall=11, gb_free=2.8, wall=94458 2021-06-19 20:53:14 | INFO | train_inner | epoch 003: 2310 / 3002 loss=2.632, ppl=6.2, wps=5926.5, ups=0.09, wpb=64807, bsz=128, num_updates=8261, lr=9.99419e-05, gnorm=2.608, loss_scale=1, train_wall=10, gb_free=2.8, wall=94469 2021-06-19 20:53:25 | INFO | train_inner | epoch 003: 2311 / 3002 loss=2.562, ppl=5.91, wps=5859.9, ups=0.09, wpb=64715, bsz=128, num_updates=8262, lr=9.99419e-05, gnorm=2.014, loss_scale=1, train_wall=11, gb_free=2.8, wall=94480 2021-06-19 20:53:36 | INFO | train_inner | epoch 003: 2312 / 3002 loss=2.653, ppl=6.29, wps=5808.4, ups=0.09, wpb=64888, bsz=128, num_updates=8263, lr=9.99419e-05, gnorm=2.079, loss_scale=1, train_wall=11, gb_free=2.8, wall=94491 2021-06-19 20:53:47 | INFO | train_inner | epoch 003: 2313 / 3002 loss=2.746, ppl=6.71, wps=5887.2, ups=0.09, wpb=64841, bsz=128, num_updates=8264, lr=9.99419e-05, gnorm=2.076, loss_scale=1, train_wall=11, gb_free=2.8, wall=94502 2021-06-19 20:53:59 | INFO | train_inner | epoch 003: 2314 / 3002 loss=2.491, ppl=5.62, wps=5735.1, ups=0.09, wpb=64744, bsz=128, num_updates=8265, lr=9.99419e-05, gnorm=2.091, loss_scale=1, train_wall=11, gb_free=2.8, wall=94513 2021-06-19 20:54:10 | INFO | train_inner | epoch 003: 2315 / 3002 loss=2.424, ppl=5.37, wps=5916.3, ups=0.09, wpb=64789, bsz=128, num_updates=8266, lr=9.99419e-05, gnorm=2.073, loss_scale=1, train_wall=10, gb_free=2.8, wall=94524 2021-06-19 20:54:21 | INFO | train_inner | epoch 003: 2316 / 3002 loss=2.572, ppl=5.95, wps=5864.5, ups=0.09, wpb=64842, bsz=128, num_updates=8267, lr=9.99419e-05, gnorm=3.552, loss_scale=1, train_wall=11, gb_free=2.8, wall=94535 2021-06-19 20:54:32 | INFO | train_inner | epoch 003: 2317 / 3002 loss=2.535, ppl=5.8, wps=5839.4, ups=0.09, wpb=64865, bsz=128, num_updates=8268, lr=9.99419e-05, gnorm=2.169, loss_scale=1, train_wall=11, gb_free=2.8, wall=94546 2021-06-19 20:54:43 | INFO | train_inner | epoch 003: 2318 / 3002 loss=2.643, ppl=6.25, wps=5721.1, ups=0.09, wpb=64739, bsz=128, num_updates=8269, lr=9.99418e-05, gnorm=2.084, loss_scale=1, train_wall=11, gb_free=2.8, wall=94558 2021-06-19 20:54:54 | INFO | train_inner | epoch 003: 2319 / 3002 loss=2.829, ppl=7.11, wps=5787.7, ups=0.09, wpb=64889, bsz=128, num_updates=8270, lr=9.99418e-05, gnorm=2.218, loss_scale=1, train_wall=11, gb_free=2.8, wall=94569 2021-06-19 20:55:06 | INFO | train_inner | epoch 003: 2320 / 3002 loss=2.693, ppl=6.47, wps=5839.5, ups=0.09, wpb=64813, bsz=128, num_updates=8271, lr=9.99418e-05, gnorm=2.191, loss_scale=1, train_wall=11, gb_free=2.8, wall=94580 2021-06-19 20:55:16 | INFO | train_inner | epoch 003: 2321 / 3002 loss=2.603, ppl=6.07, wps=5937.5, ups=0.09, wpb=64811, bsz=128, num_updates=8272, lr=9.99418e-05, gnorm=2.158, loss_scale=1, train_wall=10, gb_free=2.8, wall=94591 2021-06-19 20:55:27 | INFO | train_inner | epoch 003: 2322 / 3002 loss=2.666, ppl=6.35, wps=5981.6, ups=0.09, wpb=64842, bsz=128, num_updates=8273, lr=9.99418e-05, gnorm=2.156, loss_scale=1, train_wall=10, gb_free=2.8, wall=94602 2021-06-19 20:55:38 | INFO | train_inner | epoch 003: 2323 / 3002 loss=2.352, ppl=5.1, wps=5834.6, ups=0.09, wpb=64889, bsz=128, num_updates=8274, lr=9.99418e-05, gnorm=2.08, loss_scale=1, train_wall=11, gb_free=2.8, wall=94613 2021-06-19 20:55:50 | INFO | train_inner | epoch 003: 2324 / 3002 loss=2.657, ppl=6.31, wps=5788.7, ups=0.09, wpb=64688, bsz=128, num_updates=8275, lr=9.99418e-05, gnorm=2.177, loss_scale=1, train_wall=11, gb_free=2.8, wall=94624 2021-06-19 20:56:00 | INFO | train_inner | epoch 003: 2325 / 3002 loss=2.572, ppl=5.95, wps=5941.8, ups=0.09, wpb=64802, bsz=128, num_updates=8276, lr=9.99418e-05, gnorm=2.331, loss_scale=1, train_wall=10, gb_free=2.8, wall=94635 2021-06-19 20:56:12 | INFO | train_inner | epoch 003: 2326 / 3002 loss=2.683, ppl=6.42, wps=5812.3, ups=0.09, wpb=64820, bsz=128, num_updates=8277, lr=9.99418e-05, gnorm=2.173, loss_scale=1, train_wall=11, gb_free=2.8, wall=94646 2021-06-19 20:56:23 | INFO | train_inner | epoch 003: 2327 / 3002 loss=2.449, ppl=5.46, wps=5856.8, ups=0.09, wpb=64897, bsz=128, num_updates=8278, lr=9.99418e-05, gnorm=2.019, loss_scale=1, train_wall=11, gb_free=2.8, wall=94657 2021-06-19 20:56:34 | INFO | train_inner | epoch 003: 2328 / 3002 loss=2.576, ppl=5.96, wps=5819.5, ups=0.09, wpb=64829, bsz=128, num_updates=8279, lr=9.99418e-05, gnorm=2.176, loss_scale=1, train_wall=11, gb_free=2.8, wall=94668 2021-06-19 20:56:45 | INFO | train_inner | epoch 003: 2329 / 3002 loss=2.757, ppl=6.76, wps=5845.7, ups=0.09, wpb=64765, bsz=128, num_updates=8280, lr=9.99418e-05, gnorm=2.048, loss_scale=1, train_wall=11, gb_free=2.8, wall=94679 2021-06-19 20:56:56 | INFO | train_inner | epoch 003: 2330 / 3002 loss=2.62, ppl=6.15, wps=5888, ups=0.09, wpb=64886, bsz=128, num_updates=8281, lr=9.99417e-05, gnorm=2.147, loss_scale=1, train_wall=11, gb_free=2.8, wall=94690 2021-06-19 20:57:07 | INFO | train_inner | epoch 003: 2331 / 3002 loss=2.667, ppl=6.35, wps=5985.6, ups=0.09, wpb=64758, bsz=128, num_updates=8282, lr=9.99417e-05, gnorm=2.236, loss_scale=1, train_wall=10, gb_free=2.8, wall=94701 2021-06-19 20:57:18 | INFO | train_inner | epoch 003: 2332 / 3002 loss=2.604, ppl=6.08, wps=5892.7, ups=0.09, wpb=64840, bsz=128, num_updates=8283, lr=9.99417e-05, gnorm=2.077, loss_scale=1, train_wall=11, gb_free=2.8, wall=94712 2021-06-19 20:57:29 | INFO | train_inner | epoch 003: 2333 / 3002 loss=2.682, ppl=6.42, wps=5804.6, ups=0.09, wpb=64826, bsz=128, num_updates=8284, lr=9.99417e-05, gnorm=2.14, loss_scale=1, train_wall=11, gb_free=2.8, wall=94723 2021-06-19 20:57:40 | INFO | train_inner | epoch 003: 2334 / 3002 loss=2.569, ppl=5.93, wps=5791.7, ups=0.09, wpb=64776, bsz=128, num_updates=8285, lr=9.99417e-05, gnorm=2.294, loss_scale=1, train_wall=11, gb_free=2.8, wall=94734 2021-06-19 20:57:51 | INFO | train_inner | epoch 003: 2335 / 3002 loss=2.62, ppl=6.15, wps=5878.1, ups=0.09, wpb=64860, bsz=128, num_updates=8286, lr=9.99417e-05, gnorm=2.132, loss_scale=1, train_wall=11, gb_free=2.8, wall=94745 2021-06-19 20:58:02 | INFO | train_inner | epoch 003: 2336 / 3002 loss=2.713, ppl=6.56, wps=5875.4, ups=0.09, wpb=64830, bsz=128, num_updates=8287, lr=9.99417e-05, gnorm=2.424, loss_scale=1, train_wall=11, gb_free=2.8, wall=94757 2021-06-19 20:58:13 | INFO | train_inner | epoch 003: 2337 / 3002 loss=2.707, ppl=6.53, wps=5783.8, ups=0.09, wpb=64850, bsz=128, num_updates=8288, lr=9.99417e-05, gnorm=2.101, loss_scale=1, train_wall=11, gb_free=2.8, wall=94768 2021-06-19 20:58:25 | INFO | train_inner | epoch 003: 2338 / 3002 loss=2.723, ppl=6.6, wps=5815.3, ups=0.09, wpb=64856, bsz=128, num_updates=8289, lr=9.99417e-05, gnorm=2.125, loss_scale=1, train_wall=11, gb_free=2.8, wall=94779 2021-06-19 20:58:36 | INFO | train_inner | epoch 003: 2339 / 3002 loss=2.604, ppl=6.08, wps=5857.5, ups=0.09, wpb=64856, bsz=128, num_updates=8290, lr=9.99417e-05, gnorm=2.201, loss_scale=1, train_wall=11, gb_free=2.8, wall=94790 2021-06-19 20:58:47 | INFO | train_inner | epoch 003: 2340 / 3002 loss=2.45, ppl=5.47, wps=5793, ups=0.09, wpb=64813, bsz=128, num_updates=8291, lr=9.99417e-05, gnorm=2.621, loss_scale=1, train_wall=11, gb_free=2.8, wall=94801 2021-06-19 20:58:58 | INFO | train_inner | epoch 003: 2341 / 3002 loss=2.644, ppl=6.25, wps=5839.4, ups=0.09, wpb=64902, bsz=128, num_updates=8292, lr=9.99417e-05, gnorm=2.185, loss_scale=1, train_wall=11, gb_free=2.8, wall=94812 2021-06-19 20:59:09 | INFO | train_inner | epoch 003: 2342 / 3002 loss=2.692, ppl=6.46, wps=5865.4, ups=0.09, wpb=64762, bsz=128, num_updates=8293, lr=9.99417e-05, gnorm=2.886, loss_scale=1, train_wall=11, gb_free=2.8, wall=94823 2021-06-19 20:59:20 | INFO | train_inner | epoch 003: 2343 / 3002 loss=2.455, ppl=5.48, wps=5907.1, ups=0.09, wpb=64886, bsz=128, num_updates=8294, lr=9.99416e-05, gnorm=2.183, loss_scale=1, train_wall=11, gb_free=2.8, wall=94834 2021-06-19 20:59:31 | INFO | train_inner | epoch 003: 2344 / 3002 loss=2.53, ppl=5.78, wps=5855.3, ups=0.09, wpb=64774, bsz=128, num_updates=8295, lr=9.99416e-05, gnorm=2.112, loss_scale=1, train_wall=11, gb_free=2.8, wall=94845 2021-06-19 20:59:42 | INFO | train_inner | epoch 003: 2345 / 3002 loss=2.666, ppl=6.35, wps=5888.7, ups=0.09, wpb=64846, bsz=128, num_updates=8296, lr=9.99416e-05, gnorm=2.477, loss_scale=1, train_wall=11, gb_free=2.8, wall=94856 2021-06-19 20:59:53 | INFO | train_inner | epoch 003: 2346 / 3002 loss=2.502, ppl=5.66, wps=5807.1, ups=0.09, wpb=64771, bsz=128, num_updates=8297, lr=9.99416e-05, gnorm=2.219, loss_scale=1, train_wall=11, gb_free=2.8, wall=94868 2021-06-19 21:00:04 | INFO | train_inner | epoch 003: 2347 / 3002 loss=2.777, ppl=6.86, wps=5839.1, ups=0.09, wpb=64830, bsz=128, num_updates=8298, lr=9.99416e-05, gnorm=2.374, loss_scale=1, train_wall=11, gb_free=2.8, wall=94879 2021-06-19 21:00:15 | INFO | train_inner | epoch 003: 2348 / 3002 loss=2.58, ppl=5.98, wps=5803.9, ups=0.09, wpb=64800, bsz=128, num_updates=8299, lr=9.99416e-05, gnorm=2.083, loss_scale=1, train_wall=11, gb_free=2.8, wall=94890 2021-06-19 21:00:27 | INFO | train_inner | epoch 003: 2349 / 3002 loss=2.55, ppl=5.86, wps=5855.6, ups=0.09, wpb=64861, bsz=128, num_updates=8300, lr=9.99416e-05, gnorm=2.146, loss_scale=1, train_wall=11, gb_free=2.8, wall=94901 2021-06-19 21:00:38 | INFO | train_inner | epoch 003: 2350 / 3002 loss=2.548, ppl=5.85, wps=5801.9, ups=0.09, wpb=64765, bsz=128, num_updates=8301, lr=9.99416e-05, gnorm=4.819, loss_scale=1, train_wall=11, gb_free=2.8, wall=94912 2021-06-19 21:00:49 | INFO | train_inner | epoch 003: 2351 / 3002 loss=2.709, ppl=6.54, wps=5925.6, ups=0.09, wpb=64821, bsz=128, num_updates=8302, lr=9.99416e-05, gnorm=2.142, loss_scale=1, train_wall=10, gb_free=2.8, wall=94923 2021-06-19 21:01:00 | INFO | train_inner | epoch 003: 2352 / 3002 loss=2.512, ppl=5.71, wps=5939, ups=0.09, wpb=64880, bsz=128, num_updates=8303, lr=9.99416e-05, gnorm=2.167, loss_scale=1, train_wall=10, gb_free=2.8, wall=94934 2021-06-19 21:01:11 | INFO | train_inner | epoch 003: 2353 / 3002 loss=2.691, ppl=6.46, wps=5756, ups=0.09, wpb=64824, bsz=128, num_updates=8304, lr=9.99416e-05, gnorm=2.108, loss_scale=1, train_wall=11, gb_free=2.8, wall=94945 2021-06-19 21:01:22 | INFO | train_inner | epoch 003: 2354 / 3002 loss=2.725, ppl=6.61, wps=5962.8, ups=0.09, wpb=64775, bsz=128, num_updates=8305, lr=9.99416e-05, gnorm=3.869, loss_scale=1, train_wall=10, gb_free=2.8, wall=94956 2021-06-19 21:01:33 | INFO | train_inner | epoch 003: 2355 / 3002 loss=2.709, ppl=6.54, wps=5749.3, ups=0.09, wpb=64809, bsz=128, num_updates=8306, lr=9.99415e-05, gnorm=2.071, loss_scale=1, train_wall=11, gb_free=2.8, wall=94967 2021-06-19 21:01:44 | INFO | train_inner | epoch 003: 2356 / 3002 loss=2.589, ppl=6.02, wps=5927, ups=0.09, wpb=64772, bsz=128, num_updates=8307, lr=9.99415e-05, gnorm=2.114, loss_scale=1, train_wall=10, gb_free=2.8, wall=94978 2021-06-19 21:01:55 | INFO | train_inner | epoch 003: 2357 / 3002 loss=2.482, ppl=5.58, wps=5770.4, ups=0.09, wpb=64883, bsz=128, num_updates=8308, lr=9.99415e-05, gnorm=2.095, loss_scale=1, train_wall=11, gb_free=2.8, wall=94989 2021-06-19 21:02:06 | INFO | train_inner | epoch 003: 2358 / 3002 loss=2.843, ppl=7.18, wps=5848.7, ups=0.09, wpb=64829, bsz=128, num_updates=8309, lr=9.99415e-05, gnorm=2.139, loss_scale=1, train_wall=11, gb_free=2.8, wall=95001 2021-06-19 21:02:17 | INFO | train_inner | epoch 003: 2359 / 3002 loss=2.646, ppl=6.26, wps=5790.3, ups=0.09, wpb=64803, bsz=128, num_updates=8310, lr=9.99415e-05, gnorm=2.135, loss_scale=1, train_wall=11, gb_free=2.8, wall=95012 2021-06-19 21:02:28 | INFO | train_inner | epoch 003: 2360 / 3002 loss=2.731, ppl=6.64, wps=5840.8, ups=0.09, wpb=64746, bsz=128, num_updates=8311, lr=9.99415e-05, gnorm=2.049, loss_scale=1, train_wall=11, gb_free=2.8, wall=95023 2021-06-19 21:02:40 | INFO | train_inner | epoch 003: 2361 / 3002 loss=2.51, ppl=5.7, wps=5811.8, ups=0.09, wpb=64898, bsz=128, num_updates=8312, lr=9.99415e-05, gnorm=2.131, loss_scale=1, train_wall=11, gb_free=2.8, wall=95034 2021-06-19 21:02:51 | INFO | train_inner | epoch 003: 2362 / 3002 loss=2.594, ppl=6.04, wps=5859.7, ups=0.09, wpb=64860, bsz=128, num_updates=8313, lr=9.99415e-05, gnorm=2.064, loss_scale=1, train_wall=11, gb_free=2.8, wall=95045 2021-06-19 21:03:02 | INFO | train_inner | epoch 003: 2363 / 3002 loss=2.647, ppl=6.26, wps=5910.8, ups=0.09, wpb=64864, bsz=128, num_updates=8314, lr=9.99415e-05, gnorm=2.327, loss_scale=1, train_wall=11, gb_free=2.8, wall=95056 2021-06-19 21:03:13 | INFO | train_inner | epoch 003: 2364 / 3002 loss=2.551, ppl=5.86, wps=5840, ups=0.09, wpb=64883, bsz=128, num_updates=8315, lr=9.99415e-05, gnorm=2.37, loss_scale=1, train_wall=11, gb_free=2.8, wall=95067 2021-06-19 21:03:24 | INFO | train_inner | epoch 003: 2365 / 3002 loss=2.623, ppl=6.16, wps=5873.4, ups=0.09, wpb=64914, bsz=128, num_updates=8316, lr=9.99415e-05, gnorm=2.078, loss_scale=1, train_wall=11, gb_free=2.8, wall=95078 2021-06-19 21:03:35 | INFO | train_inner | epoch 003: 2366 / 3002 loss=2.768, ppl=6.81, wps=5828.1, ups=0.09, wpb=64856, bsz=128, num_updates=8317, lr=9.99415e-05, gnorm=2.221, loss_scale=1, train_wall=11, gb_free=2.8, wall=95089 2021-06-19 21:03:46 | INFO | train_inner | epoch 003: 2367 / 3002 loss=2.604, ppl=6.08, wps=5853.8, ups=0.09, wpb=64831, bsz=128, num_updates=8318, lr=9.99415e-05, gnorm=2.162, loss_scale=1, train_wall=11, gb_free=2.8, wall=95100 2021-06-19 21:03:57 | INFO | train_inner | epoch 003: 2368 / 3002 loss=2.64, ppl=6.23, wps=5765, ups=0.09, wpb=64821, bsz=128, num_updates=8319, lr=9.99414e-05, gnorm=2.184, loss_scale=1, train_wall=11, gb_free=2.8, wall=95112 2021-06-19 21:04:09 | INFO | train_inner | epoch 003: 2369 / 3002 loss=2.666, ppl=6.35, wps=5741.2, ups=0.09, wpb=64863, bsz=128, num_updates=8320, lr=9.99414e-05, gnorm=2.211, loss_scale=1, train_wall=11, gb_free=2.8, wall=95123 2021-06-19 21:04:20 | INFO | train_inner | epoch 003: 2370 / 3002 loss=2.701, ppl=6.5, wps=5890.1, ups=0.09, wpb=64848, bsz=128, num_updates=8321, lr=9.99414e-05, gnorm=2.145, loss_scale=1, train_wall=11, gb_free=2.8, wall=95134 2021-06-19 21:04:31 | INFO | train_inner | epoch 003: 2371 / 3002 loss=2.628, ppl=6.18, wps=5830.7, ups=0.09, wpb=64830, bsz=128, num_updates=8322, lr=9.99414e-05, gnorm=2.115, loss_scale=1, train_wall=11, gb_free=2.8, wall=95145 2021-06-19 21:04:42 | INFO | train_inner | epoch 003: 2372 / 3002 loss=2.586, ppl=6, wps=5819.9, ups=0.09, wpb=64798, bsz=128, num_updates=8323, lr=9.99414e-05, gnorm=2.053, loss_scale=1, train_wall=11, gb_free=2.8, wall=95156 2021-06-19 21:04:53 | INFO | train_inner | epoch 003: 2373 / 3002 loss=2.644, ppl=6.25, wps=5877.1, ups=0.09, wpb=64789, bsz=128, num_updates=8324, lr=9.99414e-05, gnorm=2.068, loss_scale=1, train_wall=11, gb_free=2.8, wall=95167 2021-06-19 21:05:04 | INFO | train_inner | epoch 003: 2374 / 3002 loss=2.581, ppl=5.98, wps=5821, ups=0.09, wpb=64773, bsz=128, num_updates=8325, lr=9.99414e-05, gnorm=2.191, loss_scale=1, train_wall=11, gb_free=2.8, wall=95178 2021-06-19 21:05:15 | INFO | train_inner | epoch 003: 2375 / 3002 loss=2.611, ppl=6.11, wps=5702.5, ups=0.09, wpb=64848, bsz=128, num_updates=8326, lr=9.99414e-05, gnorm=2.097, loss_scale=1, train_wall=11, gb_free=2.8, wall=95190 2021-06-19 21:05:26 | INFO | train_inner | epoch 003: 2376 / 3002 loss=2.684, ppl=6.43, wps=5919, ups=0.09, wpb=64876, bsz=128, num_updates=8327, lr=9.99414e-05, gnorm=2.068, loss_scale=1, train_wall=11, gb_free=2.8, wall=95201 2021-06-19 21:05:37 | INFO | train_inner | epoch 003: 2377 / 3002 loss=2.661, ppl=6.33, wps=5832.4, ups=0.09, wpb=64854, bsz=128, num_updates=8328, lr=9.99414e-05, gnorm=2.09, loss_scale=1, train_wall=11, gb_free=2.8, wall=95212 2021-06-19 21:05:49 | INFO | train_inner | epoch 003: 2378 / 3002 loss=2.524, ppl=5.75, wps=5819.7, ups=0.09, wpb=64750, bsz=128, num_updates=8329, lr=9.99414e-05, gnorm=2.168, loss_scale=1, train_wall=11, gb_free=2.8, wall=95223 2021-06-19 21:06:00 | INFO | train_inner | epoch 003: 2379 / 3002 loss=2.632, ppl=6.2, wps=5819.4, ups=0.09, wpb=64825, bsz=128, num_updates=8330, lr=9.99414e-05, gnorm=2.05, loss_scale=1, train_wall=11, gb_free=2.8, wall=95234 2021-06-19 21:06:11 | INFO | train_inner | epoch 003: 2380 / 3002 loss=2.625, ppl=6.17, wps=5850.9, ups=0.09, wpb=64858, bsz=128, num_updates=8331, lr=9.99413e-05, gnorm=2.147, loss_scale=1, train_wall=11, gb_free=2.8, wall=95245 2021-06-19 21:06:22 | INFO | train_inner | epoch 003: 2381 / 3002 loss=2.618, ppl=6.14, wps=5956.6, ups=0.09, wpb=64846, bsz=128, num_updates=8332, lr=9.99413e-05, gnorm=2.133, loss_scale=1, train_wall=10, gb_free=2.8, wall=95256 2021-06-19 21:06:33 | INFO | train_inner | epoch 003: 2382 / 3002 loss=2.609, ppl=6.1, wps=5776.6, ups=0.09, wpb=64763, bsz=128, num_updates=8333, lr=9.99413e-05, gnorm=2.181, loss_scale=1, train_wall=11, gb_free=2.8, wall=95267 2021-06-19 21:06:44 | INFO | train_inner | epoch 003: 2383 / 3002 loss=2.768, ppl=6.81, wps=5876.4, ups=0.09, wpb=64805, bsz=128, num_updates=8334, lr=9.99413e-05, gnorm=2.17, loss_scale=1, train_wall=11, gb_free=2.8, wall=95278 2021-06-19 21:06:55 | INFO | train_inner | epoch 003: 2384 / 3002 loss=2.548, ppl=5.85, wps=5903.8, ups=0.09, wpb=64886, bsz=128, num_updates=8335, lr=9.99413e-05, gnorm=2.067, loss_scale=1, train_wall=11, gb_free=2.8, wall=95289 2021-06-19 21:07:06 | INFO | train_inner | epoch 003: 2385 / 3002 loss=2.581, ppl=5.98, wps=5916.1, ups=0.09, wpb=64867, bsz=128, num_updates=8336, lr=9.99413e-05, gnorm=2.073, loss_scale=1, train_wall=11, gb_free=2.8, wall=95300 2021-06-19 21:07:17 | INFO | train_inner | epoch 003: 2386 / 3002 loss=2.585, ppl=6, wps=5891.6, ups=0.09, wpb=64854, bsz=128, num_updates=8337, lr=9.99413e-05, gnorm=2.246, loss_scale=1, train_wall=11, gb_free=2.8, wall=95311 2021-06-19 21:07:28 | INFO | train_inner | epoch 003: 2387 / 3002 loss=2.678, ppl=6.4, wps=5808, ups=0.09, wpb=64826, bsz=128, num_updates=8338, lr=9.99413e-05, gnorm=2.838, loss_scale=1, train_wall=11, gb_free=2.8, wall=95322 2021-06-19 21:07:39 | INFO | train_inner | epoch 003: 2388 / 3002 loss=2.459, ppl=5.5, wps=5842.6, ups=0.09, wpb=64861, bsz=128, num_updates=8339, lr=9.99413e-05, gnorm=2.149, loss_scale=1, train_wall=11, gb_free=2.8, wall=95334 2021-06-19 21:07:50 | INFO | train_inner | epoch 003: 2389 / 3002 loss=2.662, ppl=6.33, wps=5823.3, ups=0.09, wpb=64865, bsz=128, num_updates=8340, lr=9.99413e-05, gnorm=2.11, loss_scale=1, train_wall=11, gb_free=2.8, wall=95345 2021-06-19 21:08:02 | INFO | train_inner | epoch 003: 2390 / 3002 loss=2.568, ppl=5.93, wps=5778.5, ups=0.09, wpb=64792, bsz=128, num_updates=8341, lr=9.99413e-05, gnorm=2.038, loss_scale=1, train_wall=11, gb_free=2.8, wall=95356 2021-06-19 21:08:12 | INFO | train_inner | epoch 003: 2391 / 3002 loss=2.6, ppl=6.06, wps=5971.6, ups=0.09, wpb=64818, bsz=128, num_updates=8342, lr=9.99413e-05, gnorm=2.177, loss_scale=1, train_wall=10, gb_free=2.8, wall=95367 2021-06-19 21:08:23 | INFO | train_inner | epoch 003: 2392 / 3002 loss=2.842, ppl=7.17, wps=5864, ups=0.09, wpb=64793, bsz=128, num_updates=8343, lr=9.99413e-05, gnorm=2.062, loss_scale=1, train_wall=11, gb_free=2.8, wall=95378 2021-06-19 21:08:35 | INFO | train_inner | epoch 003: 2393 / 3002 loss=2.539, ppl=5.81, wps=5840.2, ups=0.09, wpb=64835, bsz=128, num_updates=8344, lr=9.99412e-05, gnorm=2.061, loss_scale=1, train_wall=11, gb_free=2.8, wall=95389 2021-06-19 21:08:46 | INFO | train_inner | epoch 003: 2394 / 3002 loss=2.818, ppl=7.05, wps=5787.4, ups=0.09, wpb=64769, bsz=128, num_updates=8345, lr=9.99412e-05, gnorm=2.056, loss_scale=1, train_wall=11, gb_free=2.8, wall=95400 2021-06-19 21:08:57 | INFO | train_inner | epoch 003: 2395 / 3002 loss=2.678, ppl=6.4, wps=5799, ups=0.09, wpb=64812, bsz=128, num_updates=8346, lr=9.99412e-05, gnorm=2.459, loss_scale=1, train_wall=11, gb_free=2.8, wall=95411 2021-06-19 21:09:08 | INFO | train_inner | epoch 003: 2396 / 3002 loss=2.499, ppl=5.65, wps=5785.6, ups=0.09, wpb=64839, bsz=128, num_updates=8347, lr=9.99412e-05, gnorm=2.01, loss_scale=1, train_wall=11, gb_free=2.8, wall=95422 2021-06-19 21:09:19 | INFO | train_inner | epoch 003: 2397 / 3002 loss=2.578, ppl=5.97, wps=5868.1, ups=0.09, wpb=64835, bsz=128, num_updates=8348, lr=9.99412e-05, gnorm=2.882, loss_scale=1, train_wall=11, gb_free=2.8, wall=95433 2021-06-19 21:09:30 | INFO | train_inner | epoch 003: 2398 / 3002 loss=2.486, ppl=5.6, wps=5794, ups=0.09, wpb=64793, bsz=128, num_updates=8349, lr=9.99412e-05, gnorm=2.017, loss_scale=1, train_wall=11, gb_free=2.8, wall=95445 2021-06-19 21:09:41 | INFO | train_inner | epoch 003: 2399 / 3002 loss=2.75, ppl=6.73, wps=5830.8, ups=0.09, wpb=64836, bsz=128, num_updates=8350, lr=9.99412e-05, gnorm=2.096, loss_scale=2, train_wall=11, gb_free=2.8, wall=95456 2021-06-19 21:09:53 | INFO | train_inner | epoch 003: 2400 / 3002 loss=2.491, ppl=5.62, wps=5866, ups=0.09, wpb=64840, bsz=128, num_updates=8351, lr=9.99412e-05, gnorm=2.244, loss_scale=2, train_wall=11, gb_free=2.8, wall=95467 2021-06-19 21:10:03 | INFO | train_inner | epoch 003: 2401 / 3002 loss=2.719, ppl=6.58, wps=5943.7, ups=0.09, wpb=64798, bsz=128, num_updates=8352, lr=9.99412e-05, gnorm=3.002, loss_scale=2, train_wall=10, gb_free=2.8, wall=95478 2021-06-19 21:10:14 | INFO | train_inner | epoch 003: 2402 / 3002 loss=2.667, ppl=6.35, wps=5963.1, ups=0.09, wpb=64915, bsz=128, num_updates=8353, lr=9.99412e-05, gnorm=2.149, loss_scale=2, train_wall=10, gb_free=2.8, wall=95489 2021-06-19 21:10:26 | INFO | train_inner | epoch 003: 2403 / 3002 loss=2.499, ppl=5.65, wps=5779.8, ups=0.09, wpb=64890, bsz=128, num_updates=8354, lr=9.99412e-05, gnorm=2.104, loss_scale=2, train_wall=11, gb_free=2.8, wall=95500 2021-06-19 21:10:36 | INFO | train_inner | epoch 003: 2404 / 3002 loss=2.598, ppl=6.06, wps=5938.8, ups=0.09, wpb=64813, bsz=128, num_updates=8355, lr=9.99412e-05, gnorm=2.158, loss_scale=2, train_wall=10, gb_free=2.8, wall=95511 2021-06-19 21:10:47 | INFO | train_inner | epoch 003: 2405 / 3002 loss=2.591, ppl=6.02, wps=5929.7, ups=0.09, wpb=64876, bsz=128, num_updates=8356, lr=9.99411e-05, gnorm=2.036, loss_scale=2, train_wall=10, gb_free=2.8, wall=95522 2021-06-19 21:10:59 | INFO | train_inner | epoch 003: 2406 / 3002 loss=2.597, ppl=6.05, wps=5783.8, ups=0.09, wpb=64779, bsz=128, num_updates=8357, lr=9.99411e-05, gnorm=2.023, loss_scale=2, train_wall=11, gb_free=2.8, wall=95533 2021-06-19 21:11:10 | INFO | train_inner | epoch 003: 2407 / 3002 loss=2.702, ppl=6.51, wps=5768.2, ups=0.09, wpb=64773, bsz=128, num_updates=8358, lr=9.99411e-05, gnorm=2.064, loss_scale=2, train_wall=11, gb_free=2.8, wall=95544 2021-06-19 21:11:21 | INFO | train_inner | epoch 003: 2408 / 3002 loss=2.719, ppl=6.59, wps=5665.7, ups=0.09, wpb=64847, bsz=128, num_updates=8359, lr=9.99411e-05, gnorm=2.236, loss_scale=2, train_wall=11, gb_free=2.8, wall=95556 2021-06-19 21:11:33 | INFO | train_inner | epoch 003: 2409 / 3002 loss=2.726, ppl=6.62, wps=5752.7, ups=0.09, wpb=64761, bsz=128, num_updates=8360, lr=9.99411e-05, gnorm=2.18, loss_scale=2, train_wall=11, gb_free=2.8, wall=95567 2021-06-19 21:11:44 | INFO | train_inner | epoch 003: 2410 / 3002 loss=2.741, ppl=6.68, wps=5887.7, ups=0.09, wpb=64850, bsz=128, num_updates=8361, lr=9.99411e-05, gnorm=2.036, loss_scale=2, train_wall=11, gb_free=2.8, wall=95578 2021-06-19 21:11:54 | INFO | train_inner | epoch 003: 2411 / 3002 loss=2.553, ppl=5.87, wps=5960.2, ups=0.09, wpb=64915, bsz=128, num_updates=8362, lr=9.99411e-05, gnorm=3.365, loss_scale=2, train_wall=10, gb_free=2.8, wall=95589 2021-06-19 21:12:06 | INFO | train_inner | epoch 003: 2412 / 3002 loss=2.615, ppl=6.13, wps=5847.7, ups=0.09, wpb=64835, bsz=128, num_updates=8363, lr=9.99411e-05, gnorm=2.08, loss_scale=2, train_wall=11, gb_free=2.8, wall=95600 2021-06-19 21:12:17 | INFO | train_inner | epoch 003: 2413 / 3002 loss=2.608, ppl=6.1, wps=5802.2, ups=0.09, wpb=64781, bsz=128, num_updates=8364, lr=9.99411e-05, gnorm=2.019, loss_scale=2, train_wall=11, gb_free=2.8, wall=95611 2021-06-19 21:12:28 | INFO | train_inner | epoch 003: 2414 / 3002 loss=2.626, ppl=6.17, wps=5808.1, ups=0.09, wpb=64766, bsz=128, num_updates=8365, lr=9.99411e-05, gnorm=2.118, loss_scale=2, train_wall=11, gb_free=2.8, wall=95622 2021-06-19 21:12:39 | INFO | train_inner | epoch 003: 2415 / 3002 loss=2.622, ppl=6.16, wps=5839.3, ups=0.09, wpb=64845, bsz=128, num_updates=8366, lr=9.99411e-05, gnorm=2.242, loss_scale=2, train_wall=11, gb_free=2.8, wall=95633 2021-06-19 21:12:50 | INFO | train_inner | epoch 003: 2416 / 3002 loss=2.558, ppl=5.89, wps=5995.9, ups=0.09, wpb=64783, bsz=128, num_updates=8367, lr=9.99411e-05, gnorm=2.084, loss_scale=2, train_wall=10, gb_free=2.8, wall=95644 2021-06-19 21:13:01 | INFO | train_inner | epoch 003: 2417 / 3002 loss=2.695, ppl=6.48, wps=5985.8, ups=0.09, wpb=64862, bsz=128, num_updates=8368, lr=9.99411e-05, gnorm=2.183, loss_scale=2, train_wall=10, gb_free=2.8, wall=95655 2021-06-19 21:13:12 | INFO | train_inner | epoch 003: 2418 / 3002 loss=2.787, ppl=6.9, wps=5806.1, ups=0.09, wpb=64797, bsz=128, num_updates=8369, lr=9.9941e-05, gnorm=2.113, loss_scale=2, train_wall=11, gb_free=2.8, wall=95666 2021-06-19 21:13:23 | INFO | train_inner | epoch 003: 2419 / 3002 loss=2.524, ppl=5.75, wps=5984.4, ups=0.09, wpb=64879, bsz=128, num_updates=8370, lr=9.9941e-05, gnorm=2.147, loss_scale=2, train_wall=10, gb_free=2.8, wall=95677 2021-06-19 21:13:33 | INFO | train_inner | epoch 003: 2420 / 3002 loss=2.633, ppl=6.2, wps=5974.4, ups=0.09, wpb=64863, bsz=128, num_updates=8371, lr=9.9941e-05, gnorm=4.48, loss_scale=2, train_wall=10, gb_free=2.8, wall=95688 2021-06-19 21:13:45 | INFO | train_inner | epoch 003: 2421 / 3002 loss=2.572, ppl=5.95, wps=5754.7, ups=0.09, wpb=64833, bsz=128, num_updates=8372, lr=9.9941e-05, gnorm=2.092, loss_scale=2, train_wall=11, gb_free=2.8, wall=95699 2021-06-19 21:13:56 | INFO | train_inner | epoch 003: 2422 / 3002 loss=2.729, ppl=6.63, wps=5846.7, ups=0.09, wpb=64774, bsz=128, num_updates=8373, lr=9.9941e-05, gnorm=2.121, loss_scale=2, train_wall=11, gb_free=2.8, wall=95710 2021-06-19 21:14:07 | INFO | train_inner | epoch 003: 2423 / 3002 loss=2.773, ppl=6.84, wps=5853.5, ups=0.09, wpb=64798, bsz=128, num_updates=8374, lr=9.9941e-05, gnorm=2.214, loss_scale=2, train_wall=11, gb_free=2.8, wall=95721 2021-06-19 21:14:18 | INFO | train_inner | epoch 003: 2424 / 3002 loss=2.491, ppl=5.62, wps=5751.3, ups=0.09, wpb=64880, bsz=128, num_updates=8375, lr=9.9941e-05, gnorm=2.761, loss_scale=2, train_wall=11, gb_free=2.8, wall=95732 2021-06-19 21:14:29 | INFO | train_inner | epoch 003: 2425 / 3002 loss=2.548, ppl=5.85, wps=5763.2, ups=0.09, wpb=64777, bsz=128, num_updates=8376, lr=9.9941e-05, gnorm=3.467, loss_scale=2, train_wall=11, gb_free=2.8, wall=95744 2021-06-19 21:14:41 | INFO | train_inner | epoch 003: 2426 / 3002 loss=2.663, ppl=6.33, wps=5767.6, ups=0.09, wpb=64813, bsz=128, num_updates=8377, lr=9.9941e-05, gnorm=8.031, loss_scale=2, train_wall=11, gb_free=2.8, wall=95755 2021-06-19 21:14:52 | INFO | train_inner | epoch 003: 2427 / 3002 loss=2.608, ppl=6.1, wps=5885.8, ups=0.09, wpb=64899, bsz=128, num_updates=8378, lr=9.9941e-05, gnorm=2.116, loss_scale=2, train_wall=11, gb_free=2.8, wall=95766 2021-06-19 21:15:03 | INFO | train_inner | epoch 003: 2428 / 3002 loss=2.571, ppl=5.94, wps=5871.9, ups=0.09, wpb=64783, bsz=128, num_updates=8379, lr=9.9941e-05, gnorm=2.045, loss_scale=2, train_wall=11, gb_free=2.8, wall=95777 2021-06-19 21:15:14 | INFO | train_inner | epoch 003: 2429 / 3002 loss=2.641, ppl=6.24, wps=5872.5, ups=0.09, wpb=64894, bsz=128, num_updates=8380, lr=9.9941e-05, gnorm=5.075, loss_scale=2, train_wall=11, gb_free=2.8, wall=95788 2021-06-19 21:15:25 | INFO | train_inner | epoch 003: 2430 / 3002 loss=2.459, ppl=5.5, wps=5799.8, ups=0.09, wpb=64827, bsz=128, num_updates=8381, lr=9.99409e-05, gnorm=2.124, loss_scale=2, train_wall=11, gb_free=2.8, wall=95799 2021-06-19 21:15:36 | INFO | train_inner | epoch 003: 2431 / 3002 loss=2.572, ppl=5.95, wps=5779.1, ups=0.09, wpb=64870, bsz=128, num_updates=8382, lr=9.99409e-05, gnorm=2.169, loss_scale=2, train_wall=11, gb_free=2.8, wall=95810 2021-06-19 21:15:47 | INFO | train_inner | epoch 003: 2432 / 3002 loss=2.658, ppl=6.31, wps=5835.4, ups=0.09, wpb=64774, bsz=128, num_updates=8383, lr=9.99409e-05, gnorm=2.396, loss_scale=2, train_wall=11, gb_free=2.8, wall=95822 2021-06-19 21:15:58 | INFO | train_inner | epoch 003: 2433 / 3002 loss=2.7, ppl=6.5, wps=5766.8, ups=0.09, wpb=64767, bsz=128, num_updates=8384, lr=9.99409e-05, gnorm=2.163, loss_scale=2, train_wall=11, gb_free=2.8, wall=95833 2021-06-19 21:16:09 | INFO | train_inner | epoch 003: 2434 / 3002 loss=2.556, ppl=5.88, wps=5948.5, ups=0.09, wpb=64855, bsz=128, num_updates=8385, lr=9.99409e-05, gnorm=2.089, loss_scale=2, train_wall=10, gb_free=2.8, wall=95844 2021-06-19 21:16:20 | INFO | train_inner | epoch 003: 2435 / 3002 loss=2.587, ppl=6.01, wps=5891.3, ups=0.09, wpb=64811, bsz=128, num_updates=8386, lr=9.99409e-05, gnorm=2.082, loss_scale=2, train_wall=11, gb_free=2.8, wall=95855 2021-06-19 21:16:32 | INFO | train_inner | epoch 003: 2436 / 3002 loss=2.618, ppl=6.14, wps=5788, ups=0.09, wpb=64787, bsz=128, num_updates=8387, lr=9.99409e-05, gnorm=2.149, loss_scale=2, train_wall=11, gb_free=2.8, wall=95866 2021-06-19 21:16:42 | INFO | train_inner | epoch 003: 2437 / 3002 loss=2.748, ppl=6.72, wps=5986.2, ups=0.09, wpb=64854, bsz=128, num_updates=8388, lr=9.99409e-05, gnorm=2.067, loss_scale=2, train_wall=10, gb_free=2.8, wall=95877 2021-06-19 21:16:53 | INFO | train_inner | epoch 003: 2438 / 3002 loss=2.756, ppl=6.76, wps=5951.3, ups=0.09, wpb=64785, bsz=128, num_updates=8389, lr=9.99409e-05, gnorm=2.169, loss_scale=2, train_wall=10, gb_free=2.8, wall=95888 2021-06-19 21:17:04 | INFO | train_inner | epoch 003: 2439 / 3002 loss=2.748, ppl=6.72, wps=5953.5, ups=0.09, wpb=64844, bsz=128, num_updates=8390, lr=9.99409e-05, gnorm=3.125, loss_scale=2, train_wall=10, gb_free=2.8, wall=95899 2021-06-19 21:17:15 | INFO | train_inner | epoch 003: 2440 / 3002 loss=2.63, ppl=6.19, wps=5906.7, ups=0.09, wpb=64799, bsz=128, num_updates=8391, lr=9.99409e-05, gnorm=2.076, loss_scale=2, train_wall=11, gb_free=2.8, wall=95909 2021-06-19 21:17:26 | INFO | train_inner | epoch 003: 2441 / 3002 loss=2.705, ppl=6.52, wps=5948.2, ups=0.09, wpb=64935, bsz=128, num_updates=8392, lr=9.99409e-05, gnorm=2.142, loss_scale=2, train_wall=10, gb_free=2.8, wall=95920 2021-06-19 21:17:37 | INFO | train_inner | epoch 003: 2442 / 3002 loss=2.548, ppl=5.85, wps=5885.2, ups=0.09, wpb=64878, bsz=128, num_updates=8393, lr=9.99409e-05, gnorm=2.136, loss_scale=2, train_wall=11, gb_free=2.8, wall=95931 2021-06-19 21:17:48 | INFO | train_inner | epoch 003: 2443 / 3002 loss=2.493, ppl=5.63, wps=5986.5, ups=0.09, wpb=64844, bsz=128, num_updates=8394, lr=9.99408e-05, gnorm=1.961, loss_scale=2, train_wall=10, gb_free=2.8, wall=95942 2021-06-19 21:17:59 | INFO | train_inner | epoch 003: 2444 / 3002 loss=2.622, ppl=6.16, wps=5924.3, ups=0.09, wpb=64803, bsz=128, num_updates=8395, lr=9.99408e-05, gnorm=2.119, loss_scale=2, train_wall=10, gb_free=2.8, wall=95953 2021-06-19 21:18:10 | INFO | train_inner | epoch 003: 2445 / 3002 loss=2.884, ppl=7.38, wps=5877.5, ups=0.09, wpb=64807, bsz=128, num_updates=8396, lr=9.99408e-05, gnorm=2.614, loss_scale=2, train_wall=11, gb_free=2.8, wall=95964 2021-06-19 21:18:21 | INFO | train_inner | epoch 003: 2446 / 3002 loss=2.744, ppl=6.7, wps=5828, ups=0.09, wpb=64762, bsz=128, num_updates=8397, lr=9.99408e-05, gnorm=2.111, loss_scale=2, train_wall=11, gb_free=2.8, wall=95975 2021-06-19 21:18:32 | INFO | train_inner | epoch 003: 2447 / 3002 loss=2.555, ppl=5.88, wps=5834.8, ups=0.09, wpb=64750, bsz=128, num_updates=8398, lr=9.99408e-05, gnorm=2.135, loss_scale=2, train_wall=11, gb_free=2.8, wall=95986 2021-06-19 21:18:43 | INFO | train_inner | epoch 003: 2448 / 3002 loss=2.579, ppl=5.98, wps=5924.7, ups=0.09, wpb=64862, bsz=128, num_updates=8399, lr=9.99408e-05, gnorm=2.027, loss_scale=2, train_wall=10, gb_free=2.8, wall=95997 2021-06-19 21:18:54 | INFO | train_inner | epoch 003: 2449 / 3002 loss=2.656, ppl=6.3, wps=5698.1, ups=0.09, wpb=64781, bsz=128, num_updates=8400, lr=9.99408e-05, gnorm=2.19, loss_scale=2, train_wall=11, gb_free=2.8, wall=96009 2021-06-19 21:19:06 | INFO | train_inner | epoch 003: 2450 / 3002 loss=2.551, ppl=5.86, wps=5768.9, ups=0.09, wpb=64791, bsz=128, num_updates=8401, lr=9.99408e-05, gnorm=2.154, loss_scale=2, train_wall=11, gb_free=2.8, wall=96020 2021-06-19 21:19:17 | INFO | train_inner | epoch 003: 2451 / 3002 loss=2.652, ppl=6.29, wps=5844.4, ups=0.09, wpb=64772, bsz=128, num_updates=8402, lr=9.99408e-05, gnorm=2.159, loss_scale=2, train_wall=11, gb_free=2.8, wall=96031 2021-06-19 21:19:28 | INFO | train_inner | epoch 003: 2452 / 3002 loss=2.526, ppl=5.76, wps=5891, ups=0.09, wpb=64910, bsz=128, num_updates=8403, lr=9.99408e-05, gnorm=2.313, loss_scale=2, train_wall=11, gb_free=2.8, wall=96042 2021-06-19 21:19:39 | INFO | train_inner | epoch 003: 2453 / 3002 loss=2.701, ppl=6.5, wps=5735, ups=0.09, wpb=64765, bsz=128, num_updates=8404, lr=9.99408e-05, gnorm=2.629, loss_scale=2, train_wall=11, gb_free=2.8, wall=96053 2021-06-19 21:19:50 | INFO | train_inner | epoch 003: 2454 / 3002 loss=2.614, ppl=6.12, wps=5747.7, ups=0.09, wpb=64797, bsz=128, num_updates=8405, lr=9.99408e-05, gnorm=2.117, loss_scale=2, train_wall=11, gb_free=2.8, wall=96065 2021-06-19 21:20:01 | INFO | train_inner | epoch 003: 2455 / 3002 loss=2.434, ppl=5.4, wps=5956.9, ups=0.09, wpb=64877, bsz=128, num_updates=8406, lr=9.99407e-05, gnorm=2.107, loss_scale=2, train_wall=10, gb_free=2.8, wall=96076 2021-06-19 21:20:12 | INFO | train_inner | epoch 003: 2456 / 3002 loss=2.664, ppl=6.34, wps=5978.9, ups=0.09, wpb=64876, bsz=128, num_updates=8407, lr=9.99407e-05, gnorm=2.193, loss_scale=2, train_wall=10, gb_free=2.8, wall=96086 2021-06-19 21:20:23 | INFO | train_inner | epoch 003: 2457 / 3002 loss=2.571, ppl=5.94, wps=5701.4, ups=0.09, wpb=64776, bsz=128, num_updates=8408, lr=9.99407e-05, gnorm=2.078, loss_scale=2, train_wall=11, gb_free=2.8, wall=96098 2021-06-19 21:20:35 | INFO | train_inner | epoch 003: 2458 / 3002 loss=2.595, ppl=6.04, wps=5822.7, ups=0.09, wpb=64823, bsz=128, num_updates=8409, lr=9.99407e-05, gnorm=2.138, loss_scale=2, train_wall=11, gb_free=2.8, wall=96109 2021-06-19 21:20:46 | INFO | train_inner | epoch 003: 2459 / 3002 loss=2.632, ppl=6.2, wps=5897.2, ups=0.09, wpb=64801, bsz=128, num_updates=8410, lr=9.99407e-05, gnorm=2.145, loss_scale=2, train_wall=11, gb_free=2.8, wall=96120 2021-06-19 21:20:57 | INFO | train_inner | epoch 003: 2460 / 3002 loss=2.568, ppl=5.93, wps=5731.7, ups=0.09, wpb=64747, bsz=128, num_updates=8411, lr=9.99407e-05, gnorm=2.122, loss_scale=2, train_wall=11, gb_free=2.8, wall=96131 2021-06-19 21:21:08 | INFO | train_inner | epoch 003: 2461 / 3002 loss=2.472, ppl=5.55, wps=5946.9, ups=0.09, wpb=64895, bsz=128, num_updates=8412, lr=9.99407e-05, gnorm=2.099, loss_scale=2, train_wall=10, gb_free=2.8, wall=96142 2021-06-19 21:21:19 | INFO | train_inner | epoch 003: 2462 / 3002 loss=2.757, ppl=6.76, wps=5816.8, ups=0.09, wpb=64871, bsz=128, num_updates=8413, lr=9.99407e-05, gnorm=2.081, loss_scale=2, train_wall=11, gb_free=2.8, wall=96153 2021-06-19 21:21:30 | INFO | train_inner | epoch 003: 2463 / 3002 loss=2.662, ppl=6.33, wps=5874.4, ups=0.09, wpb=64801, bsz=128, num_updates=8414, lr=9.99407e-05, gnorm=2.074, loss_scale=2, train_wall=11, gb_free=2.8, wall=96164 2021-06-19 21:21:41 | INFO | train_inner | epoch 003: 2464 / 3002 loss=2.523, ppl=5.75, wps=5878.6, ups=0.09, wpb=64763, bsz=128, num_updates=8415, lr=9.99407e-05, gnorm=2.092, loss_scale=2, train_wall=11, gb_free=2.8, wall=96175 2021-06-19 21:21:52 | INFO | train_inner | epoch 003: 2465 / 3002 loss=2.581, ppl=5.98, wps=5917, ups=0.09, wpb=64819, bsz=128, num_updates=8416, lr=9.99407e-05, gnorm=2.066, loss_scale=2, train_wall=10, gb_free=2.8, wall=96186 2021-06-19 21:22:03 | INFO | train_inner | epoch 003: 2466 / 3002 loss=2.767, ppl=6.81, wps=5772.3, ups=0.09, wpb=64688, bsz=128, num_updates=8417, lr=9.99407e-05, gnorm=2.099, loss_scale=2, train_wall=11, gb_free=2.8, wall=96197 2021-06-19 21:22:14 | INFO | train_inner | epoch 003: 2467 / 3002 loss=2.557, ppl=5.88, wps=5798.8, ups=0.09, wpb=64834, bsz=128, num_updates=8418, lr=9.99407e-05, gnorm=2.154, loss_scale=2, train_wall=11, gb_free=2.8, wall=96209 2021-06-19 21:22:25 | INFO | train_inner | epoch 003: 2468 / 3002 loss=2.623, ppl=6.16, wps=5828.4, ups=0.09, wpb=64890, bsz=128, num_updates=8419, lr=9.99406e-05, gnorm=2.211, loss_scale=2, train_wall=11, gb_free=2.8, wall=96220 2021-06-19 21:22:36 | INFO | train_inner | epoch 003: 2469 / 3002 loss=2.696, ppl=6.48, wps=5868.3, ups=0.09, wpb=64868, bsz=128, num_updates=8420, lr=9.99406e-05, gnorm=2.149, loss_scale=2, train_wall=11, gb_free=2.8, wall=96231 2021-06-19 21:22:47 | INFO | train_inner | epoch 003: 2470 / 3002 loss=2.453, ppl=5.48, wps=6012.1, ups=0.09, wpb=64839, bsz=128, num_updates=8421, lr=9.99406e-05, gnorm=2.119, loss_scale=2, train_wall=10, gb_free=2.8, wall=96242 2021-06-19 21:22:58 | INFO | train_inner | epoch 003: 2471 / 3002 loss=2.671, ppl=6.37, wps=5973, ups=0.09, wpb=64848, bsz=128, num_updates=8422, lr=9.99406e-05, gnorm=2.126, loss_scale=2, train_wall=10, gb_free=2.8, wall=96252 2021-06-19 21:23:09 | INFO | train_inner | epoch 003: 2472 / 3002 loss=2.617, ppl=6.13, wps=5795.4, ups=0.09, wpb=64865, bsz=128, num_updates=8423, lr=9.99406e-05, gnorm=2.083, loss_scale=2, train_wall=11, gb_free=2.8, wall=96264 2021-06-19 21:23:20 | INFO | train_inner | epoch 003: 2473 / 3002 loss=2.512, ppl=5.7, wps=5910.7, ups=0.09, wpb=64864, bsz=128, num_updates=8424, lr=9.99406e-05, gnorm=2.041, loss_scale=2, train_wall=11, gb_free=2.8, wall=96275 2021-06-19 21:23:31 | INFO | train_inner | epoch 003: 2474 / 3002 loss=2.58, ppl=5.98, wps=5831.3, ups=0.09, wpb=64863, bsz=128, num_updates=8425, lr=9.99406e-05, gnorm=2.124, loss_scale=2, train_wall=11, gb_free=2.8, wall=96286 2021-06-19 21:23:42 | INFO | train_inner | epoch 003: 2475 / 3002 loss=2.568, ppl=5.93, wps=5874.5, ups=0.09, wpb=64844, bsz=128, num_updates=8426, lr=9.99406e-05, gnorm=2.049, loss_scale=2, train_wall=11, gb_free=2.8, wall=96297 2021-06-19 21:23:53 | INFO | train_inner | epoch 003: 2476 / 3002 loss=2.501, ppl=5.66, wps=5885.5, ups=0.09, wpb=64884, bsz=128, num_updates=8427, lr=9.99406e-05, gnorm=2.256, loss_scale=2, train_wall=11, gb_free=2.8, wall=96308 2021-06-19 21:24:04 | INFO | train_inner | epoch 003: 2477 / 3002 loss=2.596, ppl=6.05, wps=5886.7, ups=0.09, wpb=64903, bsz=128, num_updates=8428, lr=9.99406e-05, gnorm=2.116, loss_scale=2, train_wall=11, gb_free=2.8, wall=96319 2021-06-19 21:24:15 | INFO | train_inner | epoch 003: 2478 / 3002 loss=2.669, ppl=6.36, wps=5908.7, ups=0.09, wpb=64759, bsz=128, num_updates=8429, lr=9.99406e-05, gnorm=2.165, loss_scale=2, train_wall=11, gb_free=2.8, wall=96330 2021-06-19 21:24:27 | INFO | train_inner | epoch 003: 2479 / 3002 loss=2.813, ppl=7.03, wps=5843.5, ups=0.09, wpb=64823, bsz=128, num_updates=8430, lr=9.99406e-05, gnorm=2.427, loss_scale=2, train_wall=11, gb_free=2.8, wall=96341 2021-06-19 21:24:38 | INFO | train_inner | epoch 003: 2480 / 3002 loss=2.681, ppl=6.41, wps=5849.6, ups=0.09, wpb=64904, bsz=128, num_updates=8431, lr=9.99405e-05, gnorm=2.056, loss_scale=2, train_wall=11, gb_free=2.8, wall=96352 2021-06-19 21:24:49 | INFO | train_inner | epoch 003: 2481 / 3002 loss=2.697, ppl=6.48, wps=5864.4, ups=0.09, wpb=64825, bsz=128, num_updates=8432, lr=9.99405e-05, gnorm=2.049, loss_scale=2, train_wall=11, gb_free=2.8, wall=96363 2021-06-19 21:25:00 | INFO | train_inner | epoch 003: 2482 / 3002 loss=2.643, ppl=6.25, wps=5794.7, ups=0.09, wpb=64758, bsz=128, num_updates=8433, lr=9.99405e-05, gnorm=2.281, loss_scale=2, train_wall=11, gb_free=2.8, wall=96374 2021-06-19 21:25:11 | INFO | train_inner | epoch 003: 2483 / 3002 loss=2.523, ppl=5.75, wps=5755.7, ups=0.09, wpb=64843, bsz=128, num_updates=8434, lr=9.99405e-05, gnorm=2.002, loss_scale=2, train_wall=11, gb_free=2.8, wall=96385 2021-06-19 21:25:22 | INFO | train_inner | epoch 003: 2484 / 3002 loss=2.565, ppl=5.92, wps=5874.6, ups=0.09, wpb=64829, bsz=128, num_updates=8435, lr=9.99405e-05, gnorm=2.162, loss_scale=2, train_wall=11, gb_free=2.8, wall=96397 2021-06-19 21:25:33 | INFO | train_inner | epoch 003: 2485 / 3002 loss=2.524, ppl=5.75, wps=5885.6, ups=0.09, wpb=64840, bsz=128, num_updates=8436, lr=9.99405e-05, gnorm=2.131, loss_scale=2, train_wall=11, gb_free=2.8, wall=96408 2021-06-19 21:25:44 | INFO | train_inner | epoch 003: 2486 / 3002 loss=2.561, ppl=5.9, wps=5796.6, ups=0.09, wpb=64910, bsz=128, num_updates=8437, lr=9.99405e-05, gnorm=2.075, loss_scale=2, train_wall=11, gb_free=2.8, wall=96419 2021-06-19 21:25:55 | INFO | train_inner | epoch 003: 2487 / 3002 loss=2.509, ppl=5.69, wps=5930.5, ups=0.09, wpb=64866, bsz=128, num_updates=8438, lr=9.99405e-05, gnorm=2.157, loss_scale=2, train_wall=10, gb_free=2.8, wall=96430 2021-06-19 21:26:06 | INFO | train_inner | epoch 003: 2488 / 3002 loss=2.595, ppl=6.04, wps=5849.3, ups=0.09, wpb=64879, bsz=128, num_updates=8439, lr=9.99405e-05, gnorm=2.231, loss_scale=2, train_wall=11, gb_free=2.8, wall=96441 2021-06-19 21:26:18 | INFO | train_inner | epoch 003: 2489 / 3002 loss=2.661, ppl=6.32, wps=5801.1, ups=0.09, wpb=64850, bsz=128, num_updates=8440, lr=9.99405e-05, gnorm=2.06, loss_scale=2, train_wall=11, gb_free=2.8, wall=96452 2021-06-19 21:26:29 | INFO | train_inner | epoch 003: 2490 / 3002 loss=2.595, ppl=6.04, wps=5826.1, ups=0.09, wpb=64820, bsz=128, num_updates=8441, lr=9.99405e-05, gnorm=2.148, loss_scale=2, train_wall=11, gb_free=2.8, wall=96463 2021-06-19 21:26:40 | INFO | train_inner | epoch 003: 2491 / 3002 loss=2.715, ppl=6.56, wps=5791.4, ups=0.09, wpb=64781, bsz=128, num_updates=8442, lr=9.99405e-05, gnorm=2.144, loss_scale=2, train_wall=11, gb_free=2.8, wall=96474 2021-06-19 21:26:51 | INFO | train_inner | epoch 003: 2492 / 3002 loss=2.65, ppl=6.28, wps=5834.2, ups=0.09, wpb=64818, bsz=128, num_updates=8443, lr=9.99405e-05, gnorm=2.08, loss_scale=2, train_wall=11, gb_free=2.8, wall=96485 2021-06-19 21:27:02 | INFO | train_inner | epoch 003: 2493 / 3002 loss=2.702, ppl=6.51, wps=5804.2, ups=0.09, wpb=64815, bsz=128, num_updates=8444, lr=9.99404e-05, gnorm=2.448, loss_scale=2, train_wall=11, gb_free=2.8, wall=96497 2021-06-19 21:27:13 | INFO | train_inner | epoch 003: 2494 / 3002 loss=2.701, ppl=6.5, wps=5869.9, ups=0.09, wpb=64857, bsz=128, num_updates=8445, lr=9.99404e-05, gnorm=2.186, loss_scale=2, train_wall=11, gb_free=2.8, wall=96508 2021-06-19 21:27:25 | INFO | train_inner | epoch 003: 2495 / 3002 loss=2.539, ppl=5.81, wps=5717.1, ups=0.09, wpb=64765, bsz=128, num_updates=8446, lr=9.99404e-05, gnorm=2.703, loss_scale=2, train_wall=11, gb_free=2.8, wall=96519 2021-06-19 21:27:36 | INFO | train_inner | epoch 003: 2496 / 3002 loss=2.694, ppl=6.47, wps=5880.3, ups=0.09, wpb=64819, bsz=128, num_updates=8447, lr=9.99404e-05, gnorm=2.141, loss_scale=2, train_wall=11, gb_free=2.8, wall=96530 2021-06-19 21:27:47 | INFO | train_inner | epoch 003: 2497 / 3002 loss=2.57, ppl=5.94, wps=5795, ups=0.09, wpb=64856, bsz=128, num_updates=8448, lr=9.99404e-05, gnorm=2.424, loss_scale=2, train_wall=11, gb_free=2.8, wall=96541 2021-06-19 21:27:58 | INFO | train_inner | epoch 003: 2498 / 3002 loss=2.774, ppl=6.84, wps=5785.1, ups=0.09, wpb=64868, bsz=128, num_updates=8449, lr=9.99404e-05, gnorm=2.204, loss_scale=2, train_wall=11, gb_free=2.8, wall=96552 2021-06-19 21:28:09 | INFO | train_inner | epoch 003: 2499 / 3002 loss=2.688, ppl=6.44, wps=5857.1, ups=0.09, wpb=64857, bsz=128, num_updates=8450, lr=9.99404e-05, gnorm=2.315, loss_scale=2, train_wall=11, gb_free=2.8, wall=96563 2021-06-19 21:28:20 | INFO | train_inner | epoch 003: 2500 / 3002 loss=2.666, ppl=6.35, wps=5836.9, ups=0.09, wpb=64814, bsz=128, num_updates=8451, lr=9.99404e-05, gnorm=2.591, loss_scale=2, train_wall=11, gb_free=2.8, wall=96575 2021-06-19 21:28:31 | INFO | train_inner | epoch 003: 2501 / 3002 loss=2.752, ppl=6.74, wps=5771.9, ups=0.09, wpb=64777, bsz=128, num_updates=8452, lr=9.99404e-05, gnorm=2.325, loss_scale=2, train_wall=11, gb_free=2.8, wall=96586 2021-06-19 21:28:43 | INFO | train_inner | epoch 003: 2502 / 3002 loss=2.683, ppl=6.42, wps=5754.2, ups=0.09, wpb=64806, bsz=128, num_updates=8453, lr=9.99404e-05, gnorm=2.31, loss_scale=2, train_wall=11, gb_free=2.8, wall=96597 2021-06-19 21:28:54 | INFO | train_inner | epoch 003: 2503 / 3002 loss=2.593, ppl=6.03, wps=5941.4, ups=0.09, wpb=64950, bsz=128, num_updates=8454, lr=9.99404e-05, gnorm=2.073, loss_scale=2, train_wall=10, gb_free=2.8, wall=96608 2021-06-19 21:29:05 | INFO | train_inner | epoch 003: 2504 / 3002 loss=2.641, ppl=6.24, wps=5834.7, ups=0.09, wpb=64776, bsz=128, num_updates=8455, lr=9.99404e-05, gnorm=2.806, loss_scale=2, train_wall=11, gb_free=2.8, wall=96619 2021-06-19 21:29:16 | INFO | train_inner | epoch 003: 2505 / 3002 loss=2.582, ppl=5.99, wps=5885.9, ups=0.09, wpb=64886, bsz=128, num_updates=8456, lr=9.99403e-05, gnorm=2.814, loss_scale=2, train_wall=11, gb_free=2.8, wall=96630 2021-06-19 21:29:27 | INFO | train_inner | epoch 003: 2506 / 3002 loss=2.509, ppl=5.69, wps=5734.1, ups=0.09, wpb=64774, bsz=128, num_updates=8457, lr=9.99403e-05, gnorm=2.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=96641 2021-06-19 21:29:38 | INFO | train_inner | epoch 003: 2507 / 3002 loss=2.538, ppl=5.81, wps=5934.3, ups=0.09, wpb=64829, bsz=128, num_updates=8458, lr=9.99403e-05, gnorm=2.101, loss_scale=2, train_wall=10, gb_free=2.8, wall=96652 2021-06-19 21:29:49 | INFO | train_inner | epoch 003: 2508 / 3002 loss=2.465, ppl=5.52, wps=5906.8, ups=0.09, wpb=64767, bsz=128, num_updates=8459, lr=9.99403e-05, gnorm=2.474, loss_scale=2, train_wall=11, gb_free=2.8, wall=96663 2021-06-19 21:30:00 | INFO | train_inner | epoch 003: 2509 / 3002 loss=2.674, ppl=6.38, wps=5897.1, ups=0.09, wpb=64824, bsz=128, num_updates=8460, lr=9.99403e-05, gnorm=2.187, loss_scale=2, train_wall=11, gb_free=2.8, wall=96674 2021-06-19 21:30:11 | INFO | train_inner | epoch 003: 2510 / 3002 loss=2.72, ppl=6.59, wps=5843.4, ups=0.09, wpb=64845, bsz=128, num_updates=8461, lr=9.99403e-05, gnorm=2.11, loss_scale=2, train_wall=11, gb_free=2.8, wall=96685 2021-06-19 21:30:22 | INFO | train_inner | epoch 003: 2511 / 3002 loss=2.594, ppl=6.04, wps=5801.6, ups=0.09, wpb=64887, bsz=128, num_updates=8462, lr=9.99403e-05, gnorm=2.157, loss_scale=2, train_wall=11, gb_free=2.8, wall=96697 2021-06-19 21:30:33 | INFO | train_inner | epoch 003: 2512 / 3002 loss=2.592, ppl=6.03, wps=5957.5, ups=0.09, wpb=64899, bsz=128, num_updates=8463, lr=9.99403e-05, gnorm=3.1, loss_scale=2, train_wall=10, gb_free=2.8, wall=96707 2021-06-19 21:30:44 | INFO | train_inner | epoch 003: 2513 / 3002 loss=2.708, ppl=6.53, wps=5957.8, ups=0.09, wpb=64827, bsz=128, num_updates=8464, lr=9.99403e-05, gnorm=2.141, loss_scale=2, train_wall=10, gb_free=2.8, wall=96718 2021-06-19 21:30:55 | INFO | train_inner | epoch 003: 2514 / 3002 loss=2.508, ppl=5.69, wps=5700.9, ups=0.09, wpb=64869, bsz=128, num_updates=8465, lr=9.99403e-05, gnorm=2.126, loss_scale=2, train_wall=11, gb_free=2.8, wall=96730 2021-06-19 21:31:06 | INFO | train_inner | epoch 003: 2515 / 3002 loss=2.516, ppl=5.72, wps=5830.8, ups=0.09, wpb=64841, bsz=128, num_updates=8466, lr=9.99403e-05, gnorm=2.979, loss_scale=2, train_wall=11, gb_free=2.8, wall=96741 2021-06-19 21:31:18 | INFO | train_inner | epoch 003: 2516 / 3002 loss=2.539, ppl=5.81, wps=5790.7, ups=0.09, wpb=64813, bsz=128, num_updates=8467, lr=9.99403e-05, gnorm=2.169, loss_scale=2, train_wall=11, gb_free=2.8, wall=96752 2021-06-19 21:31:29 | INFO | train_inner | epoch 003: 2517 / 3002 loss=2.552, ppl=5.86, wps=5846.8, ups=0.09, wpb=64871, bsz=128, num_updates=8468, lr=9.99403e-05, gnorm=2.192, loss_scale=2, train_wall=11, gb_free=2.8, wall=96763 2021-06-19 21:31:40 | INFO | train_inner | epoch 003: 2518 / 3002 loss=2.532, ppl=5.78, wps=5801.8, ups=0.09, wpb=64887, bsz=128, num_updates=8469, lr=9.99402e-05, gnorm=2.191, loss_scale=2, train_wall=11, gb_free=2.8, wall=96774 2021-06-19 21:31:51 | INFO | train_inner | epoch 003: 2519 / 3002 loss=2.518, ppl=5.73, wps=5820, ups=0.09, wpb=64810, bsz=128, num_updates=8470, lr=9.99402e-05, gnorm=2.2, loss_scale=2, train_wall=11, gb_free=2.8, wall=96785 2021-06-19 21:32:02 | INFO | train_inner | epoch 003: 2520 / 3002 loss=2.481, ppl=5.58, wps=5854.7, ups=0.09, wpb=64924, bsz=128, num_updates=8471, lr=9.99402e-05, gnorm=3.186, loss_scale=2, train_wall=11, gb_free=2.8, wall=96796 2021-06-19 21:32:13 | INFO | train_inner | epoch 003: 2521 / 3002 loss=2.646, ppl=6.26, wps=5928.1, ups=0.09, wpb=64804, bsz=128, num_updates=8472, lr=9.99402e-05, gnorm=2.209, loss_scale=2, train_wall=10, gb_free=2.8, wall=96807 2021-06-19 21:32:24 | INFO | train_inner | epoch 003: 2522 / 3002 loss=2.687, ppl=6.44, wps=5783.2, ups=0.09, wpb=64827, bsz=128, num_updates=8473, lr=9.99402e-05, gnorm=4.118, loss_scale=2, train_wall=11, gb_free=2.8, wall=96819 2021-06-19 21:32:36 | INFO | train_inner | epoch 003: 2523 / 3002 loss=2.682, ppl=6.42, wps=5738.4, ups=0.09, wpb=64724, bsz=128, num_updates=8474, lr=9.99402e-05, gnorm=3.57, loss_scale=2, train_wall=11, gb_free=2.8, wall=96830 2021-06-19 21:32:47 | INFO | train_inner | epoch 003: 2524 / 3002 loss=2.639, ppl=6.23, wps=5850, ups=0.09, wpb=64788, bsz=128, num_updates=8475, lr=9.99402e-05, gnorm=2.227, loss_scale=2, train_wall=11, gb_free=2.8, wall=96841 2021-06-19 21:32:58 | INFO | train_inner | epoch 003: 2525 / 3002 loss=2.734, ppl=6.65, wps=5718.8, ups=0.09, wpb=64851, bsz=128, num_updates=8476, lr=9.99402e-05, gnorm=25.712, loss_scale=2, train_wall=11, gb_free=2.8, wall=96852 2021-06-19 21:33:09 | INFO | train_inner | epoch 003: 2526 / 3002 loss=2.59, ppl=6.02, wps=5763.6, ups=0.09, wpb=64796, bsz=128, num_updates=8477, lr=9.99402e-05, gnorm=2.46, loss_scale=2, train_wall=11, gb_free=2.8, wall=96864 2021-06-19 21:33:20 | INFO | train_inner | epoch 003: 2527 / 3002 loss=2.648, ppl=6.27, wps=5867.1, ups=0.09, wpb=64939, bsz=128, num_updates=8478, lr=9.99402e-05, gnorm=2.612, loss_scale=4, train_wall=11, gb_free=2.8, wall=96875 2021-06-19 21:33:31 | INFO | train_inner | epoch 003: 2528 / 3002 loss=2.541, ppl=5.82, wps=5793, ups=0.09, wpb=64834, bsz=128, num_updates=8479, lr=9.99402e-05, gnorm=3.814, loss_scale=4, train_wall=11, gb_free=2.8, wall=96886 2021-06-19 21:33:42 | INFO | train_inner | epoch 003: 2529 / 3002 loss=2.448, ppl=5.46, wps=5936.2, ups=0.09, wpb=64852, bsz=128, num_updates=8480, lr=9.99402e-05, gnorm=2.446, loss_scale=4, train_wall=10, gb_free=2.8, wall=96897 2021-06-19 21:33:54 | INFO | train_inner | epoch 003: 2530 / 3002 loss=2.657, ppl=6.31, wps=5806.5, ups=0.09, wpb=64769, bsz=128, num_updates=8481, lr=9.99401e-05, gnorm=2.577, loss_scale=4, train_wall=11, gb_free=2.8, wall=96908 2021-06-19 21:34:05 | INFO | train_inner | epoch 003: 2531 / 3002 loss=2.797, ppl=6.95, wps=5816.9, ups=0.09, wpb=64795, bsz=128, num_updates=8482, lr=9.99401e-05, gnorm=2.569, loss_scale=4, train_wall=11, gb_free=2.8, wall=96919 2021-06-19 21:34:16 | INFO | train_inner | epoch 003: 2532 / 3002 loss=2.59, ppl=6.02, wps=5831.7, ups=0.09, wpb=64853, bsz=128, num_updates=8483, lr=9.99401e-05, gnorm=2.524, loss_scale=4, train_wall=11, gb_free=2.8, wall=96930 2021-06-19 21:34:27 | INFO | train_inner | epoch 003: 2533 / 3002 loss=2.705, ppl=6.52, wps=5785.1, ups=0.09, wpb=64744, bsz=128, num_updates=8484, lr=9.99401e-05, gnorm=2.344, loss_scale=4, train_wall=11, gb_free=2.8, wall=96941 2021-06-19 21:34:38 | INFO | train_inner | epoch 003: 2534 / 3002 loss=2.616, ppl=6.13, wps=5924.1, ups=0.09, wpb=64851, bsz=128, num_updates=8485, lr=9.99401e-05, gnorm=2.21, loss_scale=4, train_wall=10, gb_free=2.8, wall=96952 2021-06-19 21:34:49 | INFO | train_inner | epoch 003: 2535 / 3002 loss=2.593, ppl=6.03, wps=5810.7, ups=0.09, wpb=64848, bsz=128, num_updates=8486, lr=9.99401e-05, gnorm=2.353, loss_scale=4, train_wall=11, gb_free=2.8, wall=96963 2021-06-19 21:35:00 | INFO | train_inner | epoch 003: 2536 / 3002 loss=2.728, ppl=6.63, wps=5812.8, ups=0.09, wpb=64868, bsz=128, num_updates=8487, lr=9.99401e-05, gnorm=2.763, loss_scale=4, train_wall=11, gb_free=2.8, wall=96975 2021-06-19 21:35:12 | INFO | train_inner | epoch 003: 2537 / 3002 loss=2.572, ppl=5.95, wps=5772.4, ups=0.09, wpb=64828, bsz=128, num_updates=8488, lr=9.99401e-05, gnorm=2.449, loss_scale=4, train_wall=11, gb_free=2.8, wall=96986 2021-06-19 21:35:23 | INFO | train_inner | epoch 003: 2538 / 3002 loss=2.775, ppl=6.85, wps=5851.7, ups=0.09, wpb=64803, bsz=128, num_updates=8489, lr=9.99401e-05, gnorm=2.836, loss_scale=4, train_wall=11, gb_free=2.8, wall=96997 2021-06-19 21:35:34 | INFO | train_inner | epoch 003: 2539 / 3002 loss=2.656, ppl=6.3, wps=5788.2, ups=0.09, wpb=64719, bsz=128, num_updates=8490, lr=9.99401e-05, gnorm=2.414, loss_scale=4, train_wall=11, gb_free=2.8, wall=97008 2021-06-19 21:35:45 | INFO | train_inner | epoch 003: 2540 / 3002 loss=2.784, ppl=6.89, wps=5996.2, ups=0.09, wpb=64732, bsz=128, num_updates=8491, lr=9.99401e-05, gnorm=2.637, loss_scale=4, train_wall=10, gb_free=2.8, wall=97019 2021-06-19 21:35:56 | INFO | train_inner | epoch 003: 2541 / 3002 loss=2.799, ppl=6.96, wps=5854.1, ups=0.09, wpb=64791, bsz=128, num_updates=8492, lr=9.99401e-05, gnorm=2.181, loss_scale=4, train_wall=11, gb_free=2.8, wall=97030 2021-06-19 21:36:07 | INFO | train_inner | epoch 003: 2542 / 3002 loss=2.516, ppl=5.72, wps=5849.1, ups=0.09, wpb=64894, bsz=128, num_updates=8493, lr=9.99401e-05, gnorm=2.41, loss_scale=4, train_wall=11, gb_free=2.8, wall=97041 2021-06-19 21:36:18 | INFO | train_inner | epoch 003: 2543 / 3002 loss=2.637, ppl=6.22, wps=5931.2, ups=0.09, wpb=64821, bsz=128, num_updates=8494, lr=9.994e-05, gnorm=2.935, loss_scale=4, train_wall=10, gb_free=2.8, wall=97052 2021-06-19 21:36:29 | INFO | train_inner | epoch 003: 2544 / 3002 loss=2.631, ppl=6.19, wps=5849.3, ups=0.09, wpb=64886, bsz=128, num_updates=8495, lr=9.994e-05, gnorm=3.269, loss_scale=4, train_wall=11, gb_free=2.8, wall=97063 2021-06-19 21:36:40 | INFO | train_inner | epoch 003: 2545 / 3002 loss=2.653, ppl=6.29, wps=5884.7, ups=0.09, wpb=64793, bsz=128, num_updates=8496, lr=9.994e-05, gnorm=3.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=97074 2021-06-19 21:36:51 | INFO | train_inner | epoch 003: 2546 / 3002 loss=2.647, ppl=6.26, wps=5793.2, ups=0.09, wpb=64788, bsz=128, num_updates=8497, lr=9.994e-05, gnorm=2.144, loss_scale=4, train_wall=11, gb_free=2.8, wall=97085 2021-06-19 21:37:02 | INFO | train_inner | epoch 003: 2547 / 3002 loss=2.611, ppl=6.11, wps=5716.2, ups=0.09, wpb=64880, bsz=128, num_updates=8498, lr=9.994e-05, gnorm=2.267, loss_scale=4, train_wall=11, gb_free=2.8, wall=97097 2021-06-19 21:37:13 | INFO | train_inner | epoch 003: 2548 / 3002 loss=2.8, ppl=6.97, wps=5832.9, ups=0.09, wpb=64875, bsz=128, num_updates=8499, lr=9.994e-05, gnorm=2.278, loss_scale=4, train_wall=11, gb_free=2.8, wall=97108 2021-06-19 21:37:24 | INFO | train_inner | epoch 003: 2549 / 3002 loss=2.711, ppl=6.55, wps=5890.6, ups=0.09, wpb=64923, bsz=128, num_updates=8500, lr=9.994e-05, gnorm=4.711, loss_scale=4, train_wall=11, gb_free=2.8, wall=97119 2021-06-19 21:37:36 | INFO | train_inner | epoch 003: 2550 / 3002 loss=2.559, ppl=5.89, wps=5747, ups=0.09, wpb=64792, bsz=128, num_updates=8501, lr=9.994e-05, gnorm=2.132, loss_scale=4, train_wall=11, gb_free=2.8, wall=97130 2021-06-19 21:37:47 | INFO | train_inner | epoch 003: 2551 / 3002 loss=2.822, ppl=7.07, wps=5968.5, ups=0.09, wpb=64741, bsz=128, num_updates=8502, lr=9.994e-05, gnorm=2.691, loss_scale=4, train_wall=10, gb_free=2.8, wall=97141 2021-06-19 21:37:58 | INFO | train_inner | epoch 003: 2552 / 3002 loss=2.46, ppl=5.5, wps=5834.3, ups=0.09, wpb=64873, bsz=128, num_updates=8503, lr=9.994e-05, gnorm=2.271, loss_scale=4, train_wall=11, gb_free=2.8, wall=97152 2021-06-19 21:38:09 | INFO | train_inner | epoch 003: 2553 / 3002 loss=2.77, ppl=6.82, wps=5798.8, ups=0.09, wpb=64797, bsz=128, num_updates=8504, lr=9.994e-05, gnorm=2.416, loss_scale=4, train_wall=11, gb_free=2.8, wall=97163 2021-06-19 21:38:20 | INFO | train_inner | epoch 003: 2554 / 3002 loss=2.513, ppl=5.71, wps=6025.8, ups=0.09, wpb=64849, bsz=128, num_updates=8505, lr=9.994e-05, gnorm=2.402, loss_scale=4, train_wall=10, gb_free=2.8, wall=97174 2021-06-19 21:38:31 | INFO | train_inner | epoch 003: 2555 / 3002 loss=2.619, ppl=6.14, wps=5902.9, ups=0.09, wpb=64825, bsz=128, num_updates=8506, lr=9.99399e-05, gnorm=2.049, loss_scale=4, train_wall=11, gb_free=2.8, wall=97185 2021-06-19 21:38:42 | INFO | train_inner | epoch 003: 2556 / 3002 loss=2.675, ppl=6.39, wps=5901, ups=0.09, wpb=64836, bsz=128, num_updates=8507, lr=9.99399e-05, gnorm=2.413, loss_scale=4, train_wall=11, gb_free=2.8, wall=97196 2021-06-19 21:38:53 | INFO | train_inner | epoch 003: 2557 / 3002 loss=2.546, ppl=5.84, wps=5921.1, ups=0.09, wpb=64894, bsz=128, num_updates=8508, lr=9.99399e-05, gnorm=5.038, loss_scale=4, train_wall=11, gb_free=2.8, wall=97207 2021-06-19 21:39:04 | INFO | train_inner | epoch 003: 2558 / 3002 loss=2.702, ppl=6.51, wps=5788.7, ups=0.09, wpb=64823, bsz=128, num_updates=8509, lr=9.99399e-05, gnorm=2.234, loss_scale=4, train_wall=11, gb_free=2.8, wall=97218 2021-06-19 21:39:15 | INFO | train_inner | epoch 003: 2559 / 3002 loss=2.807, ppl=7, wps=5806.8, ups=0.09, wpb=64828, bsz=128, num_updates=8510, lr=9.99399e-05, gnorm=2.296, loss_scale=4, train_wall=11, gb_free=2.8, wall=97229 2021-06-19 21:39:26 | INFO | train_inner | epoch 003: 2560 / 3002 loss=2.593, ppl=6.04, wps=5799.9, ups=0.09, wpb=64841, bsz=128, num_updates=8511, lr=9.99399e-05, gnorm=2.472, loss_scale=4, train_wall=11, gb_free=2.8, wall=97240 2021-06-19 21:39:37 | INFO | train_inner | epoch 003: 2561 / 3002 loss=2.583, ppl=5.99, wps=5779.5, ups=0.09, wpb=64891, bsz=128, num_updates=8512, lr=9.99399e-05, gnorm=5.471, loss_scale=4, train_wall=11, gb_free=2.8, wall=97252 2021-06-19 21:39:48 | INFO | train_inner | epoch 003: 2562 / 3002 loss=2.771, ppl=6.82, wps=5880.7, ups=0.09, wpb=64775, bsz=128, num_updates=8513, lr=9.99399e-05, gnorm=2.127, loss_scale=4, train_wall=11, gb_free=2.8, wall=97263 2021-06-19 21:39:59 | INFO | train_inner | epoch 003: 2563 / 3002 loss=2.679, ppl=6.4, wps=5869, ups=0.09, wpb=64828, bsz=128, num_updates=8514, lr=9.99399e-05, gnorm=2.337, loss_scale=4, train_wall=11, gb_free=2.8, wall=97274 2021-06-19 21:40:10 | INFO | train_inner | epoch 003: 2564 / 3002 loss=2.473, ppl=5.55, wps=5963.3, ups=0.09, wpb=64907, bsz=128, num_updates=8515, lr=9.99399e-05, gnorm=2.17, loss_scale=4, train_wall=10, gb_free=2.8, wall=97285 2021-06-19 21:40:22 | INFO | train_inner | epoch 003: 2565 / 3002 loss=2.687, ppl=6.44, wps=5726.2, ups=0.09, wpb=64784, bsz=128, num_updates=8516, lr=9.99399e-05, gnorm=2.349, loss_scale=4, train_wall=11, gb_free=2.8, wall=97296 2021-06-19 21:40:33 | INFO | train_inner | epoch 003: 2566 / 3002 loss=2.736, ppl=6.66, wps=5819.8, ups=0.09, wpb=64815, bsz=128, num_updates=8517, lr=9.99399e-05, gnorm=6.681, loss_scale=4, train_wall=11, gb_free=2.8, wall=97307 2021-06-19 21:40:44 | INFO | train_inner | epoch 003: 2567 / 3002 loss=2.592, ppl=6.03, wps=5934.6, ups=0.09, wpb=64808, bsz=128, num_updates=8518, lr=9.99399e-05, gnorm=2.52, loss_scale=4, train_wall=10, gb_free=2.8, wall=97318 2021-06-19 21:40:55 | INFO | train_inner | epoch 003: 2568 / 3002 loss=2.704, ppl=6.52, wps=5778.2, ups=0.09, wpb=64837, bsz=128, num_updates=8519, lr=9.99398e-05, gnorm=2.233, loss_scale=4, train_wall=11, gb_free=2.8, wall=97329 2021-06-19 21:41:06 | INFO | train_inner | epoch 003: 2569 / 3002 loss=2.615, ppl=6.13, wps=5827.9, ups=0.09, wpb=64845, bsz=128, num_updates=8520, lr=9.99398e-05, gnorm=2.265, loss_scale=4, train_wall=11, gb_free=2.8, wall=97340 2021-06-19 21:41:17 | INFO | train_inner | epoch 003: 2570 / 3002 loss=2.694, ppl=6.47, wps=5822, ups=0.09, wpb=64784, bsz=128, num_updates=8521, lr=9.99398e-05, gnorm=3.24, loss_scale=4, train_wall=11, gb_free=2.8, wall=97351 2021-06-19 21:41:28 | INFO | train_inner | epoch 003: 2571 / 3002 loss=2.544, ppl=5.83, wps=5860.5, ups=0.09, wpb=64752, bsz=128, num_updates=8522, lr=9.99398e-05, gnorm=2.433, loss_scale=4, train_wall=11, gb_free=2.8, wall=97363 2021-06-19 21:41:39 | INFO | train_inner | epoch 003: 2572 / 3002 loss=2.709, ppl=6.54, wps=5836.2, ups=0.09, wpb=64729, bsz=128, num_updates=8523, lr=9.99398e-05, gnorm=2.455, loss_scale=4, train_wall=11, gb_free=2.8, wall=97374 2021-06-19 21:41:50 | INFO | train_inner | epoch 003: 2573 / 3002 loss=2.604, ppl=6.08, wps=5892.4, ups=0.09, wpb=64909, bsz=128, num_updates=8524, lr=9.99398e-05, gnorm=10.401, loss_scale=4, train_wall=11, gb_free=2.8, wall=97385 2021-06-19 21:42:02 | INFO | train_inner | epoch 003: 2574 / 3002 loss=2.577, ppl=5.97, wps=5770.5, ups=0.09, wpb=64850, bsz=128, num_updates=8525, lr=9.99398e-05, gnorm=2.423, loss_scale=4, train_wall=11, gb_free=2.8, wall=97396 2021-06-19 21:42:13 | INFO | train_inner | epoch 003: 2575 / 3002 loss=2.801, ppl=6.97, wps=5888.7, ups=0.09, wpb=64762, bsz=128, num_updates=8526, lr=9.99398e-05, gnorm=2.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=97407 2021-06-19 21:42:24 | INFO | train_inner | epoch 003: 2576 / 3002 loss=2.47, ppl=5.54, wps=5847, ups=0.09, wpb=64860, bsz=128, num_updates=8527, lr=9.99398e-05, gnorm=2.272, loss_scale=4, train_wall=11, gb_free=2.8, wall=97418 2021-06-19 21:42:35 | INFO | train_inner | epoch 003: 2577 / 3002 loss=2.354, ppl=5.11, wps=5859.8, ups=0.09, wpb=64818, bsz=128, num_updates=8528, lr=9.99398e-05, gnorm=3.504, loss_scale=4, train_wall=11, gb_free=2.8, wall=97429 2021-06-19 21:42:46 | INFO | train_inner | epoch 003: 2578 / 3002 loss=2.616, ppl=6.13, wps=5888.7, ups=0.09, wpb=64808, bsz=128, num_updates=8529, lr=9.99398e-05, gnorm=2.863, loss_scale=4, train_wall=11, gb_free=2.8, wall=97440 2021-06-19 21:42:57 | INFO | train_inner | epoch 003: 2579 / 3002 loss=2.525, ppl=5.75, wps=5780, ups=0.09, wpb=64777, bsz=128, num_updates=8530, lr=9.99398e-05, gnorm=2.81, loss_scale=4, train_wall=11, gb_free=2.8, wall=97451 2021-06-19 21:43:08 | INFO | train_inner | epoch 003: 2580 / 3002 loss=2.678, ppl=6.4, wps=5852.1, ups=0.09, wpb=64805, bsz=128, num_updates=8531, lr=9.99397e-05, gnorm=2.432, loss_scale=4, train_wall=11, gb_free=2.8, wall=97462 2021-06-19 21:43:19 | INFO | train_inner | epoch 003: 2581 / 3002 loss=2.53, ppl=5.78, wps=5872.3, ups=0.09, wpb=64888, bsz=128, num_updates=8532, lr=9.99397e-05, gnorm=2.322, loss_scale=4, train_wall=11, gb_free=2.8, wall=97473 2021-06-19 21:43:30 | INFO | train_inner | epoch 003: 2582 / 3002 loss=2.645, ppl=6.26, wps=5807, ups=0.09, wpb=64826, bsz=128, num_updates=8533, lr=9.99397e-05, gnorm=2.164, loss_scale=4, train_wall=11, gb_free=2.8, wall=97485 2021-06-19 21:43:41 | INFO | train_inner | epoch 003: 2583 / 3002 loss=2.585, ppl=6, wps=5880.2, ups=0.09, wpb=64832, bsz=128, num_updates=8534, lr=9.99397e-05, gnorm=5.159, loss_scale=4, train_wall=11, gb_free=2.8, wall=97496 2021-06-19 21:43:52 | INFO | train_inner | epoch 003: 2584 / 3002 loss=2.591, ppl=6.02, wps=5909.1, ups=0.09, wpb=64810, bsz=128, num_updates=8535, lr=9.99397e-05, gnorm=13.055, loss_scale=4, train_wall=11, gb_free=2.8, wall=97507 2021-06-19 21:44:03 | INFO | train_inner | epoch 003: 2585 / 3002 loss=2.61, ppl=6.11, wps=5781.7, ups=0.09, wpb=64756, bsz=128, num_updates=8536, lr=9.99397e-05, gnorm=2.806, loss_scale=4, train_wall=11, gb_free=2.8, wall=97518 2021-06-19 21:44:15 | INFO | train_inner | epoch 003: 2586 / 3002 loss=2.492, ppl=5.62, wps=5730.6, ups=0.09, wpb=64811, bsz=128, num_updates=8537, lr=9.99397e-05, gnorm=2.449, loss_scale=4, train_wall=11, gb_free=2.8, wall=97529 2021-06-19 21:44:26 | INFO | train_inner | epoch 003: 2587 / 3002 loss=2.705, ppl=6.52, wps=5719, ups=0.09, wpb=64776, bsz=128, num_updates=8538, lr=9.99397e-05, gnorm=2.404, loss_scale=4, train_wall=11, gb_free=2.8, wall=97540 2021-06-19 21:44:37 | INFO | train_inner | epoch 003: 2588 / 3002 loss=2.652, ppl=6.29, wps=5814.3, ups=0.09, wpb=64886, bsz=128, num_updates=8539, lr=9.99397e-05, gnorm=2.243, loss_scale=4, train_wall=11, gb_free=2.8, wall=97552 2021-06-19 21:44:48 | INFO | train_inner | epoch 003: 2589 / 3002 loss=2.72, ppl=6.59, wps=5918, ups=0.09, wpb=64744, bsz=128, num_updates=8540, lr=9.99397e-05, gnorm=2.726, loss_scale=4, train_wall=10, gb_free=2.8, wall=97562 2021-06-19 21:44:59 | INFO | train_inner | epoch 003: 2590 / 3002 loss=2.456, ppl=5.49, wps=5849, ups=0.09, wpb=64797, bsz=128, num_updates=8541, lr=9.99397e-05, gnorm=2.56, loss_scale=4, train_wall=11, gb_free=2.8, wall=97574 2021-06-19 21:45:10 | INFO | train_inner | epoch 003: 2591 / 3002 loss=2.599, ppl=6.06, wps=5773.3, ups=0.09, wpb=64786, bsz=128, num_updates=8542, lr=9.99397e-05, gnorm=4.11, loss_scale=4, train_wall=11, gb_free=2.8, wall=97585 2021-06-19 21:45:22 | INFO | train_inner | epoch 003: 2592 / 3002 loss=2.642, ppl=6.24, wps=5750.3, ups=0.09, wpb=64898, bsz=128, num_updates=8543, lr=9.99397e-05, gnorm=2.209, loss_scale=4, train_wall=11, gb_free=2.8, wall=97596 2021-06-19 21:45:33 | INFO | train_inner | epoch 003: 2593 / 3002 loss=2.67, ppl=6.37, wps=5849.5, ups=0.09, wpb=64893, bsz=128, num_updates=8544, lr=9.99396e-05, gnorm=2.34, loss_scale=4, train_wall=11, gb_free=2.8, wall=97607 2021-06-19 21:45:44 | INFO | train_inner | epoch 003: 2594 / 3002 loss=2.511, ppl=5.7, wps=5840.7, ups=0.09, wpb=64852, bsz=128, num_updates=8545, lr=9.99396e-05, gnorm=2.256, loss_scale=4, train_wall=11, gb_free=2.8, wall=97618 2021-06-19 21:45:55 | INFO | train_inner | epoch 003: 2595 / 3002 loss=2.546, ppl=5.84, wps=5921.6, ups=0.09, wpb=64842, bsz=128, num_updates=8546, lr=9.99396e-05, gnorm=2.489, loss_scale=4, train_wall=11, gb_free=2.8, wall=97629 2021-06-19 21:46:06 | INFO | train_inner | epoch 003: 2596 / 3002 loss=2.615, ppl=6.13, wps=5836, ups=0.09, wpb=64852, bsz=128, num_updates=8547, lr=9.99396e-05, gnorm=2.449, loss_scale=4, train_wall=11, gb_free=2.8, wall=97640 2021-06-19 21:46:17 | INFO | train_inner | epoch 003: 2597 / 3002 loss=2.694, ppl=6.47, wps=5820.4, ups=0.09, wpb=64875, bsz=128, num_updates=8548, lr=9.99396e-05, gnorm=2.678, loss_scale=4, train_wall=11, gb_free=2.8, wall=97651 2021-06-19 21:46:28 | INFO | train_inner | epoch 003: 2598 / 3002 loss=2.583, ppl=5.99, wps=5852.4, ups=0.09, wpb=64783, bsz=128, num_updates=8549, lr=9.99396e-05, gnorm=2.207, loss_scale=4, train_wall=11, gb_free=2.8, wall=97663 2021-06-19 21:46:39 | INFO | train_inner | epoch 003: 2599 / 3002 loss=2.591, ppl=6.02, wps=5859.8, ups=0.09, wpb=64883, bsz=128, num_updates=8550, lr=9.99396e-05, gnorm=3.609, loss_scale=4, train_wall=11, gb_free=2.8, wall=97674 2021-06-19 21:46:50 | INFO | train_inner | epoch 003: 2600 / 3002 loss=2.71, ppl=6.55, wps=5909.7, ups=0.09, wpb=64848, bsz=128, num_updates=8551, lr=9.99396e-05, gnorm=9.772, loss_scale=4, train_wall=10, gb_free=2.8, wall=97685 2021-06-19 21:47:01 | INFO | train_inner | epoch 003: 2601 / 3002 loss=2.647, ppl=6.26, wps=5770.8, ups=0.09, wpb=64846, bsz=128, num_updates=8552, lr=9.99396e-05, gnorm=2.8, loss_scale=4, train_wall=11, gb_free=2.8, wall=97696 2021-06-19 21:47:13 | INFO | train_inner | epoch 003: 2602 / 3002 loss=2.601, ppl=6.07, wps=5859.7, ups=0.09, wpb=64875, bsz=128, num_updates=8553, lr=9.99396e-05, gnorm=3.288, loss_scale=4, train_wall=11, gb_free=2.8, wall=97707 2021-06-19 21:47:24 | INFO | train_inner | epoch 003: 2603 / 3002 loss=2.809, ppl=7.01, wps=5829.6, ups=0.09, wpb=64835, bsz=128, num_updates=8554, lr=9.99396e-05, gnorm=2.262, loss_scale=4, train_wall=11, gb_free=2.8, wall=97718 2021-06-19 21:47:35 | INFO | train_inner | epoch 003: 2604 / 3002 loss=2.505, ppl=5.68, wps=5908.6, ups=0.09, wpb=64769, bsz=128, num_updates=8555, lr=9.99396e-05, gnorm=22.027, loss_scale=4, train_wall=10, gb_free=2.8, wall=97729 2021-06-19 21:47:46 | INFO | train_inner | epoch 003: 2605 / 3002 loss=2.603, ppl=6.07, wps=5891, ups=0.09, wpb=64804, bsz=128, num_updates=8556, lr=9.99395e-05, gnorm=2.206, loss_scale=4, train_wall=11, gb_free=2.8, wall=97740 2021-06-19 21:47:57 | INFO | train_inner | epoch 003: 2606 / 3002 loss=2.626, ppl=6.17, wps=5744.1, ups=0.09, wpb=64783, bsz=128, num_updates=8557, lr=9.99395e-05, gnorm=2.894, loss_scale=4, train_wall=11, gb_free=2.8, wall=97751 2021-06-19 21:48:08 | INFO | train_inner | epoch 003: 2607 / 3002 loss=2.758, ppl=6.76, wps=5893.8, ups=0.09, wpb=64788, bsz=128, num_updates=8558, lr=9.99395e-05, gnorm=3.008, loss_scale=4, train_wall=11, gb_free=2.8, wall=97762 2021-06-19 21:48:19 | INFO | train_inner | epoch 003: 2608 / 3002 loss=2.752, ppl=6.74, wps=5774.4, ups=0.09, wpb=64844, bsz=128, num_updates=8559, lr=9.99395e-05, gnorm=6.844, loss_scale=4, train_wall=11, gb_free=2.8, wall=97773 2021-06-19 21:48:30 | INFO | train_inner | epoch 003: 2609 / 3002 loss=2.799, ppl=6.96, wps=5976.2, ups=0.09, wpb=64824, bsz=128, num_updates=8560, lr=9.99395e-05, gnorm=5.527, loss_scale=4, train_wall=10, gb_free=2.8, wall=97784 2021-06-19 21:48:41 | INFO | train_inner | epoch 003: 2610 / 3002 loss=2.588, ppl=6.01, wps=5908.3, ups=0.09, wpb=64895, bsz=128, num_updates=8561, lr=9.99395e-05, gnorm=2.4, loss_scale=4, train_wall=10, gb_free=2.8, wall=97795 2021-06-19 21:48:52 | INFO | train_inner | epoch 003: 2611 / 3002 loss=2.707, ppl=6.53, wps=5844.9, ups=0.09, wpb=64836, bsz=128, num_updates=8562, lr=9.99395e-05, gnorm=2.437, loss_scale=4, train_wall=11, gb_free=2.8, wall=97806 2021-06-19 21:49:03 | INFO | train_inner | epoch 003: 2612 / 3002 loss=2.601, ppl=6.07, wps=5828, ups=0.09, wpb=64843, bsz=128, num_updates=8563, lr=9.99395e-05, gnorm=2.26, loss_scale=4, train_wall=11, gb_free=2.8, wall=97817 2021-06-19 21:49:14 | INFO | train_inner | epoch 003: 2613 / 3002 loss=2.719, ppl=6.58, wps=5858.2, ups=0.09, wpb=64830, bsz=128, num_updates=8564, lr=9.99395e-05, gnorm=2.439, loss_scale=4, train_wall=11, gb_free=2.8, wall=97829 2021-06-19 21:49:25 | INFO | train_inner | epoch 003: 2614 / 3002 loss=2.71, ppl=6.54, wps=5820, ups=0.09, wpb=64729, bsz=128, num_updates=8565, lr=9.99395e-05, gnorm=2.288, loss_scale=4, train_wall=11, gb_free=2.8, wall=97840 2021-06-19 21:49:37 | INFO | train_inner | epoch 003: 2615 / 3002 loss=2.659, ppl=6.31, wps=5756.8, ups=0.09, wpb=64781, bsz=128, num_updates=8566, lr=9.99395e-05, gnorm=2.738, loss_scale=4, train_wall=11, gb_free=2.8, wall=97851 2021-06-19 21:49:48 | INFO | train_inner | epoch 003: 2616 / 3002 loss=2.832, ppl=7.12, wps=5932.5, ups=0.09, wpb=64853, bsz=128, num_updates=8567, lr=9.99395e-05, gnorm=2.305, loss_scale=4, train_wall=10, gb_free=2.8, wall=97862 2021-06-19 21:49:58 | INFO | train_inner | epoch 003: 2617 / 3002 loss=2.504, ppl=5.67, wps=5961.1, ups=0.09, wpb=64834, bsz=128, num_updates=8568, lr=9.99395e-05, gnorm=2.171, loss_scale=4, train_wall=10, gb_free=2.8, wall=97873 2021-06-19 21:50:09 | INFO | train_inner | epoch 003: 2618 / 3002 loss=2.646, ppl=6.26, wps=5940.7, ups=0.09, wpb=64813, bsz=128, num_updates=8569, lr=9.99394e-05, gnorm=2.242, loss_scale=4, train_wall=10, gb_free=2.8, wall=97884 2021-06-19 21:50:20 | INFO | train_inner | epoch 003: 2619 / 3002 loss=2.854, ppl=7.23, wps=5819.4, ups=0.09, wpb=64745, bsz=128, num_updates=8570, lr=9.99394e-05, gnorm=2.423, loss_scale=4, train_wall=11, gb_free=2.8, wall=97895 2021-06-19 21:50:32 | INFO | train_inner | epoch 003: 2620 / 3002 loss=2.641, ppl=6.24, wps=5834, ups=0.09, wpb=64834, bsz=128, num_updates=8571, lr=9.99394e-05, gnorm=2.173, loss_scale=4, train_wall=11, gb_free=2.8, wall=97906 2021-06-19 21:50:43 | INFO | train_inner | epoch 003: 2621 / 3002 loss=2.677, ppl=6.39, wps=5840.3, ups=0.09, wpb=64838, bsz=128, num_updates=8572, lr=9.99394e-05, gnorm=2.602, loss_scale=4, train_wall=11, gb_free=2.8, wall=97917 2021-06-19 21:50:54 | INFO | train_inner | epoch 003: 2622 / 3002 loss=2.78, ppl=6.87, wps=5762.8, ups=0.09, wpb=64779, bsz=128, num_updates=8573, lr=9.99394e-05, gnorm=2.271, loss_scale=4, train_wall=11, gb_free=2.8, wall=97928 2021-06-19 21:51:05 | INFO | train_inner | epoch 003: 2623 / 3002 loss=2.759, ppl=6.77, wps=5853.5, ups=0.09, wpb=64842, bsz=128, num_updates=8574, lr=9.99394e-05, gnorm=2.262, loss_scale=4, train_wall=11, gb_free=2.8, wall=97939 2021-06-19 21:51:16 | INFO | train_inner | epoch 003: 2624 / 3002 loss=2.686, ppl=6.43, wps=5790.3, ups=0.09, wpb=64811, bsz=128, num_updates=8575, lr=9.99394e-05, gnorm=2.207, loss_scale=4, train_wall=11, gb_free=2.8, wall=97951 2021-06-19 21:51:27 | INFO | train_inner | epoch 003: 2625 / 3002 loss=2.545, ppl=5.84, wps=5725.3, ups=0.09, wpb=64767, bsz=128, num_updates=8576, lr=9.99394e-05, gnorm=2.112, loss_scale=4, train_wall=11, gb_free=2.8, wall=97962 2021-06-19 21:51:38 | INFO | train_inner | epoch 003: 2626 / 3002 loss=2.481, ppl=5.58, wps=5937.7, ups=0.09, wpb=64931, bsz=128, num_updates=8577, lr=9.99394e-05, gnorm=2.149, loss_scale=4, train_wall=10, gb_free=2.8, wall=97973 2021-06-19 21:51:50 | INFO | train_inner | epoch 003: 2627 / 3002 loss=2.685, ppl=6.43, wps=5806.7, ups=0.09, wpb=64900, bsz=128, num_updates=8578, lr=9.99394e-05, gnorm=2.345, loss_scale=4, train_wall=11, gb_free=2.8, wall=97984 2021-06-19 21:52:01 | INFO | train_inner | epoch 003: 2628 / 3002 loss=2.613, ppl=6.12, wps=5864.1, ups=0.09, wpb=64829, bsz=128, num_updates=8579, lr=9.99394e-05, gnorm=3.171, loss_scale=4, train_wall=11, gb_free=2.8, wall=97995 2021-06-19 21:52:12 | INFO | train_inner | epoch 003: 2629 / 3002 loss=2.581, ppl=5.98, wps=5807.6, ups=0.09, wpb=64846, bsz=128, num_updates=8580, lr=9.99394e-05, gnorm=2.36, loss_scale=4, train_wall=11, gb_free=2.8, wall=98006 2021-06-19 21:52:23 | INFO | train_inner | epoch 003: 2630 / 3002 loss=2.626, ppl=6.17, wps=5836.2, ups=0.09, wpb=64923, bsz=128, num_updates=8581, lr=9.99393e-05, gnorm=2.406, loss_scale=4, train_wall=11, gb_free=2.8, wall=98017 2021-06-19 21:52:34 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-19 21:52:45 | INFO | train_inner | epoch 003: 2632 / 3002 loss=2.723, ppl=6.6, wps=2910.2, ups=0.04, wpb=64837, bsz=128, num_updates=8582, lr=9.99393e-05, gnorm=2.172, loss_scale=2, train_wall=21, gb_free=2.8, wall=98040 2021-06-19 21:52:56 | INFO | train_inner | epoch 003: 2633 / 3002 loss=2.65, ppl=6.28, wps=5768.6, ups=0.09, wpb=64858, bsz=128, num_updates=8583, lr=9.99393e-05, gnorm=2.334, loss_scale=2, train_wall=11, gb_free=2.8, wall=98051 2021-06-19 21:53:08 | INFO | train_inner | epoch 003: 2634 / 3002 loss=2.747, ppl=6.71, wps=5866.8, ups=0.09, wpb=64765, bsz=128, num_updates=8584, lr=9.99393e-05, gnorm=2.237, loss_scale=2, train_wall=11, gb_free=2.8, wall=98062 2021-06-19 21:53:19 | INFO | train_inner | epoch 003: 2635 / 3002 loss=2.689, ppl=6.45, wps=5795, ups=0.09, wpb=64818, bsz=128, num_updates=8585, lr=9.99393e-05, gnorm=2.158, loss_scale=2, train_wall=11, gb_free=2.8, wall=98073 2021-06-19 21:53:30 | INFO | train_inner | epoch 003: 2636 / 3002 loss=2.513, ppl=5.71, wps=5821, ups=0.09, wpb=64840, bsz=128, num_updates=8586, lr=9.99393e-05, gnorm=8.682, loss_scale=2, train_wall=11, gb_free=2.8, wall=98084 2021-06-19 21:53:41 | INFO | train_inner | epoch 003: 2637 / 3002 loss=2.648, ppl=6.27, wps=5800.5, ups=0.09, wpb=64797, bsz=128, num_updates=8587, lr=9.99393e-05, gnorm=2.287, loss_scale=2, train_wall=11, gb_free=2.8, wall=98095 2021-06-19 21:53:52 | INFO | train_inner | epoch 003: 2638 / 3002 loss=2.638, ppl=6.23, wps=5810.5, ups=0.09, wpb=64765, bsz=128, num_updates=8588, lr=9.99393e-05, gnorm=4.189, loss_scale=2, train_wall=11, gb_free=2.8, wall=98106 2021-06-19 21:54:03 | INFO | train_inner | epoch 003: 2639 / 3002 loss=2.53, ppl=5.78, wps=5897.4, ups=0.09, wpb=64856, bsz=128, num_updates=8589, lr=9.99393e-05, gnorm=2.203, loss_scale=2, train_wall=11, gb_free=2.8, wall=98117 2021-06-19 21:54:14 | INFO | train_inner | epoch 003: 2640 / 3002 loss=2.495, ppl=5.64, wps=5872.4, ups=0.09, wpb=64866, bsz=128, num_updates=8590, lr=9.99393e-05, gnorm=2.144, loss_scale=2, train_wall=11, gb_free=2.8, wall=98129 2021-06-19 21:54:25 | INFO | train_inner | epoch 003: 2641 / 3002 loss=2.484, ppl=5.59, wps=5906.4, ups=0.09, wpb=64908, bsz=128, num_updates=8591, lr=9.99393e-05, gnorm=2.491, loss_scale=2, train_wall=11, gb_free=2.8, wall=98140 2021-06-19 21:54:36 | INFO | train_inner | epoch 003: 2642 / 3002 loss=2.5, ppl=5.66, wps=5839.2, ups=0.09, wpb=64902, bsz=128, num_updates=8592, lr=9.99393e-05, gnorm=2.098, loss_scale=2, train_wall=11, gb_free=2.8, wall=98151 2021-06-19 21:54:47 | INFO | train_inner | epoch 003: 2643 / 3002 loss=2.551, ppl=5.86, wps=5867.8, ups=0.09, wpb=64795, bsz=128, num_updates=8593, lr=9.99393e-05, gnorm=13.362, loss_scale=2, train_wall=11, gb_free=2.8, wall=98162 2021-06-19 21:54:58 | INFO | train_inner | epoch 003: 2644 / 3002 loss=2.673, ppl=6.38, wps=5857.3, ups=0.09, wpb=64805, bsz=128, num_updates=8594, lr=9.99392e-05, gnorm=2.07, loss_scale=2, train_wall=11, gb_free=2.8, wall=98173 2021-06-19 21:55:09 | INFO | train_inner | epoch 003: 2645 / 3002 loss=2.627, ppl=6.18, wps=5873.3, ups=0.09, wpb=64818, bsz=128, num_updates=8595, lr=9.99392e-05, gnorm=2.223, loss_scale=2, train_wall=11, gb_free=2.8, wall=98184 2021-06-19 21:55:20 | INFO | train_inner | epoch 003: 2646 / 3002 loss=2.533, ppl=5.79, wps=5916.6, ups=0.09, wpb=64818, bsz=128, num_updates=8596, lr=9.99392e-05, gnorm=2.13, loss_scale=2, train_wall=11, gb_free=2.8, wall=98195 2021-06-19 21:55:32 | INFO | train_inner | epoch 003: 2647 / 3002 loss=2.695, ppl=6.47, wps=5758.5, ups=0.09, wpb=64807, bsz=128, num_updates=8597, lr=9.99392e-05, gnorm=2.334, loss_scale=2, train_wall=11, gb_free=2.8, wall=98206 2021-06-19 21:55:43 | INFO | train_inner | epoch 003: 2648 / 3002 loss=2.681, ppl=6.41, wps=5834, ups=0.09, wpb=64822, bsz=128, num_updates=8598, lr=9.99392e-05, gnorm=2.31, loss_scale=2, train_wall=11, gb_free=2.8, wall=98217 2021-06-19 21:55:54 | INFO | train_inner | epoch 003: 2649 / 3002 loss=2.632, ppl=6.2, wps=5939.9, ups=0.09, wpb=64860, bsz=128, num_updates=8599, lr=9.99392e-05, gnorm=2.804, loss_scale=2, train_wall=10, gb_free=2.8, wall=98228 2021-06-19 21:56:05 | INFO | train_inner | epoch 003: 2650 / 3002 loss=2.583, ppl=5.99, wps=5826.1, ups=0.09, wpb=64900, bsz=128, num_updates=8600, lr=9.99392e-05, gnorm=2.664, loss_scale=2, train_wall=11, gb_free=2.8, wall=98239 2021-06-19 21:56:16 | INFO | train_inner | epoch 003: 2651 / 3002 loss=2.768, ppl=6.81, wps=5911.3, ups=0.09, wpb=64785, bsz=128, num_updates=8601, lr=9.99392e-05, gnorm=2.339, loss_scale=2, train_wall=10, gb_free=2.8, wall=98250 2021-06-19 21:56:27 | INFO | train_inner | epoch 003: 2652 / 3002 loss=2.651, ppl=6.28, wps=5879.4, ups=0.09, wpb=64781, bsz=128, num_updates=8602, lr=9.99392e-05, gnorm=19.144, loss_scale=2, train_wall=11, gb_free=2.8, wall=98261 2021-06-19 21:56:38 | INFO | train_inner | epoch 003: 2653 / 3002 loss=2.507, ppl=5.69, wps=5809.4, ups=0.09, wpb=64867, bsz=128, num_updates=8603, lr=9.99392e-05, gnorm=2.375, loss_scale=2, train_wall=11, gb_free=2.8, wall=98272 2021-06-19 21:56:49 | INFO | train_inner | epoch 003: 2654 / 3002 loss=2.488, ppl=5.61, wps=5745.4, ups=0.09, wpb=64858, bsz=128, num_updates=8604, lr=9.99392e-05, gnorm=2.108, loss_scale=2, train_wall=11, gb_free=2.8, wall=98284 2021-06-19 21:57:00 | INFO | train_inner | epoch 003: 2655 / 3002 loss=2.729, ppl=6.63, wps=5778.7, ups=0.09, wpb=64825, bsz=128, num_updates=8605, lr=9.99392e-05, gnorm=2.242, loss_scale=2, train_wall=11, gb_free=2.8, wall=98295 2021-06-19 21:57:11 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 2021-06-19 21:57:23 | INFO | train_inner | epoch 003: 2657 / 3002 loss=2.566, ppl=5.92, wps=2932.5, ups=0.05, wpb=64855, bsz=128, num_updates=8606, lr=9.99391e-05, gnorm=2.21, loss_scale=1, train_wall=21, gb_free=2.8, wall=98317 2021-06-19 21:57:34 | INFO | train_inner | epoch 003: 2658 / 3002 loss=2.633, ppl=6.2, wps=5851.6, ups=0.09, wpb=64788, bsz=128, num_updates=8607, lr=9.99391e-05, gnorm=4.302, loss_scale=1, train_wall=11, gb_free=2.8, wall=98328 2021-06-19 21:57:45 | INFO | train_inner | epoch 003: 2659 / 3002 loss=2.651, ppl=6.28, wps=5888.9, ups=0.09, wpb=64828, bsz=128, num_updates=8608, lr=9.99391e-05, gnorm=2.142, loss_scale=1, train_wall=11, gb_free=2.8, wall=98339 2021-06-19 21:57:56 | INFO | train_inner | epoch 003: 2660 / 3002 loss=2.559, ppl=5.89, wps=5823.3, ups=0.09, wpb=64843, bsz=128, num_updates=8609, lr=9.99391e-05, gnorm=2.843, loss_scale=1, train_wall=11, gb_free=2.8, wall=98350 2021-06-19 21:58:07 | INFO | train_inner | epoch 003: 2661 / 3002 loss=2.543, ppl=5.83, wps=5770.5, ups=0.09, wpb=64825, bsz=128, num_updates=8610, lr=9.99391e-05, gnorm=2.207, loss_scale=1, train_wall=11, gb_free=2.8, wall=98361 2021-06-19 21:58:18 | INFO | train_inner | epoch 003: 2662 / 3002 loss=2.707, ppl=6.53, wps=5900.6, ups=0.09, wpb=64774, bsz=128, num_updates=8611, lr=9.99391e-05, gnorm=4.007, loss_scale=1, train_wall=11, gb_free=2.8, wall=98372 2021-06-19 21:58:29 | INFO | train_inner | epoch 003: 2663 / 3002 loss=2.731, ppl=6.64, wps=5741.8, ups=0.09, wpb=64782, bsz=128, num_updates=8612, lr=9.99391e-05, gnorm=2.052, loss_scale=1, train_wall=11, gb_free=2.8, wall=98384 2021-06-19 21:58:40 | INFO | train_inner | epoch 003: 2664 / 3002 loss=2.633, ppl=6.2, wps=5809.1, ups=0.09, wpb=64818, bsz=128, num_updates=8613, lr=9.99391e-05, gnorm=2.193, loss_scale=1, train_wall=11, gb_free=2.8, wall=98395 2021-06-19 21:58:52 | INFO | train_inner | epoch 003: 2665 / 3002 loss=2.69, ppl=6.45, wps=5855, ups=0.09, wpb=64866, bsz=128, num_updates=8614, lr=9.99391e-05, gnorm=2.13, loss_scale=1, train_wall=11, gb_free=2.8, wall=98406 2021-06-19 21:59:03 | INFO | train_inner | epoch 003: 2666 / 3002 loss=2.701, ppl=6.5, wps=5751.5, ups=0.09, wpb=64851, bsz=128, num_updates=8615, lr=9.99391e-05, gnorm=2.65, loss_scale=1, train_wall=11, gb_free=2.8, wall=98417 2021-06-19 21:59:14 | INFO | train_inner | epoch 003: 2667 / 3002 loss=2.569, ppl=5.94, wps=5970.9, ups=0.09, wpb=64822, bsz=128, num_updates=8616, lr=9.99391e-05, gnorm=2.297, loss_scale=1, train_wall=10, gb_free=2.8, wall=98428 2021-06-19 21:59:25 | INFO | train_inner | epoch 003: 2668 / 3002 loss=2.621, ppl=6.15, wps=5890.8, ups=0.09, wpb=64787, bsz=128, num_updates=8617, lr=9.99391e-05, gnorm=2.101, loss_scale=1, train_wall=11, gb_free=2.8, wall=98439 2021-06-19 21:59:36 | INFO | train_inner | epoch 003: 2669 / 3002 loss=2.67, ppl=6.37, wps=5769.3, ups=0.09, wpb=64781, bsz=128, num_updates=8618, lr=9.99391e-05, gnorm=2.223, loss_scale=1, train_wall=11, gb_free=2.8, wall=98450 2021-06-19 21:59:47 | INFO | train_inner | epoch 003: 2670 / 3002 loss=2.561, ppl=5.9, wps=5831.5, ups=0.09, wpb=64869, bsz=128, num_updates=8619, lr=9.9939e-05, gnorm=2.129, loss_scale=1, train_wall=11, gb_free=2.8, wall=98461 2021-06-19 21:59:58 | INFO | train_inner | epoch 003: 2671 / 3002 loss=2.738, ppl=6.67, wps=5816.6, ups=0.09, wpb=64763, bsz=128, num_updates=8620, lr=9.9939e-05, gnorm=2.51, loss_scale=1, train_wall=11, gb_free=2.8, wall=98472 2021-06-19 22:00:09 | INFO | train_inner | epoch 003: 2672 / 3002 loss=2.522, ppl=5.74, wps=5797.3, ups=0.09, wpb=64895, bsz=128, num_updates=8621, lr=9.9939e-05, gnorm=2.066, loss_scale=1, train_wall=11, gb_free=2.8, wall=98484 2021-06-19 22:00:21 | INFO | train_inner | epoch 003: 2673 / 3002 loss=2.568, ppl=5.93, wps=5793, ups=0.09, wpb=64859, bsz=128, num_updates=8622, lr=9.9939e-05, gnorm=2.296, loss_scale=1, train_wall=11, gb_free=2.8, wall=98495 2021-06-19 22:00:32 | INFO | train_inner | epoch 003: 2674 / 3002 loss=2.614, ppl=6.12, wps=5910.7, ups=0.09, wpb=64883, bsz=128, num_updates=8623, lr=9.9939e-05, gnorm=2.761, loss_scale=1, train_wall=11, gb_free=2.8, wall=98506 2021-06-19 22:00:43 | INFO | train_inner | epoch 003: 2675 / 3002 loss=2.631, ppl=6.19, wps=5888, ups=0.09, wpb=64822, bsz=128, num_updates=8624, lr=9.9939e-05, gnorm=2.118, loss_scale=1, train_wall=11, gb_free=2.8, wall=98517 2021-06-19 22:00:54 | INFO | train_inner | epoch 003: 2676 / 3002 loss=2.549, ppl=5.85, wps=5778, ups=0.09, wpb=64838, bsz=128, num_updates=8625, lr=9.9939e-05, gnorm=2.612, loss_scale=1, train_wall=11, gb_free=2.8, wall=98528 2021-06-19 22:01:05 | INFO | train_inner | epoch 003: 2677 / 3002 loss=2.634, ppl=6.21, wps=5865.6, ups=0.09, wpb=64862, bsz=128, num_updates=8626, lr=9.9939e-05, gnorm=2.129, loss_scale=1, train_wall=11, gb_free=2.8, wall=98539 2021-06-19 22:01:16 | INFO | train_inner | epoch 003: 2678 / 3002 loss=2.646, ppl=6.26, wps=5743.8, ups=0.09, wpb=64786, bsz=128, num_updates=8627, lr=9.9939e-05, gnorm=2.243, loss_scale=1, train_wall=11, gb_free=2.8, wall=98550 2021-06-19 22:01:27 | INFO | train_inner | epoch 003: 2679 / 3002 loss=2.627, ppl=6.18, wps=5860.7, ups=0.09, wpb=64811, bsz=128, num_updates=8628, lr=9.9939e-05, gnorm=2.362, loss_scale=1, train_wall=11, gb_free=2.8, wall=98561 2021-06-19 22:01:38 | INFO | train_inner | epoch 003: 2680 / 3002 loss=2.587, ppl=6.01, wps=5806.4, ups=0.09, wpb=64789, bsz=128, num_updates=8629, lr=9.9939e-05, gnorm=3.719, loss_scale=1, train_wall=11, gb_free=2.8, wall=98573 2021-06-19 22:01:50 | INFO | train_inner | epoch 003: 2681 / 3002 loss=2.605, ppl=6.08, wps=5794.4, ups=0.09, wpb=64888, bsz=128, num_updates=8630, lr=9.9939e-05, gnorm=2.098, loss_scale=1, train_wall=11, gb_free=2.8, wall=98584 2021-06-19 22:02:01 | INFO | train_inner | epoch 003: 2682 / 3002 loss=2.661, ppl=6.32, wps=5884.3, ups=0.09, wpb=64834, bsz=128, num_updates=8631, lr=9.99389e-05, gnorm=2.593, loss_scale=1, train_wall=11, gb_free=2.8, wall=98595 2021-06-19 22:02:12 | INFO | train_inner | epoch 003: 2683 / 3002 loss=2.626, ppl=6.17, wps=5884.4, ups=0.09, wpb=64868, bsz=128, num_updates=8632, lr=9.99389e-05, gnorm=3.182, loss_scale=1, train_wall=11, gb_free=2.8, wall=98606 2021-06-19 22:02:23 | INFO | train_inner | epoch 003: 2684 / 3002 loss=2.582, ppl=5.99, wps=5666.5, ups=0.09, wpb=64799, bsz=128, num_updates=8633, lr=9.99389e-05, gnorm=2.084, loss_scale=1, train_wall=11, gb_free=2.8, wall=98617 2021-06-19 22:02:34 | INFO | train_inner | epoch 003: 2685 / 3002 loss=2.507, ppl=5.69, wps=5817.6, ups=0.09, wpb=64793, bsz=128, num_updates=8634, lr=9.99389e-05, gnorm=3.09, loss_scale=1, train_wall=11, gb_free=2.8, wall=98628 2021-06-19 22:02:45 | INFO | train_inner | epoch 003: 2686 / 3002 loss=2.625, ppl=6.17, wps=5837.3, ups=0.09, wpb=64831, bsz=128, num_updates=8635, lr=9.99389e-05, gnorm=2.128, loss_scale=1, train_wall=11, gb_free=2.8, wall=98640 2021-06-19 22:02:56 | INFO | train_inner | epoch 003: 2687 / 3002 loss=2.692, ppl=6.46, wps=5799.5, ups=0.09, wpb=64821, bsz=128, num_updates=8636, lr=9.99389e-05, gnorm=2.2, loss_scale=1, train_wall=11, gb_free=2.8, wall=98651 2021-06-19 22:03:07 | INFO | train_inner | epoch 003: 2688 / 3002 loss=2.564, ppl=5.91, wps=5867.1, ups=0.09, wpb=64894, bsz=128, num_updates=8637, lr=9.99389e-05, gnorm=2.162, loss_scale=1, train_wall=11, gb_free=2.8, wall=98662 2021-06-19 22:03:19 | INFO | train_inner | epoch 003: 2689 / 3002 loss=2.675, ppl=6.39, wps=5835.6, ups=0.09, wpb=64824, bsz=128, num_updates=8638, lr=9.99389e-05, gnorm=2.12, loss_scale=1, train_wall=11, gb_free=2.8, wall=98673 2021-06-19 22:03:30 | INFO | train_inner | epoch 003: 2690 / 3002 loss=2.496, ppl=5.64, wps=5877.5, ups=0.09, wpb=64821, bsz=128, num_updates=8639, lr=9.99389e-05, gnorm=2.19, loss_scale=1, train_wall=11, gb_free=2.8, wall=98684 2021-06-19 22:03:41 | INFO | train_inner | epoch 003: 2691 / 3002 loss=2.609, ppl=6.1, wps=5909.9, ups=0.09, wpb=64878, bsz=128, num_updates=8640, lr=9.99389e-05, gnorm=2.845, loss_scale=1, train_wall=11, gb_free=2.8, wall=98695 2021-06-19 22:03:52 | INFO | train_inner | epoch 003: 2692 / 3002 loss=2.493, ppl=5.63, wps=5913.2, ups=0.09, wpb=64869, bsz=128, num_updates=8641, lr=9.99389e-05, gnorm=2.109, loss_scale=1, train_wall=11, gb_free=2.8, wall=98706 2021-06-19 22:04:03 | INFO | train_inner | epoch 003: 2693 / 3002 loss=2.488, ppl=5.61, wps=5836.9, ups=0.09, wpb=64800, bsz=128, num_updates=8642, lr=9.99389e-05, gnorm=2.05, loss_scale=1, train_wall=11, gb_free=2.8, wall=98717 2021-06-19 22:04:14 | INFO | train_inner | epoch 003: 2694 / 3002 loss=2.641, ppl=6.24, wps=5900.9, ups=0.09, wpb=64863, bsz=128, num_updates=8643, lr=9.99389e-05, gnorm=2.183, loss_scale=1, train_wall=11, gb_free=2.8, wall=98728 2021-06-19 22:04:25 | INFO | train_inner | epoch 003: 2695 / 3002 loss=2.571, ppl=5.94, wps=5879.7, ups=0.09, wpb=64832, bsz=128, num_updates=8644, lr=9.99388e-05, gnorm=2.35, loss_scale=1, train_wall=11, gb_free=2.8, wall=98739 2021-06-19 22:04:36 | INFO | train_inner | epoch 003: 2696 / 3002 loss=2.508, ppl=5.69, wps=5795, ups=0.09, wpb=64780, bsz=128, num_updates=8645, lr=9.99388e-05, gnorm=2.067, loss_scale=1, train_wall=11, gb_free=2.8, wall=98750 2021-06-19 22:04:47 | INFO | train_inner | epoch 003: 2697 / 3002 loss=2.552, ppl=5.86, wps=5830.8, ups=0.09, wpb=64836, bsz=128, num_updates=8646, lr=9.99388e-05, gnorm=2.168, loss_scale=1, train_wall=11, gb_free=2.8, wall=98761 2021-06-19 22:04:58 | INFO | train_inner | epoch 003: 2698 / 3002 loss=2.703, ppl=6.51, wps=5866.8, ups=0.09, wpb=64904, bsz=128, num_updates=8647, lr=9.99388e-05, gnorm=4.152, loss_scale=1, train_wall=11, gb_free=2.8, wall=98772 2021-06-19 22:05:09 | INFO | train_inner | epoch 003: 2699 / 3002 loss=2.667, ppl=6.35, wps=5908.1, ups=0.09, wpb=64799, bsz=128, num_updates=8648, lr=9.99388e-05, gnorm=2.304, loss_scale=1, train_wall=11, gb_free=2.8, wall=98783 2021-06-19 22:05:20 | INFO | train_inner | epoch 003: 2700 / 3002 loss=2.605, ppl=6.08, wps=5890.5, ups=0.09, wpb=64892, bsz=128, num_updates=8649, lr=9.99388e-05, gnorm=2.1, loss_scale=1, train_wall=11, gb_free=2.8, wall=98794 2021-06-19 22:05:31 | INFO | train_inner | epoch 003: 2701 / 3002 loss=2.583, ppl=5.99, wps=5760, ups=0.09, wpb=64829, bsz=128, num_updates=8650, lr=9.99388e-05, gnorm=2.34, loss_scale=1, train_wall=11, gb_free=2.8, wall=98806 2021-06-19 22:05:42 | INFO | train_inner | epoch 003: 2702 / 3002 loss=2.561, ppl=5.9, wps=5839.4, ups=0.09, wpb=64878, bsz=128, num_updates=8651, lr=9.99388e-05, gnorm=2.12, loss_scale=1, train_wall=11, gb_free=2.8, wall=98817 2021-06-19 22:05:53 | INFO | train_inner | epoch 003: 2703 / 3002 loss=2.622, ppl=6.16, wps=5864.3, ups=0.09, wpb=64804, bsz=128, num_updates=8652, lr=9.99388e-05, gnorm=2.506, loss_scale=1, train_wall=11, gb_free=2.8, wall=98828 2021-06-19 22:06:04 | INFO | train_inner | epoch 003: 2704 / 3002 loss=2.599, ppl=6.06, wps=5917.4, ups=0.09, wpb=64743, bsz=128, num_updates=8653, lr=9.99388e-05, gnorm=2.295, loss_scale=1, train_wall=10, gb_free=2.8, wall=98839 2021-06-19 22:06:15 | INFO | train_inner | epoch 003: 2705 / 3002 loss=2.685, ppl=6.43, wps=5847, ups=0.09, wpb=64862, bsz=128, num_updates=8654, lr=9.99388e-05, gnorm=2.339, loss_scale=1, train_wall=11, gb_free=2.8, wall=98850 2021-06-19 22:06:27 | INFO | train_inner | epoch 003: 2706 / 3002 loss=2.525, ppl=5.76, wps=5881, ups=0.09, wpb=64834, bsz=128, num_updates=8655, lr=9.99388e-05, gnorm=2.552, loss_scale=1, train_wall=11, gb_free=2.8, wall=98861 2021-06-19 22:06:38 | INFO | train_inner | epoch 003: 2707 / 3002 loss=2.689, ppl=6.45, wps=5772.5, ups=0.09, wpb=64828, bsz=128, num_updates=8656, lr=9.99387e-05, gnorm=2.157, loss_scale=1, train_wall=11, gb_free=2.8, wall=98872 2021-06-19 22:06:49 | INFO | train_inner | epoch 003: 2708 / 3002 loss=2.511, ppl=5.7, wps=5870.7, ups=0.09, wpb=64811, bsz=128, num_updates=8657, lr=9.99387e-05, gnorm=2.087, loss_scale=1, train_wall=11, gb_free=2.8, wall=98883 2021-06-19 22:07:00 | INFO | train_inner | epoch 003: 2709 / 3002 loss=2.639, ppl=6.23, wps=5845.5, ups=0.09, wpb=64826, bsz=128, num_updates=8658, lr=9.99387e-05, gnorm=2.031, loss_scale=1, train_wall=11, gb_free=2.8, wall=98894 2021-06-19 22:07:11 | INFO | train_inner | epoch 003: 2710 / 3002 loss=2.527, ppl=5.77, wps=5971.5, ups=0.09, wpb=64901, bsz=128, num_updates=8659, lr=9.99387e-05, gnorm=12.579, loss_scale=1, train_wall=10, gb_free=2.8, wall=98905 2021-06-19 22:07:22 | INFO | train_inner | epoch 003: 2711 / 3002 loss=2.731, ppl=6.64, wps=5828.1, ups=0.09, wpb=64822, bsz=128, num_updates=8660, lr=9.99387e-05, gnorm=4.434, loss_scale=1, train_wall=11, gb_free=2.8, wall=98916 2021-06-19 22:07:33 | INFO | train_inner | epoch 003: 2712 / 3002 loss=2.706, ppl=6.53, wps=5728.3, ups=0.09, wpb=64844, bsz=128, num_updates=8661, lr=9.99387e-05, gnorm=2.171, loss_scale=1, train_wall=11, gb_free=2.8, wall=98928 2021-06-19 22:07:44 | INFO | train_inner | epoch 003: 2713 / 3002 loss=2.447, ppl=5.45, wps=5886.1, ups=0.09, wpb=64866, bsz=128, num_updates=8662, lr=9.99387e-05, gnorm=2.078, loss_scale=1, train_wall=11, gb_free=2.8, wall=98939 2021-06-19 22:07:55 | INFO | train_inner | epoch 003: 2714 / 3002 loss=2.627, ppl=6.18, wps=5772.9, ups=0.09, wpb=64876, bsz=128, num_updates=8663, lr=9.99387e-05, gnorm=2.298, loss_scale=1, train_wall=11, gb_free=2.8, wall=98950 2021-06-19 22:08:07 | INFO | train_inner | epoch 003: 2715 / 3002 loss=2.664, ppl=6.34, wps=5789.4, ups=0.09, wpb=64847, bsz=128, num_updates=8664, lr=9.99387e-05, gnorm=3.032, loss_scale=1, train_wall=11, gb_free=2.8, wall=98961 2021-06-19 22:08:18 | INFO | train_inner | epoch 003: 2716 / 3002 loss=2.71, ppl=6.54, wps=5802.1, ups=0.09, wpb=64801, bsz=128, num_updates=8665, lr=9.99387e-05, gnorm=2.276, loss_scale=1, train_wall=11, gb_free=2.8, wall=98972 2021-06-19 22:08:29 | INFO | train_inner | epoch 003: 2717 / 3002 loss=2.614, ppl=6.12, wps=5914.9, ups=0.09, wpb=64819, bsz=128, num_updates=8666, lr=9.99387e-05, gnorm=2.114, loss_scale=1, train_wall=11, gb_free=2.8, wall=98983 2021-06-19 22:08:40 | INFO | train_inner | epoch 003: 2718 / 3002 loss=2.63, ppl=6.19, wps=5821.6, ups=0.09, wpb=64822, bsz=128, num_updates=8667, lr=9.99387e-05, gnorm=2.176, loss_scale=1, train_wall=11, gb_free=2.8, wall=98994 2021-06-19 22:08:51 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 2021-06-19 22:09:02 | INFO | train_inner | epoch 003: 2720 / 3002 loss=2.587, ppl=6.01, wps=2961.2, ups=0.05, wpb=64843, bsz=128, num_updates=8668, lr=9.99387e-05, gnorm=2.18, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=99016 2021-06-19 22:09:13 | INFO | train_inner | epoch 003: 2721 / 3002 loss=2.652, ppl=6.29, wps=5763.9, ups=0.09, wpb=64870, bsz=128, num_updates=8669, lr=9.99386e-05, gnorm=2.517, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99027 2021-06-19 22:09:24 | INFO | train_inner | epoch 003: 2722 / 3002 loss=2.52, ppl=5.74, wps=5977, ups=0.09, wpb=64875, bsz=128, num_updates=8670, lr=9.99386e-05, gnorm=2.111, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99038 2021-06-19 22:09:35 | INFO | train_inner | epoch 003: 2723 / 3002 loss=2.559, ppl=5.89, wps=5913.1, ups=0.09, wpb=64847, bsz=128, num_updates=8671, lr=9.99386e-05, gnorm=2.24, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99049 2021-06-19 22:09:46 | INFO | train_inner | epoch 003: 2724 / 3002 loss=2.692, ppl=6.46, wps=5909.9, ups=0.09, wpb=64849, bsz=128, num_updates=8672, lr=9.99386e-05, gnorm=2.163, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99060 2021-06-19 22:09:57 | INFO | train_inner | epoch 003: 2725 / 3002 loss=2.713, ppl=6.56, wps=5803.3, ups=0.09, wpb=64700, bsz=128, num_updates=8673, lr=9.99386e-05, gnorm=2.258, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99071 2021-06-19 22:10:08 | INFO | train_inner | epoch 003: 2726 / 3002 loss=2.626, ppl=6.17, wps=5877.2, ups=0.09, wpb=64835, bsz=128, num_updates=8674, lr=9.99386e-05, gnorm=2.277, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99082 2021-06-19 22:10:19 | INFO | train_inner | epoch 003: 2727 / 3002 loss=2.61, ppl=6.11, wps=6010.2, ups=0.09, wpb=64883, bsz=128, num_updates=8675, lr=9.99386e-05, gnorm=2.297, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99093 2021-06-19 22:10:30 | INFO | train_inner | epoch 003: 2728 / 3002 loss=2.673, ppl=6.38, wps=5937.5, ups=0.09, wpb=64854, bsz=128, num_updates=8676, lr=9.99386e-05, gnorm=2.182, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99104 2021-06-19 22:10:41 | INFO | train_inner | epoch 003: 2729 / 3002 loss=2.526, ppl=5.76, wps=5843.8, ups=0.09, wpb=64786, bsz=128, num_updates=8677, lr=9.99386e-05, gnorm=2.382, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99115 2021-06-19 22:10:52 | INFO | train_inner | epoch 003: 2730 / 3002 loss=2.721, ppl=6.59, wps=5922.6, ups=0.09, wpb=64802, bsz=128, num_updates=8678, lr=9.99386e-05, gnorm=2.123, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99126 2021-06-19 22:11:03 | INFO | train_inner | epoch 003: 2731 / 3002 loss=2.742, ppl=6.69, wps=5811.9, ups=0.09, wpb=64871, bsz=128, num_updates=8679, lr=9.99386e-05, gnorm=2.199, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99137 2021-06-19 22:11:14 | INFO | train_inner | epoch 003: 2732 / 3002 loss=2.727, ppl=6.62, wps=5796.2, ups=0.09, wpb=64815, bsz=128, num_updates=8680, lr=9.99386e-05, gnorm=2.169, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99148 2021-06-19 22:11:25 | INFO | train_inner | epoch 003: 2733 / 3002 loss=2.652, ppl=6.28, wps=5805.7, ups=0.09, wpb=64795, bsz=128, num_updates=8681, lr=9.99385e-05, gnorm=2.118, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99160 2021-06-19 22:11:36 | INFO | train_inner | epoch 003: 2734 / 3002 loss=2.549, ppl=5.85, wps=5908.2, ups=0.09, wpb=64839, bsz=128, num_updates=8682, lr=9.99385e-05, gnorm=2.186, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99171 2021-06-19 22:11:47 | INFO | train_inner | epoch 003: 2735 / 3002 loss=2.67, ppl=6.36, wps=5928.2, ups=0.09, wpb=64825, bsz=128, num_updates=8683, lr=9.99385e-05, gnorm=2.097, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99182 2021-06-19 22:11:58 | INFO | train_inner | epoch 003: 2736 / 3002 loss=2.585, ppl=6, wps=5894.8, ups=0.09, wpb=64891, bsz=128, num_updates=8684, lr=9.99385e-05, gnorm=2.814, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99193 2021-06-19 22:12:09 | INFO | train_inner | epoch 003: 2737 / 3002 loss=2.705, ppl=6.52, wps=5756.2, ups=0.09, wpb=64745, bsz=128, num_updates=8685, lr=9.99385e-05, gnorm=2.077, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99204 2021-06-19 22:12:21 | INFO | train_inner | epoch 003: 2738 / 3002 loss=2.642, ppl=6.24, wps=5756.1, ups=0.09, wpb=64745, bsz=128, num_updates=8686, lr=9.99385e-05, gnorm=2.244, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99215 2021-06-19 22:12:32 | INFO | train_inner | epoch 003: 2739 / 3002 loss=2.689, ppl=6.45, wps=5850.6, ups=0.09, wpb=64874, bsz=128, num_updates=8687, lr=9.99385e-05, gnorm=2.068, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99226 2021-06-19 22:12:43 | INFO | train_inner | epoch 003: 2740 / 3002 loss=2.646, ppl=6.26, wps=5840.3, ups=0.09, wpb=64773, bsz=128, num_updates=8688, lr=9.99385e-05, gnorm=2.147, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99237 2021-06-19 22:12:54 | INFO | train_inner | epoch 003: 2741 / 3002 loss=2.526, ppl=5.76, wps=5942.7, ups=0.09, wpb=64835, bsz=128, num_updates=8689, lr=9.99385e-05, gnorm=2.083, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99248 2021-06-19 22:13:05 | INFO | train_inner | epoch 003: 2742 / 3002 loss=2.769, ppl=6.82, wps=5830.6, ups=0.09, wpb=64755, bsz=128, num_updates=8690, lr=9.99385e-05, gnorm=2.184, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99259 2021-06-19 22:13:16 | INFO | train_inner | epoch 003: 2743 / 3002 loss=2.502, ppl=5.67, wps=5915.1, ups=0.09, wpb=64831, bsz=128, num_updates=8691, lr=9.99385e-05, gnorm=2.057, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99270 2021-06-19 22:13:27 | INFO | train_inner | epoch 003: 2744 / 3002 loss=2.54, ppl=5.82, wps=5939.4, ups=0.09, wpb=64848, bsz=128, num_updates=8692, lr=9.99385e-05, gnorm=2.184, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99281 2021-06-19 22:13:38 | INFO | train_inner | epoch 003: 2745 / 3002 loss=2.636, ppl=6.21, wps=5801.4, ups=0.09, wpb=64849, bsz=128, num_updates=8693, lr=9.99385e-05, gnorm=2.769, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99292 2021-06-19 22:13:49 | INFO | train_inner | epoch 003: 2746 / 3002 loss=2.628, ppl=6.18, wps=5909.3, ups=0.09, wpb=64920, bsz=128, num_updates=8694, lr=9.99384e-05, gnorm=2.202, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99303 2021-06-19 22:14:00 | INFO | train_inner | epoch 003: 2747 / 3002 loss=2.686, ppl=6.43, wps=5930.2, ups=0.09, wpb=64914, bsz=128, num_updates=8695, lr=9.99384e-05, gnorm=2.126, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99314 2021-06-19 22:14:11 | INFO | train_inner | epoch 003: 2748 / 3002 loss=2.743, ppl=6.7, wps=5814.1, ups=0.09, wpb=64764, bsz=128, num_updates=8696, lr=9.99384e-05, gnorm=2.119, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99325 2021-06-19 22:14:22 | INFO | train_inner | epoch 003: 2749 / 3002 loss=2.629, ppl=6.19, wps=5807, ups=0.09, wpb=64826, bsz=128, num_updates=8697, lr=9.99384e-05, gnorm=2.187, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99337 2021-06-19 22:14:33 | INFO | train_inner | epoch 003: 2750 / 3002 loss=2.547, ppl=5.85, wps=5849.1, ups=0.09, wpb=64894, bsz=128, num_updates=8698, lr=9.99384e-05, gnorm=2.092, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99348 2021-06-19 22:14:44 | INFO | train_inner | epoch 003: 2751 / 3002 loss=2.612, ppl=6.12, wps=5943.5, ups=0.09, wpb=64852, bsz=128, num_updates=8699, lr=9.99384e-05, gnorm=2.457, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99359 2021-06-19 22:14:55 | INFO | train_inner | epoch 003: 2752 / 3002 loss=2.703, ppl=6.51, wps=5961, ups=0.09, wpb=64801, bsz=128, num_updates=8700, lr=9.99384e-05, gnorm=2.162, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99369 2021-06-19 22:15:06 | INFO | train_inner | epoch 003: 2753 / 3002 loss=2.648, ppl=6.27, wps=5896.6, ups=0.09, wpb=64716, bsz=128, num_updates=8701, lr=9.99384e-05, gnorm=2.828, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99380 2021-06-19 22:15:17 | INFO | train_inner | epoch 003: 2754 / 3002 loss=2.658, ppl=6.31, wps=5765.4, ups=0.09, wpb=64716, bsz=128, num_updates=8702, lr=9.99384e-05, gnorm=2.208, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99392 2021-06-19 22:15:28 | INFO | train_inner | epoch 003: 2755 / 3002 loss=2.611, ppl=6.11, wps=5938.1, ups=0.09, wpb=64845, bsz=128, num_updates=8703, lr=9.99384e-05, gnorm=2.12, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99403 2021-06-19 22:15:39 | INFO | train_inner | epoch 003: 2756 / 3002 loss=2.46, ppl=5.5, wps=5853.1, ups=0.09, wpb=64813, bsz=128, num_updates=8704, lr=9.99384e-05, gnorm=2.225, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99414 2021-06-19 22:15:50 | INFO | train_inner | epoch 003: 2757 / 3002 loss=2.663, ppl=6.33, wps=5879.2, ups=0.09, wpb=64812, bsz=128, num_updates=8705, lr=9.99384e-05, gnorm=2.072, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99425 2021-06-19 22:16:01 | INFO | train_inner | epoch 003: 2758 / 3002 loss=2.634, ppl=6.21, wps=5851.9, ups=0.09, wpb=64862, bsz=128, num_updates=8706, lr=9.99383e-05, gnorm=2.282, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99436 2021-06-19 22:16:12 | INFO | train_inner | epoch 003: 2759 / 3002 loss=2.606, ppl=6.09, wps=5938.9, ups=0.09, wpb=64806, bsz=128, num_updates=8707, lr=9.99383e-05, gnorm=2.248, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99447 2021-06-19 22:16:23 | INFO | train_inner | epoch 003: 2760 / 3002 loss=2.553, ppl=5.87, wps=5950.7, ups=0.09, wpb=64867, bsz=128, num_updates=8708, lr=9.99383e-05, gnorm=2.221, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99458 2021-06-19 22:16:34 | INFO | train_inner | epoch 003: 2761 / 3002 loss=2.415, ppl=5.33, wps=5834.2, ups=0.09, wpb=64879, bsz=128, num_updates=8709, lr=9.99383e-05, gnorm=2.278, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99469 2021-06-19 22:16:45 | INFO | train_inner | epoch 003: 2762 / 3002 loss=2.68, ppl=6.41, wps=5800, ups=0.09, wpb=64808, bsz=128, num_updates=8710, lr=9.99383e-05, gnorm=2.2, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99480 2021-06-19 22:16:57 | INFO | train_inner | epoch 003: 2763 / 3002 loss=2.622, ppl=6.16, wps=5819.1, ups=0.09, wpb=64836, bsz=128, num_updates=8711, lr=9.99383e-05, gnorm=2.168, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99491 2021-06-19 22:17:08 | INFO | train_inner | epoch 003: 2764 / 3002 loss=2.652, ppl=6.29, wps=5844.5, ups=0.09, wpb=64807, bsz=128, num_updates=8712, lr=9.99383e-05, gnorm=2.82, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99502 2021-06-19 22:17:19 | INFO | train_inner | epoch 003: 2765 / 3002 loss=2.494, ppl=5.63, wps=5844.9, ups=0.09, wpb=64878, bsz=128, num_updates=8713, lr=9.99383e-05, gnorm=2.181, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99513 2021-06-19 22:17:30 | INFO | train_inner | epoch 003: 2766 / 3002 loss=2.514, ppl=5.71, wps=5823.9, ups=0.09, wpb=64818, bsz=128, num_updates=8714, lr=9.99383e-05, gnorm=2.172, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99524 2021-06-19 22:17:41 | INFO | train_inner | epoch 003: 2767 / 3002 loss=2.731, ppl=6.64, wps=5821.4, ups=0.09, wpb=64844, bsz=128, num_updates=8715, lr=9.99383e-05, gnorm=2.144, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99535 2021-06-19 22:17:52 | INFO | train_inner | epoch 003: 2768 / 3002 loss=2.695, ppl=6.47, wps=5789.4, ups=0.09, wpb=64745, bsz=128, num_updates=8716, lr=9.99383e-05, gnorm=2.072, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99547 2021-06-19 22:18:03 | INFO | train_inner | epoch 003: 2769 / 3002 loss=2.772, ppl=6.83, wps=5790.6, ups=0.09, wpb=64759, bsz=128, num_updates=8717, lr=9.99383e-05, gnorm=2.149, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99558 2021-06-19 22:18:15 | INFO | train_inner | epoch 003: 2770 / 3002 loss=2.466, ppl=5.53, wps=5832.1, ups=0.09, wpb=64843, bsz=128, num_updates=8718, lr=9.99383e-05, gnorm=2.218, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99569 2021-06-19 22:18:26 | INFO | train_inner | epoch 003: 2771 / 3002 loss=2.485, ppl=5.6, wps=5903.6, ups=0.09, wpb=64808, bsz=128, num_updates=8719, lr=9.99382e-05, gnorm=2.236, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99580 2021-06-19 22:18:37 | INFO | train_inner | epoch 003: 2772 / 3002 loss=2.635, ppl=6.21, wps=5783.6, ups=0.09, wpb=64778, bsz=128, num_updates=8720, lr=9.99382e-05, gnorm=2.331, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99591 2021-06-19 22:18:48 | INFO | train_inner | epoch 003: 2773 / 3002 loss=2.585, ppl=6, wps=5815.6, ups=0.09, wpb=64774, bsz=128, num_updates=8721, lr=9.99382e-05, gnorm=2.648, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99602 2021-06-19 22:18:59 | INFO | train_inner | epoch 003: 2774 / 3002 loss=2.53, ppl=5.78, wps=5884.4, ups=0.09, wpb=64891, bsz=128, num_updates=8722, lr=9.99382e-05, gnorm=9.276, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99613 2021-06-19 22:19:10 | INFO | train_inner | epoch 003: 2775 / 3002 loss=2.664, ppl=6.34, wps=5892.2, ups=0.09, wpb=64877, bsz=128, num_updates=8723, lr=9.99382e-05, gnorm=2.17, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99624 2021-06-19 22:19:21 | INFO | train_inner | epoch 003: 2776 / 3002 loss=2.532, ppl=5.78, wps=5747.7, ups=0.09, wpb=64785, bsz=128, num_updates=8724, lr=9.99382e-05, gnorm=2.088, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99636 2021-06-19 22:19:32 | INFO | train_inner | epoch 003: 2777 / 3002 loss=2.595, ppl=6.04, wps=5775.3, ups=0.09, wpb=64744, bsz=128, num_updates=8725, lr=9.99382e-05, gnorm=2.796, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99647 2021-06-19 22:19:44 | INFO | train_inner | epoch 003: 2778 / 3002 loss=2.608, ppl=6.1, wps=5834.1, ups=0.09, wpb=64797, bsz=128, num_updates=8726, lr=9.99382e-05, gnorm=2.308, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99658 2021-06-19 22:19:55 | INFO | train_inner | epoch 003: 2779 / 3002 loss=2.734, ppl=6.65, wps=5774.2, ups=0.09, wpb=64724, bsz=128, num_updates=8727, lr=9.99382e-05, gnorm=2.172, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99669 2021-06-19 22:20:06 | INFO | train_inner | epoch 003: 2780 / 3002 loss=2.521, ppl=5.74, wps=5819.6, ups=0.09, wpb=64863, bsz=128, num_updates=8728, lr=9.99382e-05, gnorm=2.105, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99680 2021-06-19 22:20:17 | INFO | train_inner | epoch 003: 2781 / 3002 loss=2.759, ppl=6.77, wps=5868.7, ups=0.09, wpb=64810, bsz=128, num_updates=8729, lr=9.99382e-05, gnorm=2.394, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99691 2021-06-19 22:20:28 | INFO | train_inner | epoch 003: 2782 / 3002 loss=2.435, ppl=5.41, wps=5912, ups=0.09, wpb=64860, bsz=128, num_updates=8730, lr=9.99382e-05, gnorm=2.397, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99702 2021-06-19 22:20:39 | INFO | train_inner | epoch 003: 2783 / 3002 loss=2.458, ppl=5.49, wps=5893.5, ups=0.09, wpb=64905, bsz=128, num_updates=8731, lr=9.99381e-05, gnorm=2.086, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99713 2021-06-19 22:20:50 | INFO | train_inner | epoch 003: 2784 / 3002 loss=2.654, ppl=6.29, wps=5895.6, ups=0.09, wpb=64838, bsz=128, num_updates=8732, lr=9.99381e-05, gnorm=2.24, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99724 2021-06-19 22:21:01 | INFO | train_inner | epoch 003: 2785 / 3002 loss=2.607, ppl=6.09, wps=5777.7, ups=0.09, wpb=64862, bsz=128, num_updates=8733, lr=9.99381e-05, gnorm=2.052, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99735 2021-06-19 22:21:12 | INFO | train_inner | epoch 003: 2786 / 3002 loss=2.551, ppl=5.86, wps=5777.9, ups=0.09, wpb=64846, bsz=128, num_updates=8734, lr=9.99381e-05, gnorm=2.022, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99747 2021-06-19 22:21:23 | INFO | train_inner | epoch 003: 2787 / 3002 loss=2.612, ppl=6.12, wps=5864.9, ups=0.09, wpb=64818, bsz=128, num_updates=8735, lr=9.99381e-05, gnorm=17.357, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99758 2021-06-19 22:21:35 | INFO | train_inner | epoch 003: 2788 / 3002 loss=2.678, ppl=6.4, wps=5787.4, ups=0.09, wpb=64839, bsz=128, num_updates=8736, lr=9.99381e-05, gnorm=2.167, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99769 2021-06-19 22:21:46 | INFO | train_inner | epoch 003: 2789 / 3002 loss=2.913, ppl=7.53, wps=5824.7, ups=0.09, wpb=64786, bsz=128, num_updates=8737, lr=9.99381e-05, gnorm=3.465, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99780 2021-06-19 22:21:57 | INFO | train_inner | epoch 003: 2790 / 3002 loss=2.673, ppl=6.38, wps=5907.5, ups=0.09, wpb=64849, bsz=128, num_updates=8738, lr=9.99381e-05, gnorm=2.168, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99791 2021-06-19 22:22:08 | INFO | train_inner | epoch 003: 2791 / 3002 loss=2.569, ppl=5.93, wps=5914.2, ups=0.09, wpb=64876, bsz=128, num_updates=8739, lr=9.99381e-05, gnorm=7.887, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99802 2021-06-19 22:22:19 | INFO | train_inner | epoch 003: 2792 / 3002 loss=2.633, ppl=6.2, wps=5882.7, ups=0.09, wpb=64835, bsz=128, num_updates=8740, lr=9.99381e-05, gnorm=2.848, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99813 2021-06-19 22:22:30 | INFO | train_inner | epoch 003: 2793 / 3002 loss=2.704, ppl=6.52, wps=5801.7, ups=0.09, wpb=64874, bsz=128, num_updates=8741, lr=9.99381e-05, gnorm=2.278, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99824 2021-06-19 22:22:41 | INFO | train_inner | epoch 003: 2794 / 3002 loss=2.616, ppl=6.13, wps=5817.8, ups=0.09, wpb=64832, bsz=128, num_updates=8742, lr=9.99381e-05, gnorm=2.176, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99835 2021-06-19 22:22:52 | INFO | train_inner | epoch 003: 2795 / 3002 loss=2.509, ppl=5.69, wps=5884.1, ups=0.09, wpb=64825, bsz=128, num_updates=8743, lr=9.99381e-05, gnorm=2.048, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99846 2021-06-19 22:23:03 | INFO | train_inner | epoch 003: 2796 / 3002 loss=2.67, ppl=6.36, wps=5787.3, ups=0.09, wpb=64818, bsz=128, num_updates=8744, lr=9.9938e-05, gnorm=2.37, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99858 2021-06-19 22:23:14 | INFO | train_inner | epoch 003: 2797 / 3002 loss=2.516, ppl=5.72, wps=5807.5, ups=0.09, wpb=64860, bsz=128, num_updates=8745, lr=9.9938e-05, gnorm=2.336, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99869 2021-06-19 22:23:26 | INFO | train_inner | epoch 003: 2798 / 3002 loss=2.519, ppl=5.73, wps=5830.1, ups=0.09, wpb=64808, bsz=128, num_updates=8746, lr=9.9938e-05, gnorm=2.277, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99880 2021-06-19 22:23:36 | INFO | train_inner | epoch 003: 2799 / 3002 loss=2.597, ppl=6.05, wps=5925.9, ups=0.09, wpb=64888, bsz=128, num_updates=8747, lr=9.9938e-05, gnorm=2.188, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99891 2021-06-19 22:23:48 | INFO | train_inner | epoch 003: 2800 / 3002 loss=2.793, ppl=6.93, wps=5755.1, ups=0.09, wpb=64769, bsz=128, num_updates=8748, lr=9.9938e-05, gnorm=4.338, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99902 2021-06-19 22:23:59 | INFO | train_inner | epoch 003: 2801 / 3002 loss=2.704, ppl=6.52, wps=5880.5, ups=0.09, wpb=64857, bsz=128, num_updates=8749, lr=9.9938e-05, gnorm=2.265, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99913 2021-06-19 22:24:10 | INFO | train_inner | epoch 003: 2802 / 3002 loss=2.666, ppl=6.35, wps=5787.8, ups=0.09, wpb=64892, bsz=128, num_updates=8750, lr=9.9938e-05, gnorm=2.724, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99924 2021-06-19 22:24:21 | INFO | train_inner | epoch 003: 2803 / 3002 loss=2.607, ppl=6.09, wps=5899.1, ups=0.09, wpb=64844, bsz=128, num_updates=8751, lr=9.9938e-05, gnorm=2.316, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99935 2021-06-19 22:24:32 | INFO | train_inner | epoch 003: 2804 / 3002 loss=2.584, ppl=5.99, wps=5862.2, ups=0.09, wpb=64861, bsz=128, num_updates=8752, lr=9.9938e-05, gnorm=2.271, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99946 2021-06-19 22:24:43 | INFO | train_inner | epoch 003: 2805 / 3002 loss=2.738, ppl=6.67, wps=5921.5, ups=0.09, wpb=64728, bsz=128, num_updates=8753, lr=9.9938e-05, gnorm=2.245, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99957 2021-06-19 22:24:54 | INFO | train_inner | epoch 003: 2806 / 3002 loss=2.657, ppl=6.31, wps=5959.2, ups=0.09, wpb=64846, bsz=128, num_updates=8754, lr=9.9938e-05, gnorm=2.271, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99968 2021-06-19 22:25:05 | INFO | train_inner | epoch 003: 2807 / 3002 loss=2.779, ppl=6.86, wps=5846.9, ups=0.09, wpb=64809, bsz=128, num_updates=8755, lr=9.9938e-05, gnorm=2.213, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99979 2021-06-19 22:25:16 | INFO | train_inner | epoch 003: 2808 / 3002 loss=2.566, ppl=5.92, wps=5910.9, ups=0.09, wpb=64807, bsz=128, num_updates=8756, lr=9.99379e-05, gnorm=2.757, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99990 2021-06-19 22:25:27 | INFO | train_inner | epoch 003: 2809 / 3002 loss=2.79, ppl=6.92, wps=5792.7, ups=0.09, wpb=64779, bsz=128, num_updates=8757, lr=9.99379e-05, gnorm=2.324, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100001 2021-06-19 22:25:38 | INFO | train_inner | epoch 003: 2810 / 3002 loss=2.55, ppl=5.86, wps=5780.7, ups=0.09, wpb=64815, bsz=128, num_updates=8758, lr=9.99379e-05, gnorm=2.366, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100013 2021-06-19 22:25:49 | INFO | train_inner | epoch 003: 2811 / 3002 loss=2.487, ppl=5.6, wps=5861.2, ups=0.09, wpb=64839, bsz=128, num_updates=8759, lr=9.99379e-05, gnorm=2.351, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100024 2021-06-19 22:26:00 | INFO | train_inner | epoch 003: 2812 / 3002 loss=2.626, ppl=6.17, wps=5944.1, ups=0.09, wpb=64839, bsz=128, num_updates=8760, lr=9.99379e-05, gnorm=2.292, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=100035 2021-06-19 22:26:11 | INFO | train_inner | epoch 003: 2813 / 3002 loss=2.485, ppl=5.6, wps=5813.3, ups=0.09, wpb=64962, bsz=128, num_updates=8761, lr=9.99379e-05, gnorm=3.626, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100046 2021-06-19 22:26:23 | INFO | train_inner | epoch 003: 2814 / 3002 loss=2.52, ppl=5.74, wps=5825.4, ups=0.09, wpb=64847, bsz=128, num_updates=8762, lr=9.99379e-05, gnorm=2.371, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100057 2021-06-19 22:26:34 | INFO | train_inner | epoch 003: 2815 / 3002 loss=2.685, ppl=6.43, wps=5855.7, ups=0.09, wpb=64786, bsz=128, num_updates=8763, lr=9.99379e-05, gnorm=2.531, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100068 2021-06-19 22:26:45 | INFO | train_inner | epoch 003: 2816 / 3002 loss=2.538, ppl=5.81, wps=5755.7, ups=0.09, wpb=64754, bsz=128, num_updates=8764, lr=9.99379e-05, gnorm=9.554, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100079 2021-06-19 22:26:56 | INFO | train_inner | epoch 003: 2817 / 3002 loss=2.529, ppl=5.77, wps=5780.2, ups=0.09, wpb=64781, bsz=128, num_updates=8765, lr=9.99379e-05, gnorm=2.387, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100090 2021-06-19 22:27:07 | INFO | train_inner | epoch 003: 2818 / 3002 loss=2.53, ppl=5.78, wps=5777.5, ups=0.09, wpb=64789, bsz=128, num_updates=8766, lr=9.99379e-05, gnorm=2.183, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100102 2021-06-19 22:27:18 | INFO | train_inner | epoch 003: 2819 / 3002 loss=2.576, ppl=5.96, wps=5929.2, ups=0.09, wpb=64822, bsz=128, num_updates=8767, lr=9.99379e-05, gnorm=5.255, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=100113 2021-06-19 22:27:29 | INFO | train_inner | epoch 003: 2820 / 3002 loss=2.554, ppl=5.87, wps=5869.1, ups=0.09, wpb=64810, bsz=128, num_updates=8768, lr=9.99379e-05, gnorm=2.483, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100124 2021-06-19 22:27:40 | INFO | train_inner | epoch 003: 2821 / 3002 loss=2.597, ppl=6.05, wps=5837.9, ups=0.09, wpb=64814, bsz=128, num_updates=8769, lr=9.99378e-05, gnorm=2.558, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100135 2021-06-19 22:27:51 | INFO | train_inner | epoch 003: 2822 / 3002 loss=2.763, ppl=6.79, wps=5827.5, ups=0.09, wpb=64754, bsz=128, num_updates=8770, lr=9.99378e-05, gnorm=2.365, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100146 2021-06-19 22:28:02 | INFO | train_inner | epoch 003: 2823 / 3002 loss=2.574, ppl=5.95, wps=5891, ups=0.09, wpb=64867, bsz=128, num_updates=8771, lr=9.99378e-05, gnorm=2.305, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100157 2021-06-19 22:28:14 | INFO | train_inner | epoch 003: 2824 / 3002 loss=2.574, ppl=5.96, wps=5777.7, ups=0.09, wpb=64812, bsz=128, num_updates=8772, lr=9.99378e-05, gnorm=4.563, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100168 2021-06-19 22:28:25 | INFO | train_inner | epoch 003: 2825 / 3002 loss=2.575, ppl=5.96, wps=5836.7, ups=0.09, wpb=64770, bsz=128, num_updates=8773, lr=9.99378e-05, gnorm=2.788, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100179 2021-06-19 22:28:36 | INFO | train_inner | epoch 003: 2826 / 3002 loss=2.551, ppl=5.86, wps=5866, ups=0.09, wpb=64871, bsz=128, num_updates=8774, lr=9.99378e-05, gnorm=2.363, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100190 2021-06-19 22:28:47 | INFO | train_inner | epoch 003: 2827 / 3002 loss=2.793, ppl=6.93, wps=5911.4, ups=0.09, wpb=64949, bsz=128, num_updates=8775, lr=9.99378e-05, gnorm=2.859, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100201 2021-06-19 22:28:58 | INFO | train_inner | epoch 003: 2828 / 3002 loss=2.833, ppl=7.12, wps=5915.4, ups=0.09, wpb=64795, bsz=128, num_updates=8776, lr=9.99378e-05, gnorm=2.344, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100212 2021-06-19 22:29:09 | INFO | train_inner | epoch 003: 2829 / 3002 loss=2.712, ppl=6.55, wps=5857.3, ups=0.09, wpb=64840, bsz=128, num_updates=8777, lr=9.99378e-05, gnorm=2.224, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100223 2021-06-19 22:29:20 | INFO | train_inner | epoch 003: 2830 / 3002 loss=2.794, ppl=6.94, wps=5933, ups=0.09, wpb=64888, bsz=128, num_updates=8778, lr=9.99378e-05, gnorm=2.211, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=100234 2021-06-19 22:29:31 | INFO | train_inner | epoch 003: 2831 / 3002 loss=2.877, ppl=7.34, wps=5793.5, ups=0.09, wpb=64801, bsz=128, num_updates=8779, lr=9.99378e-05, gnorm=14.141, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100245 2021-06-19 22:29:42 | INFO | train_inner | epoch 003: 2832 / 3002 loss=2.537, ppl=5.8, wps=5926.8, ups=0.09, wpb=64827, bsz=128, num_updates=8780, lr=9.99378e-05, gnorm=3.103, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=100256 2021-06-19 22:29:53 | INFO | train_inner | epoch 003: 2833 / 3002 loss=2.656, ppl=6.3, wps=5824.7, ups=0.09, wpb=64850, bsz=128, num_updates=8781, lr=9.99377e-05, gnorm=3.005, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100267 2021-06-19 22:30:04 | INFO | train_inner | epoch 003: 2834 / 3002 loss=2.626, ppl=6.17, wps=5832, ups=0.09, wpb=64824, bsz=128, num_updates=8782, lr=9.99377e-05, gnorm=2.365, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100279 2021-06-19 22:30:15 | INFO | train_inner | epoch 003: 2835 / 3002 loss=2.743, ppl=6.7, wps=5918.4, ups=0.09, wpb=64934, bsz=128, num_updates=8783, lr=9.99377e-05, gnorm=2.752, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=100289 2021-06-19 22:30:26 | INFO | train_inner | epoch 003: 2836 / 3002 loss=2.566, ppl=5.92, wps=5849.3, ups=0.09, wpb=64853, bsz=128, num_updates=8784, lr=9.99377e-05, gnorm=3.166, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100301 2021-06-19 22:30:37 | INFO | train_inner | epoch 003: 2837 / 3002 loss=2.685, ppl=6.43, wps=5901.7, ups=0.09, wpb=64887, bsz=128, num_updates=8785, lr=9.99377e-05, gnorm=2.266, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100312 2021-06-19 22:30:48 | INFO | train_inner | epoch 003: 2838 / 3002 loss=2.665, ppl=6.34, wps=5808.6, ups=0.09, wpb=64752, bsz=128, num_updates=8786, lr=9.99377e-05, gnorm=2.421, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100323 2021-06-19 22:30:59 | INFO | train_inner | epoch 003: 2839 / 3002 loss=2.62, ppl=6.15, wps=5841.4, ups=0.09, wpb=64833, bsz=128, num_updates=8787, lr=9.99377e-05, gnorm=2.191, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100334 2021-06-19 22:31:11 | INFO | train_inner | epoch 003: 2840 / 3002 loss=2.592, ppl=6.03, wps=5828.2, ups=0.09, wpb=64892, bsz=128, num_updates=8788, lr=9.99377e-05, gnorm=2.192, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100345 2021-06-19 22:31:22 | INFO | train_inner | epoch 003: 2841 / 3002 loss=2.588, ppl=6.01, wps=5841.3, ups=0.09, wpb=64747, bsz=128, num_updates=8789, lr=9.99377e-05, gnorm=3.304, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100356 2021-06-19 22:31:33 | INFO | train_inner | epoch 003: 2842 / 3002 loss=2.551, ppl=5.86, wps=5944.2, ups=0.09, wpb=64825, bsz=128, num_updates=8790, lr=9.99377e-05, gnorm=2.316, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=100367 2021-06-19 22:31:44 | INFO | train_inner | epoch 003: 2843 / 3002 loss=2.638, ppl=6.22, wps=5789.6, ups=0.09, wpb=64849, bsz=128, num_updates=8791, lr=9.99377e-05, gnorm=2.413, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100378 2021-06-19 22:31:55 | INFO | train_inner | epoch 003: 2844 / 3002 loss=2.653, ppl=6.29, wps=5791.2, ups=0.09, wpb=64831, bsz=128, num_updates=8792, lr=9.99377e-05, gnorm=3.429, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100389 2021-06-19 22:32:06 | INFO | train_inner | epoch 003: 2845 / 3002 loss=2.576, ppl=5.96, wps=5891.1, ups=0.09, wpb=64919, bsz=128, num_updates=8793, lr=9.99377e-05, gnorm=2.187, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100400 2021-06-19 22:32:17 | INFO | train_inner | epoch 003: 2846 / 3002 loss=2.662, ppl=6.33, wps=5833.9, ups=0.09, wpb=64779, bsz=128, num_updates=8794, lr=9.99376e-05, gnorm=2.283, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100411 2021-06-19 22:32:28 | INFO | train_inner | epoch 003: 2847 / 3002 loss=2.602, ppl=6.07, wps=5734, ups=0.09, wpb=64798, bsz=128, num_updates=8795, lr=9.99376e-05, gnorm=2.321, loss_scale=1, train_wall=11, gb_free=2.8, wall=100423 2021-06-19 22:32:40 | INFO | train_inner | epoch 003: 2848 / 3002 loss=2.833, ppl=7.12, wps=5770.2, ups=0.09, wpb=64774, bsz=128, num_updates=8796, lr=9.99376e-05, gnorm=4.234, loss_scale=1, train_wall=11, gb_free=2.8, wall=100434 2021-06-19 22:32:51 | INFO | train_inner | epoch 003: 2849 / 3002 loss=2.551, ppl=5.86, wps=5800.5, ups=0.09, wpb=64864, bsz=128, num_updates=8797, lr=9.99376e-05, gnorm=2.254, loss_scale=1, train_wall=11, gb_free=2.8, wall=100445 2021-06-19 22:33:02 | INFO | train_inner | epoch 003: 2850 / 3002 loss=2.539, ppl=5.81, wps=5720.7, ups=0.09, wpb=64830, bsz=128, num_updates=8798, lr=9.99376e-05, gnorm=2.13, loss_scale=1, train_wall=11, gb_free=2.8, wall=100457 2021-06-19 22:33:13 | INFO | train_inner | epoch 003: 2851 / 3002 loss=2.561, ppl=5.9, wps=5890.7, ups=0.09, wpb=64887, bsz=128, num_updates=8799, lr=9.99376e-05, gnorm=2.091, loss_scale=1, train_wall=11, gb_free=2.8, wall=100468 2021-06-19 22:33:24 | INFO | train_inner | epoch 003: 2852 / 3002 loss=2.715, ppl=6.57, wps=5777.7, ups=0.09, wpb=64797, bsz=128, num_updates=8800, lr=9.99376e-05, gnorm=4.371, loss_scale=1, train_wall=11, gb_free=2.8, wall=100479 2021-06-19 22:33:36 | INFO | train_inner | epoch 003: 2853 / 3002 loss=2.533, ppl=5.79, wps=5800.3, ups=0.09, wpb=64833, bsz=128, num_updates=8801, lr=9.99376e-05, gnorm=2.16, loss_scale=1, train_wall=11, gb_free=2.8, wall=100490 2021-06-19 22:33:47 | INFO | train_inner | epoch 003: 2854 / 3002 loss=2.538, ppl=5.81, wps=5844.7, ups=0.09, wpb=64835, bsz=128, num_updates=8802, lr=9.99376e-05, gnorm=2.828, loss_scale=1, train_wall=11, gb_free=2.8, wall=100501 2021-06-19 22:33:58 | INFO | train_inner | epoch 003: 2855 / 3002 loss=2.766, ppl=6.8, wps=5797.2, ups=0.09, wpb=64824, bsz=128, num_updates=8803, lr=9.99376e-05, gnorm=2.217, loss_scale=1, train_wall=11, gb_free=2.8, wall=100512 2021-06-19 22:34:09 | INFO | train_inner | epoch 003: 2856 / 3002 loss=2.748, ppl=6.72, wps=5761.9, ups=0.09, wpb=64898, bsz=128, num_updates=8804, lr=9.99376e-05, gnorm=2.722, loss_scale=1, train_wall=11, gb_free=2.8, wall=100523 2021-06-19 22:34:20 | INFO | train_inner | epoch 003: 2857 / 3002 loss=2.65, ppl=6.28, wps=5757, ups=0.09, wpb=64788, bsz=128, num_updates=8805, lr=9.99376e-05, gnorm=10.385, loss_scale=1, train_wall=11, gb_free=2.8, wall=100535 2021-06-19 22:34:31 | INFO | train_inner | epoch 003: 2858 / 3002 loss=2.632, ppl=6.2, wps=5855.1, ups=0.09, wpb=64816, bsz=128, num_updates=8806, lr=9.99375e-05, gnorm=2.156, loss_scale=1, train_wall=11, gb_free=2.8, wall=100546 2021-06-19 22:34:43 | INFO | train_inner | epoch 003: 2859 / 3002 loss=2.726, ppl=6.62, wps=5846.7, ups=0.09, wpb=64789, bsz=128, num_updates=8807, lr=9.99375e-05, gnorm=2.181, loss_scale=1, train_wall=11, gb_free=2.8, wall=100557 2021-06-19 22:34:54 | INFO | train_inner | epoch 003: 2860 / 3002 loss=2.665, ppl=6.34, wps=5897.1, ups=0.09, wpb=64800, bsz=128, num_updates=8808, lr=9.99375e-05, gnorm=2.197, loss_scale=1, train_wall=10, gb_free=2.8, wall=100568 2021-06-19 22:35:05 | INFO | train_inner | epoch 003: 2861 / 3002 loss=2.571, ppl=5.94, wps=5866.7, ups=0.09, wpb=64848, bsz=128, num_updates=8809, lr=9.99375e-05, gnorm=2.181, loss_scale=1, train_wall=11, gb_free=2.8, wall=100579 2021-06-19 22:35:16 | INFO | train_inner | epoch 003: 2862 / 3002 loss=2.634, ppl=6.21, wps=5826.3, ups=0.09, wpb=64795, bsz=128, num_updates=8810, lr=9.99375e-05, gnorm=2.071, loss_scale=1, train_wall=11, gb_free=2.8, wall=100590 2021-06-19 22:35:27 | INFO | train_inner | epoch 003: 2863 / 3002 loss=2.608, ppl=6.1, wps=5751.9, ups=0.09, wpb=64811, bsz=128, num_updates=8811, lr=9.99375e-05, gnorm=2.585, loss_scale=1, train_wall=11, gb_free=2.8, wall=100601 2021-06-19 22:35:38 | INFO | train_inner | epoch 003: 2864 / 3002 loss=2.772, ppl=6.83, wps=5935, ups=0.09, wpb=64853, bsz=128, num_updates=8812, lr=9.99375e-05, gnorm=6.563, loss_scale=1, train_wall=10, gb_free=2.8, wall=100612 2021-06-19 22:35:49 | INFO | train_inner | epoch 003: 2865 / 3002 loss=2.657, ppl=6.31, wps=5738.5, ups=0.09, wpb=64915, bsz=128, num_updates=8813, lr=9.99375e-05, gnorm=2.596, loss_scale=1, train_wall=11, gb_free=2.8, wall=100624 2021-06-19 22:36:00 | INFO | train_inner | epoch 003: 2866 / 3002 loss=2.602, ppl=6.07, wps=5841.9, ups=0.09, wpb=64899, bsz=128, num_updates=8814, lr=9.99375e-05, gnorm=2.365, loss_scale=1, train_wall=11, gb_free=2.8, wall=100635 2021-06-19 22:36:11 | INFO | train_inner | epoch 003: 2867 / 3002 loss=2.613, ppl=6.12, wps=5888, ups=0.09, wpb=64881, bsz=128, num_updates=8815, lr=9.99375e-05, gnorm=2.248, loss_scale=1, train_wall=11, gb_free=2.8, wall=100646 2021-06-19 22:36:22 | INFO | train_inner | epoch 003: 2868 / 3002 loss=2.74, ppl=6.68, wps=5950.2, ups=0.09, wpb=64860, bsz=128, num_updates=8816, lr=9.99375e-05, gnorm=2.724, loss_scale=1, train_wall=10, gb_free=2.8, wall=100657 2021-06-19 22:36:33 | INFO | train_inner | epoch 003: 2869 / 3002 loss=2.524, ppl=5.75, wps=5880.6, ups=0.09, wpb=64896, bsz=128, num_updates=8817, lr=9.99375e-05, gnorm=3.268, loss_scale=1, train_wall=11, gb_free=2.8, wall=100668 2021-06-19 22:36:44 | INFO | train_inner | epoch 003: 2870 / 3002 loss=2.705, ppl=6.52, wps=5832, ups=0.09, wpb=64823, bsz=128, num_updates=8818, lr=9.99375e-05, gnorm=2.204, loss_scale=1, train_wall=11, gb_free=2.8, wall=100679 2021-06-19 22:36:55 | INFO | train_inner | epoch 003: 2871 / 3002 loss=2.656, ppl=6.3, wps=5951.3, ups=0.09, wpb=64835, bsz=128, num_updates=8819, lr=9.99374e-05, gnorm=3.697, loss_scale=1, train_wall=10, gb_free=2.8, wall=100690 2021-06-19 22:37:06 | INFO | train_inner | epoch 003: 2872 / 3002 loss=2.775, ppl=6.84, wps=5857, ups=0.09, wpb=64736, bsz=128, num_updates=8820, lr=9.99374e-05, gnorm=2.24, loss_scale=1, train_wall=11, gb_free=2.8, wall=100701 2021-06-19 22:37:17 | INFO | train_inner | epoch 003: 2873 / 3002 loss=2.474, ppl=5.56, wps=5899.7, ups=0.09, wpb=64852, bsz=128, num_updates=8821, lr=9.99374e-05, gnorm=4.818, loss_scale=1, train_wall=11, gb_free=2.8, wall=100712 2021-06-19 22:37:29 | INFO | train_inner | epoch 003: 2874 / 3002 loss=2.536, ppl=5.8, wps=5774, ups=0.09, wpb=64863, bsz=128, num_updates=8822, lr=9.99374e-05, gnorm=2.296, loss_scale=1, train_wall=11, gb_free=2.8, wall=100723 2021-06-19 22:37:40 | INFO | train_inner | epoch 003: 2875 / 3002 loss=2.508, ppl=5.69, wps=5758.4, ups=0.09, wpb=64867, bsz=128, num_updates=8823, lr=9.99374e-05, gnorm=2.447, loss_scale=1, train_wall=11, gb_free=2.8, wall=100734 2021-06-19 22:37:51 | INFO | train_inner | epoch 003: 2876 / 3002 loss=2.655, ppl=6.3, wps=5835.6, ups=0.09, wpb=64862, bsz=128, num_updates=8824, lr=9.99374e-05, gnorm=2.202, loss_scale=1, train_wall=11, gb_free=2.8, wall=100745 2021-06-19 22:38:02 | INFO | train_inner | epoch 003: 2877 / 3002 loss=2.629, ppl=6.18, wps=5881.4, ups=0.09, wpb=64801, bsz=128, num_updates=8825, lr=9.99374e-05, gnorm=2.257, loss_scale=1, train_wall=11, gb_free=2.8, wall=100756 2021-06-19 22:38:13 | INFO | train_inner | epoch 003: 2878 / 3002 loss=2.484, ppl=5.6, wps=5780.4, ups=0.09, wpb=64821, bsz=128, num_updates=8826, lr=9.99374e-05, gnorm=2.531, loss_scale=1, train_wall=11, gb_free=2.8, wall=100768 2021-06-19 22:38:24 | INFO | train_inner | epoch 003: 2879 / 3002 loss=2.634, ppl=6.21, wps=5858.8, ups=0.09, wpb=64795, bsz=128, num_updates=8827, lr=9.99374e-05, gnorm=2.605, loss_scale=1, train_wall=11, gb_free=2.8, wall=100779 2021-06-19 22:38:35 | INFO | train_inner | epoch 003: 2880 / 3002 loss=2.687, ppl=6.44, wps=5813.5, ups=0.09, wpb=64747, bsz=128, num_updates=8828, lr=9.99374e-05, gnorm=2.254, loss_scale=1, train_wall=11, gb_free=2.8, wall=100790 2021-06-19 22:38:46 | INFO | train_inner | epoch 003: 2881 / 3002 loss=2.676, ppl=6.39, wps=5844.9, ups=0.09, wpb=64837, bsz=128, num_updates=8829, lr=9.99374e-05, gnorm=2.29, loss_scale=1, train_wall=11, gb_free=2.8, wall=100801 2021-06-19 22:38:58 | INFO | train_inner | epoch 003: 2882 / 3002 loss=2.653, ppl=6.29, wps=5847.6, ups=0.09, wpb=64901, bsz=128, num_updates=8830, lr=9.99374e-05, gnorm=12.284, loss_scale=1, train_wall=11, gb_free=2.8, wall=100812 2021-06-19 22:39:09 | INFO | train_inner | epoch 003: 2883 / 3002 loss=2.86, ppl=7.26, wps=5819.8, ups=0.09, wpb=64768, bsz=128, num_updates=8831, lr=9.99373e-05, gnorm=2.257, loss_scale=1, train_wall=11, gb_free=2.8, wall=100823 2021-06-19 22:39:20 | INFO | train_inner | epoch 003: 2884 / 3002 loss=2.712, ppl=6.55, wps=5864.5, ups=0.09, wpb=64876, bsz=128, num_updates=8832, lr=9.99373e-05, gnorm=2.186, loss_scale=1, train_wall=11, gb_free=2.8, wall=100834 2021-06-19 22:39:31 | INFO | train_inner | epoch 003: 2885 / 3002 loss=2.507, ppl=5.68, wps=5944.6, ups=0.09, wpb=64891, bsz=128, num_updates=8833, lr=9.99373e-05, gnorm=2.092, loss_scale=1, train_wall=10, gb_free=2.8, wall=100845 2021-06-19 22:39:42 | INFO | train_inner | epoch 003: 2886 / 3002 loss=2.548, ppl=5.85, wps=5767.3, ups=0.09, wpb=64826, bsz=128, num_updates=8834, lr=9.99373e-05, gnorm=2.039, loss_scale=1, train_wall=11, gb_free=2.8, wall=100856 2021-06-19 22:39:53 | INFO | train_inner | epoch 003: 2887 / 3002 loss=2.752, ppl=6.74, wps=5879, ups=0.09, wpb=64851, bsz=128, num_updates=8835, lr=9.99373e-05, gnorm=4.367, loss_scale=1, train_wall=11, gb_free=2.8, wall=100867 2021-06-19 22:40:04 | INFO | train_inner | epoch 003: 2888 / 3002 loss=2.767, ppl=6.81, wps=5851.9, ups=0.09, wpb=64906, bsz=128, num_updates=8836, lr=9.99373e-05, gnorm=2.318, loss_scale=1, train_wall=11, gb_free=2.8, wall=100878 2021-06-19 22:40:15 | INFO | train_inner | epoch 003: 2889 / 3002 loss=2.557, ppl=5.89, wps=5753, ups=0.09, wpb=64878, bsz=128, num_updates=8837, lr=9.99373e-05, gnorm=2.181, loss_scale=1, train_wall=11, gb_free=2.8, wall=100890 2021-06-19 22:40:26 | INFO | train_inner | epoch 003: 2890 / 3002 loss=2.616, ppl=6.13, wps=5868.3, ups=0.09, wpb=64909, bsz=128, num_updates=8838, lr=9.99373e-05, gnorm=3.001, loss_scale=1, train_wall=11, gb_free=2.8, wall=100901 2021-06-19 22:40:37 | INFO | train_inner | epoch 003: 2891 / 3002 loss=2.598, ppl=6.05, wps=5939.9, ups=0.09, wpb=64894, bsz=128, num_updates=8839, lr=9.99373e-05, gnorm=2.73, loss_scale=1, train_wall=10, gb_free=2.8, wall=100912 2021-06-19 22:40:49 | INFO | train_inner | epoch 003: 2892 / 3002 loss=2.499, ppl=5.65, wps=5746.8, ups=0.09, wpb=64761, bsz=128, num_updates=8840, lr=9.99373e-05, gnorm=2.312, loss_scale=1, train_wall=11, gb_free=2.8, wall=100923 2021-06-19 22:41:00 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 2021-06-19 22:41:11 | INFO | train_inner | epoch 003: 2894 / 3002 loss=2.531, ppl=5.78, wps=2887.9, ups=0.04, wpb=64805, bsz=128, num_updates=8841, lr=9.99373e-05, gnorm=2.676, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=100945 2021-06-19 22:41:22 | INFO | train_inner | epoch 003: 2895 / 3002 loss=2.659, ppl=6.32, wps=5848.8, ups=0.09, wpb=64823, bsz=128, num_updates=8842, lr=9.99373e-05, gnorm=2.904, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100956 2021-06-19 22:41:33 | INFO | train_inner | epoch 003: 2896 / 3002 loss=2.556, ppl=5.88, wps=5751.1, ups=0.09, wpb=64819, bsz=128, num_updates=8843, lr=9.99373e-05, gnorm=2.542, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100968 2021-06-19 22:41:44 | INFO | train_inner | epoch 003: 2897 / 3002 loss=2.712, ppl=6.55, wps=5962.4, ups=0.09, wpb=64875, bsz=128, num_updates=8844, lr=9.99372e-05, gnorm=17.082, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=100979 2021-06-19 22:41:55 | INFO | train_inner | epoch 003: 2898 / 3002 loss=2.502, ppl=5.67, wps=5776.6, ups=0.09, wpb=64831, bsz=128, num_updates=8845, lr=9.99372e-05, gnorm=2.777, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100990 2021-06-19 22:42:06 | INFO | train_inner | epoch 003: 2899 / 3002 loss=2.74, ppl=6.68, wps=5960, ups=0.09, wpb=64825, bsz=128, num_updates=8846, lr=9.99372e-05, gnorm=2.337, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101001 2021-06-19 22:42:18 | INFO | train_inner | epoch 003: 2900 / 3002 loss=2.634, ppl=6.21, wps=5808.4, ups=0.09, wpb=64819, bsz=128, num_updates=8847, lr=9.99372e-05, gnorm=2.839, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101012 2021-06-19 22:42:29 | INFO | train_inner | epoch 003: 2901 / 3002 loss=2.703, ppl=6.51, wps=5805.1, ups=0.09, wpb=64787, bsz=128, num_updates=8848, lr=9.99372e-05, gnorm=2.238, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101023 2021-06-19 22:42:40 | INFO | train_inner | epoch 003: 2902 / 3002 loss=2.598, ppl=6.06, wps=5840.7, ups=0.09, wpb=64855, bsz=128, num_updates=8849, lr=9.99372e-05, gnorm=2.751, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101034 2021-06-19 22:42:51 | INFO | train_inner | epoch 003: 2903 / 3002 loss=2.683, ppl=6.42, wps=5763.4, ups=0.09, wpb=64777, bsz=128, num_updates=8850, lr=9.99372e-05, gnorm=2.516, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101045 2021-06-19 22:43:02 | INFO | train_inner | epoch 003: 2904 / 3002 loss=2.774, ppl=6.84, wps=5870.9, ups=0.09, wpb=64822, bsz=128, num_updates=8851, lr=9.99372e-05, gnorm=2.515, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101056 2021-06-19 22:43:13 | INFO | train_inner | epoch 003: 2905 / 3002 loss=2.561, ppl=5.9, wps=5743.5, ups=0.09, wpb=64783, bsz=128, num_updates=8852, lr=9.99372e-05, gnorm=18.514, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101068 2021-06-19 22:43:24 | INFO | train_inner | epoch 003: 2906 / 3002 loss=2.6, ppl=6.06, wps=5864.5, ups=0.09, wpb=64883, bsz=128, num_updates=8853, lr=9.99372e-05, gnorm=2.461, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101079 2021-06-19 22:43:36 | INFO | train_inner | epoch 003: 2907 / 3002 loss=2.704, ppl=6.52, wps=5825.9, ups=0.09, wpb=64851, bsz=128, num_updates=8854, lr=9.99372e-05, gnorm=2.301, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101090 2021-06-19 22:43:47 | INFO | train_inner | epoch 003: 2908 / 3002 loss=2.693, ppl=6.47, wps=5753.6, ups=0.09, wpb=64755, bsz=128, num_updates=8855, lr=9.99372e-05, gnorm=2.233, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101101 2021-06-19 22:43:58 | INFO | train_inner | epoch 003: 2909 / 3002 loss=2.563, ppl=5.91, wps=5836.1, ups=0.09, wpb=64794, bsz=128, num_updates=8856, lr=9.99371e-05, gnorm=2.34, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101112 2021-06-19 22:44:09 | INFO | train_inner | epoch 003: 2910 / 3002 loss=2.664, ppl=6.34, wps=5827, ups=0.09, wpb=64798, bsz=128, num_updates=8857, lr=9.99371e-05, gnorm=2.386, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101123 2021-06-19 22:44:20 | INFO | train_inner | epoch 003: 2911 / 3002 loss=2.729, ppl=6.63, wps=5841.5, ups=0.09, wpb=64823, bsz=128, num_updates=8858, lr=9.99371e-05, gnorm=2.211, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101134 2021-06-19 22:44:31 | INFO | train_inner | epoch 003: 2912 / 3002 loss=2.679, ppl=6.41, wps=5911.4, ups=0.09, wpb=64843, bsz=128, num_updates=8859, lr=9.99371e-05, gnorm=2.34, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101145 2021-06-19 22:44:42 | INFO | train_inner | epoch 003: 2913 / 3002 loss=2.611, ppl=6.11, wps=5803.3, ups=0.09, wpb=64823, bsz=128, num_updates=8860, lr=9.99371e-05, gnorm=2.153, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101157 2021-06-19 22:44:53 | INFO | train_inner | epoch 003: 2914 / 3002 loss=2.624, ppl=6.16, wps=5772.6, ups=0.09, wpb=64792, bsz=128, num_updates=8861, lr=9.99371e-05, gnorm=2.29, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101168 2021-06-19 22:45:05 | INFO | train_inner | epoch 003: 2915 / 3002 loss=2.618, ppl=6.14, wps=5792.7, ups=0.09, wpb=64792, bsz=128, num_updates=8862, lr=9.99371e-05, gnorm=2.433, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101179 2021-06-19 22:45:16 | INFO | train_inner | epoch 003: 2916 / 3002 loss=2.552, ppl=5.86, wps=5904, ups=0.09, wpb=64790, bsz=128, num_updates=8863, lr=9.99371e-05, gnorm=2.414, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101190 2021-06-19 22:45:27 | INFO | train_inner | epoch 003: 2917 / 3002 loss=2.599, ppl=6.06, wps=5792.2, ups=0.09, wpb=64757, bsz=128, num_updates=8864, lr=9.99371e-05, gnorm=2.196, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101201 2021-06-19 22:45:38 | INFO | train_inner | epoch 003: 2918 / 3002 loss=2.594, ppl=6.04, wps=5785.9, ups=0.09, wpb=64871, bsz=128, num_updates=8865, lr=9.99371e-05, gnorm=2.254, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101212 2021-06-19 22:45:49 | INFO | train_inner | epoch 003: 2919 / 3002 loss=2.572, ppl=5.95, wps=5748.4, ups=0.09, wpb=64861, bsz=128, num_updates=8866, lr=9.99371e-05, gnorm=2.314, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101224 2021-06-19 22:46:00 | INFO | train_inner | epoch 003: 2920 / 3002 loss=2.635, ppl=6.21, wps=5916.2, ups=0.09, wpb=64889, bsz=128, num_updates=8867, lr=9.99371e-05, gnorm=2.193, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101235 2021-06-19 22:46:11 | INFO | train_inner | epoch 003: 2921 / 3002 loss=2.625, ppl=6.17, wps=5897.9, ups=0.09, wpb=64855, bsz=128, num_updates=8868, lr=9.99371e-05, gnorm=2.451, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101246 2021-06-19 22:46:22 | INFO | train_inner | epoch 003: 2922 / 3002 loss=2.577, ppl=5.97, wps=5832, ups=0.09, wpb=64867, bsz=128, num_updates=8869, lr=9.9937e-05, gnorm=2.623, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101257 2021-06-19 22:46:34 | INFO | train_inner | epoch 003: 2923 / 3002 loss=2.619, ppl=6.15, wps=5773.9, ups=0.09, wpb=64852, bsz=128, num_updates=8870, lr=9.9937e-05, gnorm=2.506, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101268 2021-06-19 22:46:45 | INFO | train_inner | epoch 003: 2924 / 3002 loss=2.594, ppl=6.04, wps=5818.1, ups=0.09, wpb=64856, bsz=128, num_updates=8871, lr=9.9937e-05, gnorm=32.179, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101279 2021-06-19 22:46:56 | INFO | train_inner | epoch 003: 2925 / 3002 loss=2.652, ppl=6.28, wps=5855.7, ups=0.09, wpb=64861, bsz=128, num_updates=8872, lr=9.9937e-05, gnorm=2.33, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101290 2021-06-19 22:47:07 | INFO | train_inner | epoch 003: 2926 / 3002 loss=2.79, ppl=6.92, wps=5867.8, ups=0.09, wpb=64864, bsz=128, num_updates=8873, lr=9.9937e-05, gnorm=2.746, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101301 2021-06-19 22:47:18 | INFO | train_inner | epoch 003: 2927 / 3002 loss=2.619, ppl=6.14, wps=5830.3, ups=0.09, wpb=64739, bsz=128, num_updates=8874, lr=9.9937e-05, gnorm=2.964, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101312 2021-06-19 22:47:29 | INFO | train_inner | epoch 003: 2928 / 3002 loss=2.661, ppl=6.32, wps=5792.4, ups=0.09, wpb=64823, bsz=128, num_updates=8875, lr=9.9937e-05, gnorm=2.226, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101324 2021-06-19 22:47:40 | INFO | train_inner | epoch 003: 2929 / 3002 loss=2.62, ppl=6.15, wps=5832.3, ups=0.09, wpb=64814, bsz=128, num_updates=8876, lr=9.9937e-05, gnorm=2.976, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101335 2021-06-19 22:47:51 | INFO | train_inner | epoch 003: 2930 / 3002 loss=2.564, ppl=5.91, wps=5873, ups=0.09, wpb=64763, bsz=128, num_updates=8877, lr=9.9937e-05, gnorm=2.241, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101346 2021-06-19 22:48:02 | INFO | train_inner | epoch 003: 2931 / 3002 loss=2.63, ppl=6.19, wps=5934, ups=0.09, wpb=64896, bsz=128, num_updates=8878, lr=9.9937e-05, gnorm=3.389, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101357 2021-06-19 22:48:14 | INFO | train_inner | epoch 003: 2932 / 3002 loss=2.735, ppl=6.66, wps=5766, ups=0.09, wpb=64783, bsz=128, num_updates=8879, lr=9.9937e-05, gnorm=2.996, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101368 2021-06-19 22:48:25 | INFO | train_inner | epoch 003: 2933 / 3002 loss=2.664, ppl=6.34, wps=5824.1, ups=0.09, wpb=64817, bsz=128, num_updates=8880, lr=9.9937e-05, gnorm=2.402, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101379 2021-06-19 22:48:36 | INFO | train_inner | epoch 003: 2934 / 3002 loss=2.634, ppl=6.21, wps=5804.2, ups=0.09, wpb=64785, bsz=128, num_updates=8881, lr=9.99369e-05, gnorm=5.055, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101390 2021-06-19 22:48:47 | INFO | train_inner | epoch 003: 2935 / 3002 loss=2.66, ppl=6.32, wps=5784.7, ups=0.09, wpb=64798, bsz=128, num_updates=8882, lr=9.99369e-05, gnorm=2.32, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101401 2021-06-19 22:48:58 | INFO | train_inner | epoch 003: 2936 / 3002 loss=2.563, ppl=5.91, wps=5941.7, ups=0.09, wpb=64845, bsz=128, num_updates=8883, lr=9.99369e-05, gnorm=2.741, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101412 2021-06-19 22:49:09 | INFO | train_inner | epoch 003: 2937 / 3002 loss=2.626, ppl=6.17, wps=5909.2, ups=0.09, wpb=64887, bsz=128, num_updates=8884, lr=9.99369e-05, gnorm=3.078, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101423 2021-06-19 22:49:20 | INFO | train_inner | epoch 003: 2938 / 3002 loss=2.915, ppl=7.54, wps=5799.6, ups=0.09, wpb=64790, bsz=128, num_updates=8885, lr=9.99369e-05, gnorm=4.825, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101434 2021-06-19 22:49:31 | INFO | train_inner | epoch 003: 2939 / 3002 loss=2.61, ppl=6.1, wps=5900.6, ups=0.09, wpb=64889, bsz=128, num_updates=8886, lr=9.99369e-05, gnorm=2.414, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101445 2021-06-19 22:49:42 | INFO | train_inner | epoch 003: 2940 / 3002 loss=2.554, ppl=5.87, wps=5816.8, ups=0.09, wpb=64802, bsz=128, num_updates=8887, lr=9.99369e-05, gnorm=5.978, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101457 2021-06-19 22:49:53 | INFO | train_inner | epoch 003: 2941 / 3002 loss=2.494, ppl=5.63, wps=5859.8, ups=0.09, wpb=64911, bsz=128, num_updates=8888, lr=9.99369e-05, gnorm=2.14, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101468 2021-06-19 22:50:04 | INFO | train_inner | epoch 003: 2942 / 3002 loss=2.602, ppl=6.07, wps=5778.7, ups=0.09, wpb=64781, bsz=128, num_updates=8889, lr=9.99369e-05, gnorm=2.261, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101479 2021-06-19 22:50:16 | INFO | train_inner | epoch 003: 2943 / 3002 loss=2.743, ppl=6.7, wps=5822.2, ups=0.09, wpb=64791, bsz=128, num_updates=8890, lr=9.99369e-05, gnorm=33.718, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101490 2021-06-19 22:50:26 | INFO | train_inner | epoch 003: 2944 / 3002 loss=2.841, ppl=7.16, wps=5981.3, ups=0.09, wpb=64867, bsz=128, num_updates=8891, lr=9.99369e-05, gnorm=2.905, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101501 2021-06-19 22:50:37 | INFO | train_inner | epoch 003: 2945 / 3002 loss=2.67, ppl=6.36, wps=5974.8, ups=0.09, wpb=64828, bsz=128, num_updates=8892, lr=9.99369e-05, gnorm=3.419, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101512 2021-06-19 22:50:48 | INFO | train_inner | epoch 003: 2946 / 3002 loss=2.784, ppl=6.89, wps=5827, ups=0.09, wpb=64850, bsz=128, num_updates=8893, lr=9.99369e-05, gnorm=3.141, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101523 2021-06-19 22:51:00 | INFO | train_inner | epoch 003: 2947 / 3002 loss=2.579, ppl=5.97, wps=5851.4, ups=0.09, wpb=64741, bsz=128, num_updates=8894, lr=9.99368e-05, gnorm=3.804, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101534 2021-06-19 22:51:10 | INFO | train_inner | epoch 003: 2948 / 3002 loss=2.456, ppl=5.49, wps=5944.7, ups=0.09, wpb=64788, bsz=128, num_updates=8895, lr=9.99368e-05, gnorm=2.427, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101545 2021-06-19 22:51:22 | INFO | train_inner | epoch 003: 2949 / 3002 loss=2.718, ppl=6.58, wps=5792.7, ups=0.09, wpb=64746, bsz=128, num_updates=8896, lr=9.99368e-05, gnorm=2.47, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101556 2021-06-19 22:51:33 | INFO | train_inner | epoch 003: 2950 / 3002 loss=2.557, ppl=5.89, wps=5846.6, ups=0.09, wpb=64894, bsz=128, num_updates=8897, lr=9.99368e-05, gnorm=2.324, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101567 2021-06-19 22:51:44 | INFO | train_inner | epoch 003: 2951 / 3002 loss=2.704, ppl=6.51, wps=5798.3, ups=0.09, wpb=64859, bsz=128, num_updates=8898, lr=9.99368e-05, gnorm=4.024, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101578 2021-06-19 22:51:55 | INFO | train_inner | epoch 003: 2952 / 3002 loss=2.698, ppl=6.49, wps=5795.3, ups=0.09, wpb=64813, bsz=128, num_updates=8899, lr=9.99368e-05, gnorm=2.353, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101589 2021-06-19 22:52:06 | INFO | train_inner | epoch 003: 2953 / 3002 loss=2.652, ppl=6.29, wps=5820.7, ups=0.09, wpb=64831, bsz=128, num_updates=8900, lr=9.99368e-05, gnorm=2.337, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101601 2021-06-19 22:52:17 | INFO | train_inner | epoch 003: 2954 / 3002 loss=2.616, ppl=6.13, wps=5976.5, ups=0.09, wpb=64844, bsz=128, num_updates=8901, lr=9.99368e-05, gnorm=2.475, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101611 2021-06-19 22:52:28 | INFO | train_inner | epoch 003: 2955 / 3002 loss=2.653, ppl=6.29, wps=5863, ups=0.09, wpb=64839, bsz=128, num_updates=8902, lr=9.99368e-05, gnorm=2.573, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101622 2021-06-19 22:52:39 | INFO | train_inner | epoch 003: 2956 / 3002 loss=2.777, ppl=6.85, wps=5902.9, ups=0.09, wpb=64863, bsz=128, num_updates=8903, lr=9.99368e-05, gnorm=2.459, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101633 2021-06-19 22:52:50 | INFO | train_inner | epoch 003: 2957 / 3002 loss=2.871, ppl=7.32, wps=5982.7, ups=0.09, wpb=64785, bsz=128, num_updates=8904, lr=9.99368e-05, gnorm=2.242, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101644 2021-06-19 22:53:01 | INFO | train_inner | epoch 003: 2958 / 3002 loss=2.765, ppl=6.8, wps=5878.3, ups=0.09, wpb=64779, bsz=128, num_updates=8905, lr=9.99368e-05, gnorm=2.355, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101655 2021-06-19 22:53:12 | INFO | train_inner | epoch 003: 2959 / 3002 loss=2.555, ppl=5.88, wps=5878.3, ups=0.09, wpb=64807, bsz=128, num_updates=8906, lr=9.99367e-05, gnorm=3.162, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101666 2021-06-19 22:53:23 | INFO | train_inner | epoch 003: 2960 / 3002 loss=2.497, ppl=5.64, wps=5880.2, ups=0.09, wpb=64843, bsz=128, num_updates=8907, lr=9.99367e-05, gnorm=2.258, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101677 2021-06-19 22:53:34 | INFO | train_inner | epoch 003: 2961 / 3002 loss=2.671, ppl=6.37, wps=5929.6, ups=0.09, wpb=64810, bsz=128, num_updates=8908, lr=9.99367e-05, gnorm=3.416, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101688 2021-06-19 22:53:45 | INFO | train_inner | epoch 003: 2962 / 3002 loss=2.604, ppl=6.08, wps=5784, ups=0.09, wpb=64860, bsz=128, num_updates=8909, lr=9.99367e-05, gnorm=2.676, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101699 2021-06-19 22:53:56 | INFO | train_inner | epoch 003: 2963 / 3002 loss=2.608, ppl=6.1, wps=5921.4, ups=0.09, wpb=64887, bsz=128, num_updates=8910, lr=9.99367e-05, gnorm=2.51, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101710 2021-06-19 22:54:07 | INFO | train_inner | epoch 003: 2964 / 3002 loss=2.63, ppl=6.19, wps=5824.1, ups=0.09, wpb=64894, bsz=128, num_updates=8911, lr=9.99367e-05, gnorm=2.609, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101722 2021-06-19 22:54:18 | INFO | train_inner | epoch 003: 2965 / 3002 loss=2.603, ppl=6.07, wps=5970, ups=0.09, wpb=64789, bsz=128, num_updates=8912, lr=9.99367e-05, gnorm=2.187, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101732 2021-06-19 22:54:29 | INFO | train_inner | epoch 003: 2966 / 3002 loss=2.619, ppl=6.14, wps=5781.3, ups=0.09, wpb=64781, bsz=128, num_updates=8913, lr=9.99367e-05, gnorm=3.717, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101744 2021-06-19 22:54:40 | INFO | train_inner | epoch 003: 2967 / 3002 loss=2.608, ppl=6.1, wps=5801.5, ups=0.09, wpb=64893, bsz=128, num_updates=8914, lr=9.99367e-05, gnorm=2.264, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101755 2021-06-19 22:54:52 | INFO | train_inner | epoch 003: 2968 / 3002 loss=2.557, ppl=5.89, wps=5789.7, ups=0.09, wpb=64828, bsz=128, num_updates=8915, lr=9.99367e-05, gnorm=2.13, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101766 2021-06-19 22:55:03 | INFO | train_inner | epoch 003: 2969 / 3002 loss=2.741, ppl=6.68, wps=5834.2, ups=0.09, wpb=64817, bsz=128, num_updates=8916, lr=9.99367e-05, gnorm=2.809, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101777 2021-06-19 22:55:14 | INFO | train_inner | epoch 003: 2970 / 3002 loss=2.533, ppl=5.79, wps=5865.6, ups=0.09, wpb=64805, bsz=128, num_updates=8917, lr=9.99367e-05, gnorm=2.489, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101788 2021-06-19 22:55:25 | INFO | train_inner | epoch 003: 2971 / 3002 loss=2.708, ppl=6.54, wps=5711.8, ups=0.09, wpb=64790, bsz=128, num_updates=8918, lr=9.99367e-05, gnorm=2.098, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101800 2021-06-19 22:55:36 | INFO | train_inner | epoch 003: 2972 / 3002 loss=2.597, ppl=6.05, wps=5864.4, ups=0.09, wpb=64815, bsz=128, num_updates=8919, lr=9.99366e-05, gnorm=3.585, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101811 2021-06-19 22:55:47 | INFO | train_inner | epoch 003: 2973 / 3002 loss=2.692, ppl=6.46, wps=5813.5, ups=0.09, wpb=64833, bsz=128, num_updates=8920, lr=9.99366e-05, gnorm=4.659, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101822 2021-06-19 22:55:59 | INFO | train_inner | epoch 003: 2974 / 3002 loss=2.637, ppl=6.22, wps=5731.9, ups=0.09, wpb=64724, bsz=128, num_updates=8921, lr=9.99366e-05, gnorm=2.291, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101833 2021-06-19 22:56:10 | INFO | train_inner | epoch 003: 2975 / 3002 loss=2.795, ppl=6.94, wps=5912.6, ups=0.09, wpb=64759, bsz=128, num_updates=8922, lr=9.99366e-05, gnorm=3.96, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101844 2021-06-19 22:56:21 | INFO | train_inner | epoch 003: 2976 / 3002 loss=2.688, ppl=6.44, wps=5754.9, ups=0.09, wpb=64793, bsz=128, num_updates=8923, lr=9.99366e-05, gnorm=5.874, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101855 2021-06-19 22:56:32 | INFO | train_inner | epoch 003: 2977 / 3002 loss=2.721, ppl=6.59, wps=5862.5, ups=0.09, wpb=64816, bsz=128, num_updates=8924, lr=9.99366e-05, gnorm=2.467, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101866 2021-06-19 22:56:43 | INFO | train_inner | epoch 003: 2978 / 3002 loss=2.583, ppl=5.99, wps=5871.2, ups=0.09, wpb=64809, bsz=128, num_updates=8925, lr=9.99366e-05, gnorm=2.524, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101877 2021-06-19 22:56:54 | INFO | train_inner | epoch 003: 2979 / 3002 loss=2.617, ppl=6.14, wps=5864.8, ups=0.09, wpb=64814, bsz=128, num_updates=8926, lr=9.99366e-05, gnorm=2.152, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101888 2021-06-19 22:57:05 | INFO | train_inner | epoch 003: 2980 / 3002 loss=2.604, ppl=6.08, wps=5965, ups=0.09, wpb=64823, bsz=128, num_updates=8927, lr=9.99366e-05, gnorm=2.369, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101899 2021-06-19 22:57:16 | INFO | train_inner | epoch 003: 2981 / 3002 loss=2.663, ppl=6.33, wps=5875.6, ups=0.09, wpb=64850, bsz=128, num_updates=8928, lr=9.99366e-05, gnorm=2.299, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101910 2021-06-19 22:57:27 | INFO | train_inner | epoch 003: 2982 / 3002 loss=2.634, ppl=6.21, wps=5856.3, ups=0.09, wpb=64819, bsz=128, num_updates=8929, lr=9.99366e-05, gnorm=2.309, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101921 2021-06-19 22:57:38 | INFO | train_inner | epoch 003: 2983 / 3002 loss=2.687, ppl=6.44, wps=5861.8, ups=0.09, wpb=64839, bsz=128, num_updates=8930, lr=9.99366e-05, gnorm=6.849, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101932 2021-06-19 22:57:49 | INFO | train_inner | epoch 003: 2984 / 3002 loss=2.699, ppl=6.49, wps=5917.3, ups=0.09, wpb=64756, bsz=128, num_updates=8931, lr=9.99365e-05, gnorm=8.182, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101943 2021-06-19 22:58:00 | INFO | train_inner | epoch 003: 2985 / 3002 loss=2.527, ppl=5.76, wps=5849.8, ups=0.09, wpb=64791, bsz=128, num_updates=8932, lr=9.99365e-05, gnorm=2.198, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101954 2021-06-19 22:58:11 | INFO | train_inner | epoch 003: 2986 / 3002 loss=2.517, ppl=5.72, wps=5811.5, ups=0.09, wpb=64870, bsz=128, num_updates=8933, lr=9.99365e-05, gnorm=2.299, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101966 2021-06-19 22:58:23 | INFO | train_inner | epoch 003: 2987 / 3002 loss=2.637, ppl=6.22, wps=5744.7, ups=0.09, wpb=64767, bsz=128, num_updates=8934, lr=9.99365e-05, gnorm=4.498, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101977 2021-06-19 22:58:34 | INFO | train_inner | epoch 003: 2988 / 3002 loss=2.538, ppl=5.81, wps=5728.8, ups=0.09, wpb=64728, bsz=128, num_updates=8935, lr=9.99365e-05, gnorm=2.22, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101988 2021-06-19 22:58:45 | INFO | train_inner | epoch 003: 2989 / 3002 loss=2.541, ppl=5.82, wps=5768.6, ups=0.09, wpb=64809, bsz=128, num_updates=8936, lr=9.99365e-05, gnorm=2.311, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101999 2021-06-19 22:58:56 | INFO | train_inner | epoch 003: 2990 / 3002 loss=2.693, ppl=6.47, wps=5897.1, ups=0.09, wpb=64847, bsz=128, num_updates=8937, lr=9.99365e-05, gnorm=4.031, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=102010 2021-06-19 22:59:07 | INFO | train_inner | epoch 003: 2991 / 3002 loss=2.73, ppl=6.63, wps=5740.5, ups=0.09, wpb=64884, bsz=128, num_updates=8938, lr=9.99365e-05, gnorm=2.271, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=102022 2021-06-19 22:59:18 | INFO | train_inner | epoch 003: 2992 / 3002 loss=2.605, ppl=6.08, wps=5830, ups=0.09, wpb=64803, bsz=128, num_updates=8939, lr=9.99365e-05, gnorm=2.118, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=102033 2021-06-19 22:59:29 | INFO | train_inner | epoch 003: 2993 / 3002 loss=2.722, ppl=6.6, wps=5935.6, ups=0.09, wpb=64823, bsz=128, num_updates=8940, lr=9.99365e-05, gnorm=3.385, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=102044 2021-06-19 22:59:40 | INFO | train_inner | epoch 003: 2994 / 3002 loss=2.772, ppl=6.83, wps=5858.1, ups=0.09, wpb=64848, bsz=128, num_updates=8941, lr=9.99365e-05, gnorm=2.775, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=102055 2021-06-19 22:59:52 | INFO | train_inner | epoch 003: 2995 / 3002 loss=2.792, ppl=6.93, wps=5882.4, ups=0.09, wpb=64875, bsz=128, num_updates=8942, lr=9.99365e-05, gnorm=20.74, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=102066 2021-06-19 23:00:03 | INFO | train_inner | epoch 003: 2996 / 3002 loss=2.547, ppl=5.84, wps=5780.7, ups=0.09, wpb=64793, bsz=128, num_updates=8943, lr=9.99365e-05, gnorm=2.143, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=102077 2021-06-19 23:00:14 | INFO | train_inner | epoch 003: 2997 / 3002 loss=2.694, ppl=6.47, wps=5748.3, ups=0.09, wpb=64779, bsz=128, num_updates=8944, lr=9.99364e-05, gnorm=36.553, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=102088 2021-06-19 23:00:25 | INFO | train_inner | epoch 003: 2998 / 3002 loss=2.703, ppl=6.51, wps=5960.5, ups=0.09, wpb=64854, bsz=128, num_updates=8945, lr=9.99364e-05, gnorm=2.163, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=102099 2021-06-19 23:00:36 | INFO | train_inner | epoch 003: 2999 / 3002 loss=2.533, ppl=5.79, wps=5875.5, ups=0.09, wpb=64819, bsz=128, num_updates=8946, lr=9.99364e-05, gnorm=2.165, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=102110 2021-06-19 23:00:47 | INFO | train_inner | epoch 003: 3000 / 3002 loss=2.629, ppl=6.19, wps=5885.1, ups=0.09, wpb=64823, bsz=128, num_updates=8947, lr=9.99364e-05, gnorm=2.207, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=102121 2021-06-19 23:00:58 | INFO | train_inner | epoch 003: 3001 / 3002 loss=2.635, ppl=6.21, wps=5781.7, ups=0.09, wpb=64897, bsz=128, num_updates=8948, lr=9.99364e-05, gnorm=2.209, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=102132 2021-06-19 23:01:04 | INFO | train_inner | epoch 003: 3002 / 3002 loss=2.67, ppl=6.36, wps=5843.2, ups=0.16, wpb=36454, bsz=72, num_updates=8949, lr=9.99364e-05, gnorm=3.212, loss_scale=0.5, train_wall=6, gb_free=2.8, wall=102139 2021-06-19 23:01:04 | INFO | fairseq_cli.train | begin validation on "valid" subset 2021-06-19 23:16:01 | INFO | valid | epoch 003 | valid on 'valid' subset | loss 2.505 | ppl 5.68 | wps 19673.8 | wpb 506.5 | bsz 1 | num_updates 8949 | best_loss 2.505 2021-06-19 23:16:01 | INFO | fairseq.checkpoint_utils | Preparing to save checkpoint for epoch 3 @ 8949 updates 2021-06-19 23:16:01 | INFO | fairseq.trainer | Saving checkpoint to checkpoints/checkpoint3.pt 2021-06-19 23:16:13 | INFO | fairseq.trainer | Finished saving checkpoint to checkpoints/checkpoint3.pt 2021-06-19 23:21:10 | INFO | fairseq.checkpoint_utils | Saved checkpoint checkpoints/checkpoint3.pt (epoch 3 @ 8949 updates, score 2.505) (writing took 308.19168733900005 seconds) 2021-06-19 23:21:10 | INFO | fairseq_cli.train | end of epoch 3 (average epoch stats below) 2021-06-19 23:21:10 | INFO | train | epoch 003 | loss 2.648 | ppl 6.27 | wps 5606.4 | ups 0.09 | wpb 64819.5 | bsz 128 | num_updates 8949 | lr 9.99364e-05 | gnorm 2.655 | loss_scale 0.5 | train_wall 31896 | gb_free 2.8 | wall 103344 2021-06-19 23:21:10 | INFO | fairseq.trainer | begin training epoch 4 2021-06-19 23:21:10 | INFO | fairseq_cli.train | Start iterating over samples 2021-06-19 23:21:20 | INFO | train_inner | epoch 004: 1 / 3002 loss=2.704, ppl=6.52, wps=53.3, ups=0, wpb=64845, bsz=128, num_updates=8950, lr=9.99364e-05, gnorm=2.272, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103355 2021-06-19 23:21:31 | INFO | train_inner | epoch 004: 2 / 3002 loss=2.657, ppl=6.31, wps=6228.2, ups=0.1, wpb=64758, bsz=128, num_updates=8951, lr=9.99364e-05, gnorm=17.478, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103365 2021-06-19 23:21:41 | INFO | train_inner | epoch 004: 3 / 3002 loss=2.761, ppl=6.78, wps=6278.9, ups=0.1, wpb=64796, bsz=128, num_updates=8952, lr=9.99364e-05, gnorm=11.167, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103376 2021-06-19 23:21:52 | INFO | train_inner | epoch 004: 4 / 3002 loss=2.732, ppl=6.65, wps=6087.7, ups=0.09, wpb=64764, bsz=128, num_updates=8953, lr=9.99364e-05, gnorm=2.747, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103386 2021-06-19 23:22:03 | INFO | train_inner | epoch 004: 5 / 3002 loss=2.562, ppl=5.9, wps=6007.8, ups=0.09, wpb=64775, bsz=128, num_updates=8954, lr=9.99364e-05, gnorm=2.415, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103397 2021-06-19 23:22:14 | INFO | train_inner | epoch 004: 6 / 3002 loss=2.595, ppl=6.04, wps=5963.8, ups=0.09, wpb=64830, bsz=128, num_updates=8955, lr=9.99364e-05, gnorm=2.41, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103408 2021-06-19 23:22:24 | INFO | train_inner | epoch 004: 7 / 3002 loss=2.74, ppl=6.68, wps=6054.6, ups=0.09, wpb=64850, bsz=128, num_updates=8956, lr=9.99363e-05, gnorm=2.373, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103419 2021-06-19 23:22:35 | INFO | train_inner | epoch 004: 8 / 3002 loss=2.688, ppl=6.44, wps=6159.6, ups=0.09, wpb=64889, bsz=128, num_updates=8957, lr=9.99363e-05, gnorm=2.358, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103429 2021-06-19 23:22:46 | INFO | train_inner | epoch 004: 9 / 3002 loss=2.574, ppl=5.96, wps=5979.8, ups=0.09, wpb=64797, bsz=128, num_updates=8958, lr=9.99363e-05, gnorm=2.197, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103440 2021-06-19 23:22:56 | INFO | train_inner | epoch 004: 10 / 3002 loss=2.754, ppl=6.75, wps=5992.8, ups=0.09, wpb=64814, bsz=128, num_updates=8959, lr=9.99363e-05, gnorm=2.367, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103451 2021-06-19 23:23:07 | INFO | train_inner | epoch 004: 11 / 3002 loss=2.68, ppl=6.41, wps=6002.1, ups=0.09, wpb=64840, bsz=128, num_updates=8960, lr=9.99363e-05, gnorm=2.258, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103462 2021-06-19 23:23:18 | INFO | train_inner | epoch 004: 12 / 3002 loss=2.703, ppl=6.51, wps=5917, ups=0.09, wpb=64796, bsz=128, num_updates=8961, lr=9.99363e-05, gnorm=2.211, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=103472 2021-06-19 23:23:29 | INFO | train_inner | epoch 004: 13 / 3002 loss=2.534, ppl=5.79, wps=5939.8, ups=0.09, wpb=64838, bsz=128, num_updates=8962, lr=9.99363e-05, gnorm=2.314, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103483 2021-06-19 23:23:40 | INFO | train_inner | epoch 004: 14 / 3002 loss=2.704, ppl=6.52, wps=5727.8, ups=0.09, wpb=64776, bsz=128, num_updates=8963, lr=9.99363e-05, gnorm=2.399, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=103495 2021-06-19 23:23:51 | INFO | train_inner | epoch 004: 15 / 3002 loss=2.549, ppl=5.85, wps=5983.9, ups=0.09, wpb=64925, bsz=128, num_updates=8964, lr=9.99363e-05, gnorm=2.398, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103506 2021-06-19 23:24:02 | INFO | train_inner | epoch 004: 16 / 3002 loss=2.569, ppl=5.94, wps=5903.2, ups=0.09, wpb=64911, bsz=128, num_updates=8965, lr=9.99363e-05, gnorm=2.15, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=103517 2021-06-19 23:24:14 | INFO | train_inner | epoch 004: 17 / 3002 loss=2.681, ppl=6.41, wps=5720.6, ups=0.09, wpb=64762, bsz=128, num_updates=8966, lr=9.99363e-05, gnorm=2.183, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=103528 2021-06-19 23:24:25 | INFO | train_inner | epoch 004: 18 / 3002 loss=2.724, ppl=6.61, wps=5835.4, ups=0.09, wpb=64889, bsz=128, num_updates=8967, lr=9.99363e-05, gnorm=2.292, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=103539 2021-06-19 23:24:36 | INFO | train_inner | epoch 004: 19 / 3002 loss=2.623, ppl=6.16, wps=5811.9, ups=0.09, wpb=64871, bsz=128, num_updates=8968, lr=9.99363e-05, gnorm=2.377, loss_scale=1, train_wall=11, gb_free=2.8, wall=103550 2021-06-19 23:24:47 | INFO | train_inner | epoch 004: 20 / 3002 loss=2.557, ppl=5.88, wps=5712.8, ups=0.09, wpb=64884, bsz=128, num_updates=8969, lr=9.99362e-05, gnorm=11.215, loss_scale=1, train_wall=11, gb_free=2.8, wall=103562 2021-06-19 23:24:58 | INFO | train_inner | epoch 004: 21 / 3002 loss=2.473, ppl=5.55, wps=5813.1, ups=0.09, wpb=64800, bsz=128, num_updates=8970, lr=9.99362e-05, gnorm=2.146, loss_scale=1, train_wall=11, gb_free=2.8, wall=103573 2021-06-19 23:25:09 | INFO | train_inner | epoch 004: 22 / 3002 loss=2.652, ppl=6.28, wps=5928.3, ups=0.09, wpb=64820, bsz=128, num_updates=8971, lr=9.99362e-05, gnorm=2.104, loss_scale=1, train_wall=10, gb_free=2.8, wall=103584 2021-06-19 23:25:20 | INFO | train_inner | epoch 004: 23 / 3002 loss=2.743, ppl=6.7, wps=5899.2, ups=0.09, wpb=64812, bsz=128, num_updates=8972, lr=9.99362e-05, gnorm=2.312, loss_scale=1, train_wall=11, gb_free=2.8, wall=103595 2021-06-19 23:25:31 | INFO | train_inner | epoch 004: 24 / 3002 loss=2.755, ppl=6.75, wps=5837, ups=0.09, wpb=64794, bsz=128, num_updates=8973, lr=9.99362e-05, gnorm=2.32, loss_scale=1, train_wall=11, gb_free=2.8, wall=103606 2021-06-19 23:25:42 | INFO | train_inner | epoch 004: 25 / 3002 loss=2.61, ppl=6.1, wps=5884.8, ups=0.09, wpb=64806, bsz=128, num_updates=8974, lr=9.99362e-05, gnorm=2.151, loss_scale=1, train_wall=11, gb_free=2.8, wall=103617 2021-06-19 23:25:53 | INFO | train_inner | epoch 004: 26 / 3002 loss=2.685, ppl=6.43, wps=5828.6, ups=0.09, wpb=64852, bsz=128, num_updates=8975, lr=9.99362e-05, gnorm=2.948, loss_scale=1, train_wall=11, gb_free=2.8, wall=103628 2021-06-19 23:26:04 | INFO | train_inner | epoch 004: 27 / 3002 loss=2.609, ppl=6.1, wps=5943.7, ups=0.09, wpb=64850, bsz=128, num_updates=8976, lr=9.99362e-05, gnorm=2.107, loss_scale=1, train_wall=10, gb_free=2.8, wall=103639 2021-06-19 23:26:15 | INFO | train_inner | epoch 004: 28 / 3002 loss=2.569, ppl=5.94, wps=5992.1, ups=0.09, wpb=64737, bsz=128, num_updates=8977, lr=9.99362e-05, gnorm=2.354, loss_scale=1, train_wall=10, gb_free=2.8, wall=103650 2021-06-19 23:26:26 | INFO | train_inner | epoch 004: 29 / 3002 loss=2.659, ppl=6.32, wps=5800.5, ups=0.09, wpb=64834, bsz=128, num_updates=8978, lr=9.99362e-05, gnorm=2.16, loss_scale=1, train_wall=11, gb_free=2.8, wall=103661 2021-06-19 23:26:38 | INFO | train_inner | epoch 004: 30 / 3002 loss=2.661, ppl=6.32, wps=5790.3, ups=0.09, wpb=64830, bsz=128, num_updates=8979, lr=9.99362e-05, gnorm=2.178, loss_scale=1, train_wall=11, gb_free=2.8, wall=103672 2021-06-19 23:26:49 | INFO | train_inner | epoch 004: 31 / 3002 loss=2.704, ppl=6.52, wps=5784.4, ups=0.09, wpb=64768, bsz=128, num_updates=8980, lr=9.99362e-05, gnorm=2.212, loss_scale=1, train_wall=11, gb_free=2.8, wall=103683 2021-06-19 23:27:00 | INFO | train_inner | epoch 004: 32 / 3002 loss=2.588, ppl=6.01, wps=5953.8, ups=0.09, wpb=64831, bsz=128, num_updates=8981, lr=9.99361e-05, gnorm=2.253, loss_scale=1, train_wall=10, gb_free=2.8, wall=103694 2021-06-19 23:27:11 | INFO | train_inner | epoch 004: 33 / 3002 loss=2.642, ppl=6.24, wps=5842.8, ups=0.09, wpb=64864, bsz=128, num_updates=8982, lr=9.99361e-05, gnorm=2.348, loss_scale=1, train_wall=11, gb_free=2.8, wall=103705 2021-06-19 23:27:22 | INFO | train_inner | epoch 004: 34 / 3002 loss=2.707, ppl=6.53, wps=5930.1, ups=0.09, wpb=64865, bsz=128, num_updates=8983, lr=9.99361e-05, gnorm=2.485, loss_scale=1, train_wall=10, gb_free=2.8, wall=103716 2021-06-19 23:27:33 | INFO | train_inner | epoch 004: 35 / 3002 loss=2.673, ppl=6.38, wps=5744.8, ups=0.09, wpb=64829, bsz=128, num_updates=8984, lr=9.99361e-05, gnorm=2.32, loss_scale=1, train_wall=11, gb_free=2.8, wall=103727 2021-06-19 23:27:44 | INFO | train_inner | epoch 004: 36 / 3002 loss=2.61, ppl=6.1, wps=5910.8, ups=0.09, wpb=64806, bsz=128, num_updates=8985, lr=9.99361e-05, gnorm=6.062, loss_scale=1, train_wall=10, gb_free=2.8, wall=103738 2021-06-19 23:27:55 | INFO | train_inner | epoch 004: 37 / 3002 loss=2.611, ppl=6.11, wps=5885.2, ups=0.09, wpb=64792, bsz=128, num_updates=8986, lr=9.99361e-05, gnorm=2.155, loss_scale=1, train_wall=11, gb_free=2.8, wall=103749 2021-06-19 23:28:06 | INFO | train_inner | epoch 004: 38 / 3002 loss=2.684, ppl=6.42, wps=5885.6, ups=0.09, wpb=64885, bsz=128, num_updates=8987, lr=9.99361e-05, gnorm=2.193, loss_scale=1, train_wall=11, gb_free=2.8, wall=103760 2021-06-19 23:28:17 | INFO | train_inner | epoch 004: 39 / 3002 loss=2.709, ppl=6.54, wps=5847.9, ups=0.09, wpb=64746, bsz=128, num_updates=8988, lr=9.99361e-05, gnorm=2.298, loss_scale=1, train_wall=11, gb_free=2.8, wall=103771 2021-06-19 23:28:28 | INFO | train_inner | epoch 004: 40 / 3002 loss=2.635, ppl=6.21, wps=5870, ups=0.09, wpb=64778, bsz=128, num_updates=8989, lr=9.99361e-05, gnorm=2.321, loss_scale=1, train_wall=11, gb_free=2.8, wall=103782 2021-06-19 23:28:39 | INFO | train_inner | epoch 004: 41 / 3002 loss=2.833, ppl=7.13, wps=5982.6, ups=0.09, wpb=64863, bsz=128, num_updates=8990, lr=9.99361e-05, gnorm=2.24, loss_scale=1, train_wall=10, gb_free=2.8, wall=103793 2021-06-19 23:28:50 | INFO | train_inner | epoch 004: 42 / 3002 loss=2.596, ppl=6.05, wps=5855.6, ups=0.09, wpb=64923, bsz=128, num_updates=8991, lr=9.99361e-05, gnorm=2.131, loss_scale=1, train_wall=11, gb_free=2.8, wall=103804 2021-06-19 23:29:01 | INFO | train_inner | epoch 004: 43 / 3002 loss=2.736, ppl=6.66, wps=5802.2, ups=0.09, wpb=64865, bsz=128, num_updates=8992, lr=9.99361e-05, gnorm=2.149, loss_scale=1, train_wall=11, gb_free=2.8, wall=103816 2021-06-19 23:29:12 | INFO | train_inner | epoch 004: 44 / 3002 loss=2.784, ppl=6.89, wps=5959.9, ups=0.09, wpb=64924, bsz=128, num_updates=8993, lr=9.99361e-05, gnorm=12.927, loss_scale=1, train_wall=10, gb_free=2.8, wall=103826 2021-06-19 23:29:23 | INFO | train_inner | epoch 004: 45 / 3002 loss=2.467, ppl=5.53, wps=6011, ups=0.09, wpb=64848, bsz=128, num_updates=8994, lr=9.9936e-05, gnorm=2.311, loss_scale=1, train_wall=10, gb_free=2.8, wall=103837 2021-06-19 23:29:34 | INFO | train_inner | epoch 004: 46 / 3002 loss=2.762, ppl=6.79, wps=5888, ups=0.09, wpb=64839, bsz=128, num_updates=8995, lr=9.9936e-05, gnorm=2.434, loss_scale=1, train_wall=11, gb_free=2.8, wall=103848 2021-06-19 23:29:45 | INFO | train_inner | epoch 004: 47 / 3002 loss=2.792, ppl=6.93, wps=5835.2, ups=0.09, wpb=64775, bsz=128, num_updates=8996, lr=9.9936e-05, gnorm=2.978, loss_scale=1, train_wall=11, gb_free=2.8, wall=103859 2021-06-19 23:29:56 | INFO | train_inner | epoch 004: 48 / 3002 loss=2.672, ppl=6.37, wps=5818.5, ups=0.09, wpb=64754, bsz=128, num_updates=8997, lr=9.9936e-05, gnorm=2.188, loss_scale=1, train_wall=11, gb_free=2.8, wall=103870 2021-06-19 23:30:07 | INFO | train_inner | epoch 004: 49 / 3002 loss=2.523, ppl=5.75, wps=5777.6, ups=0.09, wpb=64546, bsz=128, num_updates=8998, lr=9.9936e-05, gnorm=2.236, loss_scale=1, train_wall=11, gb_free=2.8, wall=103882 2021-06-19 23:30:18 | INFO | train_inner | epoch 004: 50 / 3002 loss=2.663, ppl=6.33, wps=5898.3, ups=0.09, wpb=64798, bsz=128, num_updates=8999, lr=9.9936e-05, gnorm=2.998, loss_scale=1, train_wall=11, gb_free=2.8, wall=103893 2021-06-19 23:30:29 | INFO | train_inner | epoch 004: 51 / 3002 loss=2.504, ppl=5.67, wps=5965, ups=0.09, wpb=64928, bsz=128, num_updates=9000, lr=9.9936e-05, gnorm=2.26, loss_scale=1, train_wall=10, gb_free=2.8, wall=103904 2021-06-19 23:30:40 | INFO | train_inner | epoch 004: 52 / 3002 loss=2.743, ppl=6.69, wps=5950.2, ups=0.09, wpb=64825, bsz=128, num_updates=9001, lr=9.9936e-05, gnorm=2.215, loss_scale=1, train_wall=10, gb_free=2.8, wall=103914 2021-06-19 23:30:51 | INFO | train_inner | epoch 004: 53 / 3002 loss=2.736, ppl=6.66, wps=5819.2, ups=0.09, wpb=64713, bsz=128, num_updates=9002, lr=9.9936e-05, gnorm=2.547, loss_scale=1, train_wall=11, gb_free=2.8, wall=103926 2021-06-19 23:31:02 | INFO | train_inner | epoch 004: 54 / 3002 loss=2.729, ppl=6.63, wps=5795.7, ups=0.09, wpb=64788, bsz=128, num_updates=9003, lr=9.9936e-05, gnorm=2.198, loss_scale=1, train_wall=11, gb_free=2.8, wall=103937 2021-06-19 23:31:13 | INFO | train_inner | epoch 004: 55 / 3002 loss=2.659, ppl=6.32, wps=5883.5, ups=0.09, wpb=64792, bsz=128, num_updates=9004, lr=9.9936e-05, gnorm=3.554, loss_scale=1, train_wall=11, gb_free=2.8, wall=103948 2021-06-19 23:31:24 | INFO | train_inner | epoch 004: 56 / 3002 loss=2.613, ppl=6.12, wps=5943.2, ups=0.09, wpb=64840, bsz=128, num_updates=9005, lr=9.9936e-05, gnorm=2.234, loss_scale=1, train_wall=10, gb_free=2.8, wall=103959 2021-06-19 23:31:36 | INFO | train_inner | epoch 004: 57 / 3002 loss=2.649, ppl=6.27, wps=5720.6, ups=0.09, wpb=64776, bsz=128, num_updates=9006, lr=9.99359e-05, gnorm=2.113, loss_scale=1, train_wall=11, gb_free=2.8, wall=103970 2021-06-19 23:31:47 | INFO | train_inner | epoch 004: 58 / 3002 loss=2.579, ppl=5.97, wps=5821, ups=0.09, wpb=64847, bsz=128, num_updates=9007, lr=9.99359e-05, gnorm=2.125, loss_scale=1, train_wall=11, gb_free=2.8, wall=103981 2021-06-19 23:31:58 | INFO | train_inner | epoch 004: 59 / 3002 loss=2.592, ppl=6.03, wps=5971, ups=0.09, wpb=64885, bsz=128, num_updates=9008, lr=9.99359e-05, gnorm=2.23, loss_scale=1, train_wall=10, gb_free=2.8, wall=103992 2021-06-19 23:32:09 | INFO | train_inner | epoch 004: 60 / 3002 loss=2.738, ppl=6.67, wps=5813.2, ups=0.09, wpb=64837, bsz=128, num_updates=9009, lr=9.99359e-05, gnorm=2.197, loss_scale=1, train_wall=11, gb_free=2.8, wall=104003 2021-06-19 23:32:20 | INFO | train_inner | epoch 004: 61 / 3002 loss=2.587, ppl=6.01, wps=5803.4, ups=0.09, wpb=64829, bsz=128, num_updates=9010, lr=9.99359e-05, gnorm=2.168, loss_scale=1, train_wall=11, gb_free=2.8, wall=104014 2021-06-19 23:32:31 | INFO | train_inner | epoch 004: 62 / 3002 loss=2.631, ppl=6.2, wps=5834.9, ups=0.09, wpb=64811, bsz=128, num_updates=9011, lr=9.99359e-05, gnorm=2.767, loss_scale=1, train_wall=11, gb_free=2.8, wall=104025 2021-06-19 23:32:42 | INFO | train_inner | epoch 004: 63 / 3002 loss=2.482, ppl=5.59, wps=6025.2, ups=0.09, wpb=64915, bsz=128, num_updates=9012, lr=9.99359e-05, gnorm=2.68, loss_scale=1, train_wall=10, gb_free=2.8, wall=104036 2021-06-19 23:32:53 | INFO | train_inner | epoch 004: 64 / 3002 loss=2.57, ppl=5.94, wps=5924.3, ups=0.09, wpb=64960, bsz=128, num_updates=9013, lr=9.99359e-05, gnorm=2.065, loss_scale=1, train_wall=11, gb_free=2.8, wall=104047 2021-06-19 23:33:04 | INFO | train_inner | epoch 004: 65 / 3002 loss=2.584, ppl=6, wps=5907.4, ups=0.09, wpb=64842, bsz=128, num_updates=9014, lr=9.99359e-05, gnorm=2.771, loss_scale=1, train_wall=11, gb_free=2.8, wall=104058 2021-06-19 23:33:15 | INFO | train_inner | epoch 004: 66 / 3002 loss=2.705, ppl=6.52, wps=5804.2, ups=0.09, wpb=64813, bsz=128, num_updates=9015, lr=9.99359e-05, gnorm=2.197, loss_scale=1, train_wall=11, gb_free=2.8, wall=104069 2021-06-19 23:33:26 | INFO | train_inner | epoch 004: 67 / 3002 loss=2.616, ppl=6.13, wps=5820.2, ups=0.09, wpb=64839, bsz=128, num_updates=9016, lr=9.99359e-05, gnorm=2.226, loss_scale=1, train_wall=11, gb_free=2.8, wall=104080 2021-06-19 23:33:37 | INFO | train_inner | epoch 004: 68 / 3002 loss=2.64, ppl=6.24, wps=5885.3, ups=0.09, wpb=64824, bsz=128, num_updates=9017, lr=9.99359e-05, gnorm=2.128, loss_scale=1, train_wall=11, gb_free=2.8, wall=104091 2021-06-19 23:33:48 | INFO | train_inner | epoch 004: 69 / 3002 loss=2.647, ppl=6.26, wps=5984.6, ups=0.09, wpb=64889, bsz=128, num_updates=9018, lr=9.99359e-05, gnorm=2.268, loss_scale=1, train_wall=10, gb_free=2.8, wall=104102 2021-06-19 23:33:59 | INFO | train_inner | epoch 004: 70 / 3002 loss=2.564, ppl=5.91, wps=5834, ups=0.09, wpb=64866, bsz=128, num_updates=9019, lr=9.99358e-05, gnorm=2.115, loss_scale=1, train_wall=11, gb_free=2.8, wall=104113 2021-06-19 23:34:10 | INFO | train_inner | epoch 004: 71 / 3002 loss=2.599, ppl=6.06, wps=5825.9, ups=0.09, wpb=64460, bsz=128, num_updates=9020, lr=9.99358e-05, gnorm=3.296, loss_scale=1, train_wall=11, gb_free=2.8, wall=104124 2021-06-19 23:34:21 | INFO | train_inner | epoch 004: 72 / 3002 loss=2.638, ppl=6.23, wps=5876.9, ups=0.09, wpb=64916, bsz=128, num_updates=9021, lr=9.99358e-05, gnorm=2.119, loss_scale=1, train_wall=11, gb_free=2.8, wall=104136 2021-06-19 23:34:32 | INFO | train_inner | epoch 004: 73 / 3002 loss=2.621, ppl=6.15, wps=5805.6, ups=0.09, wpb=64835, bsz=128, num_updates=9022, lr=9.99358e-05, gnorm=2.827, loss_scale=1, train_wall=11, gb_free=2.8, wall=104147 2021-06-19 23:34:43 | INFO | train_inner | epoch 004: 74 / 3002 loss=2.672, ppl=6.37, wps=5834.4, ups=0.09, wpb=64833, bsz=128, num_updates=9023, lr=9.99358e-05, gnorm=2.164, loss_scale=1, train_wall=11, gb_free=2.8, wall=104158 2021-06-19 23:34:55 | INFO | train_inner | epoch 004: 75 / 3002 loss=2.58, ppl=5.98, wps=5818.5, ups=0.09, wpb=64847, bsz=128, num_updates=9024, lr=9.99358e-05, gnorm=2.166, loss_scale=1, train_wall=11, gb_free=2.8, wall=104169 2021-06-19 23:35:06 | INFO | train_inner | epoch 004: 76 / 3002 loss=2.666, ppl=6.35, wps=5855.4, ups=0.09, wpb=64854, bsz=128, num_updates=9025, lr=9.99358e-05, gnorm=2.709, loss_scale=1, train_wall=11, gb_free=2.8, wall=104180 2021-06-19 23:35:17 | INFO | train_inner | epoch 004: 77 / 3002 loss=2.611, ppl=6.11, wps=5907, ups=0.09, wpb=64813, bsz=128, num_updates=9026, lr=9.99358e-05, gnorm=2.272, loss_scale=1, train_wall=10, gb_free=2.8, wall=104191 2021-06-19 23:35:28 | INFO | train_inner | epoch 004: 78 / 3002 loss=2.618, ppl=6.14, wps=5913.1, ups=0.09, wpb=64852, bsz=128, num_updates=9027, lr=9.99358e-05, gnorm=2.327, loss_scale=1, train_wall=10, gb_free=2.8, wall=104202 2021-06-19 23:35:39 | INFO | train_inner | epoch 004: 79 / 3002 loss=2.578, ppl=5.97, wps=5776.6, ups=0.09, wpb=64841, bsz=128, num_updates=9028, lr=9.99358e-05, gnorm=2.404, loss_scale=1, train_wall=11, gb_free=2.8, wall=104213 2021-06-19 23:35:50 | INFO | train_inner | epoch 004: 80 / 3002 loss=2.686, ppl=6.44, wps=5825, ups=0.09, wpb=64767, bsz=128, num_updates=9029, lr=9.99358e-05, gnorm=2.307, loss_scale=1, train_wall=11, gb_free=2.8, wall=104224 2021-06-19 23:36:01 | INFO | train_inner | epoch 004: 81 / 3002 loss=2.463, ppl=5.51, wps=5915.6, ups=0.09, wpb=64910, bsz=128, num_updates=9030, lr=9.99358e-05, gnorm=2.507, loss_scale=1, train_wall=11, gb_free=2.8, wall=104235 2021-06-19 23:36:12 | INFO | train_inner | epoch 004: 82 / 3002 loss=2.645, ppl=6.26, wps=5860.8, ups=0.09, wpb=64710, bsz=128, num_updates=9031, lr=9.99357e-05, gnorm=2.244, loss_scale=1, train_wall=11, gb_free=2.8, wall=104246 2021-06-19 23:36:23 | INFO | train_inner | epoch 004: 83 / 3002 loss=2.651, ppl=6.28, wps=5885.5, ups=0.09, wpb=64871, bsz=128, num_updates=9032, lr=9.99357e-05, gnorm=2.154, loss_scale=1, train_wall=11, gb_free=2.8, wall=104257 2021-06-19 23:36:34 | INFO | train_inner | epoch 004: 84 / 3002 loss=2.63, ppl=6.19, wps=5884.1, ups=0.09, wpb=64825, bsz=128, num_updates=9033, lr=9.99357e-05, gnorm=2.079, loss_scale=1, train_wall=11, gb_free=2.8, wall=104268 2021-06-19 23:36:45 | INFO | train_inner | epoch 004: 85 / 3002 loss=2.674, ppl=6.38, wps=5810.3, ups=0.09, wpb=64797, bsz=128, num_updates=9034, lr=9.99357e-05, gnorm=2.273, loss_scale=1, train_wall=11, gb_free=2.8, wall=104280 2021-06-19 23:36:56 | INFO | train_inner | epoch 004: 86 / 3002 loss=2.588, ppl=6.01, wps=5899.6, ups=0.09, wpb=64930, bsz=128, num_updates=9035, lr=9.99357e-05, gnorm=2.273, loss_scale=1, train_wall=11, gb_free=2.8, wall=104291 2021-06-19 23:37:07 | INFO | train_inner | epoch 004: 87 / 3002 loss=2.638, ppl=6.23, wps=5844.8, ups=0.09, wpb=64734, bsz=128, num_updates=9036, lr=9.99357e-05, gnorm=2.061, loss_scale=1, train_wall=11, gb_free=2.8, wall=104302 2021-06-19 23:37:18 | INFO | train_inner | epoch 004: 88 / 3002 loss=2.697, ppl=6.48, wps=5892.6, ups=0.09, wpb=64854, bsz=128, num_updates=9037, lr=9.99357e-05, gnorm=2.477, loss_scale=1, train_wall=11, gb_free=2.8, wall=104313 2021-06-19 23:37:30 | INFO | train_inner | epoch 004: 89 / 3002 loss=2.493, ppl=5.63, wps=5700, ups=0.09, wpb=64754, bsz=128, num_updates=9038, lr=9.99357e-05, gnorm=2.394, loss_scale=1, train_wall=11, gb_free=2.8, wall=104324 2021-06-19 23:37:41 | INFO | train_inner | epoch 004: 90 / 3002 loss=2.611, ppl=6.11, wps=5893.5, ups=0.09, wpb=64728, bsz=128, num_updates=9039, lr=9.99357e-05, gnorm=2.143, loss_scale=1, train_wall=11, gb_free=2.8, wall=104335 2021-06-19 23:37:52 | INFO | train_inner | epoch 004: 91 / 3002 loss=2.637, ppl=6.22, wps=5931.5, ups=0.09, wpb=64836, bsz=128, num_updates=9040, lr=9.99357e-05, gnorm=2.564, loss_scale=1, train_wall=10, gb_free=2.8, wall=104346 2021-06-19 23:38:03 | INFO | train_inner | epoch 004: 92 / 3002 loss=2.548, ppl=5.85, wps=5825.9, ups=0.09, wpb=64875, bsz=128, num_updates=9041, lr=9.99357e-05, gnorm=2.31, loss_scale=1, train_wall=11, gb_free=2.8, wall=104357 2021-06-19 23:38:14 | INFO | train_inner | epoch 004: 93 / 3002 loss=2.735, ppl=6.66, wps=5810.6, ups=0.09, wpb=64850, bsz=128, num_updates=9042, lr=9.99357e-05, gnorm=2.266, loss_scale=1, train_wall=11, gb_free=2.8, wall=104368 2021-06-19 23:38:25 | INFO | train_inner | epoch 004: 94 / 3002 loss=2.518, ppl=5.73, wps=5943.5, ups=0.09, wpb=64934, bsz=128, num_updates=9043, lr=9.99357e-05, gnorm=2.146, loss_scale=1, train_wall=10, gb_free=2.8, wall=104379 2021-06-19 23:38:36 | INFO | train_inner | epoch 004: 95 / 3002 loss=2.767, ppl=6.81, wps=5851.2, ups=0.09, wpb=64859, bsz=128, num_updates=9044, lr=9.99356e-05, gnorm=6.565, loss_scale=1, train_wall=11, gb_free=2.8, wall=104390 2021-06-19 23:38:47 | INFO | train_inner | epoch 004: 96 / 3002 loss=2.661, ppl=6.32, wps=5979.7, ups=0.09, wpb=64927, bsz=128, num_updates=9045, lr=9.99356e-05, gnorm=2.947, loss_scale=1, train_wall=10, gb_free=2.8, wall=104401 2021-06-19 23:38:58 | INFO | train_inner | epoch 004: 97 / 3002 loss=2.526, ppl=5.76, wps=5887.9, ups=0.09, wpb=64885, bsz=128, num_updates=9046, lr=9.99356e-05, gnorm=2.259, loss_scale=1, train_wall=11, gb_free=2.8, wall=104412 2021-06-19 23:39:09 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 2021-06-19 23:39:20 | INFO | train_inner | epoch 004: 99 / 3002 loss=2.673, ppl=6.38, wps=2950.5, ups=0.05, wpb=64798, bsz=128, num_updates=9047, lr=9.99356e-05, gnorm=2.185, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=104434 2021-06-19 23:39:31 | INFO | train_inner | epoch 004: 100 / 3002 loss=2.627, ppl=6.18, wps=5856.1, ups=0.09, wpb=64839, bsz=128, num_updates=9048, lr=9.99356e-05, gnorm=3.124, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104445 2021-06-19 23:39:42 | INFO | train_inner | epoch 004: 101 / 3002 loss=2.425, ppl=5.37, wps=5803.3, ups=0.09, wpb=64815, bsz=128, num_updates=9049, lr=9.99356e-05, gnorm=16.203, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104456 2021-06-19 23:39:53 | INFO | train_inner | epoch 004: 102 / 3002 loss=2.598, ppl=6.05, wps=5908.5, ups=0.09, wpb=64814, bsz=128, num_updates=9050, lr=9.99356e-05, gnorm=2.317, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104467 2021-06-19 23:40:04 | INFO | train_inner | epoch 004: 103 / 3002 loss=2.471, ppl=5.55, wps=5957.5, ups=0.09, wpb=64835, bsz=128, num_updates=9051, lr=9.99356e-05, gnorm=2.024, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104478 2021-06-19 23:40:15 | INFO | train_inner | epoch 004: 104 / 3002 loss=2.588, ppl=6.01, wps=5965.8, ups=0.09, wpb=64898, bsz=128, num_updates=9052, lr=9.99356e-05, gnorm=2.102, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104489 2021-06-19 23:40:26 | INFO | train_inner | epoch 004: 105 / 3002 loss=2.546, ppl=5.84, wps=5953.7, ups=0.09, wpb=64871, bsz=128, num_updates=9053, lr=9.99356e-05, gnorm=2.088, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104500 2021-06-19 23:40:37 | INFO | train_inner | epoch 004: 106 / 3002 loss=2.502, ppl=5.66, wps=5791.6, ups=0.09, wpb=64822, bsz=128, num_updates=9054, lr=9.99356e-05, gnorm=2.223, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104511 2021-06-19 23:40:48 | INFO | train_inner | epoch 004: 107 / 3002 loss=2.688, ppl=6.44, wps=5897.9, ups=0.09, wpb=64801, bsz=128, num_updates=9055, lr=9.99356e-05, gnorm=2.118, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104522 2021-06-19 23:40:59 | INFO | train_inner | epoch 004: 108 / 3002 loss=2.759, ppl=6.77, wps=5925.5, ups=0.09, wpb=64812, bsz=128, num_updates=9056, lr=9.99355e-05, gnorm=2.251, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104533 2021-06-19 23:41:10 | INFO | train_inner | epoch 004: 109 / 3002 loss=2.573, ppl=5.95, wps=5910.6, ups=0.09, wpb=64782, bsz=128, num_updates=9057, lr=9.99355e-05, gnorm=2.107, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104544 2021-06-19 23:41:21 | INFO | train_inner | epoch 004: 110 / 3002 loss=2.521, ppl=5.74, wps=5906.5, ups=0.09, wpb=64878, bsz=128, num_updates=9058, lr=9.99355e-05, gnorm=2.31, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104555 2021-06-19 23:41:32 | INFO | train_inner | epoch 004: 111 / 3002 loss=2.785, ppl=6.89, wps=5829.2, ups=0.09, wpb=64817, bsz=128, num_updates=9059, lr=9.99355e-05, gnorm=2.246, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104566 2021-06-19 23:41:43 | INFO | train_inner | epoch 004: 112 / 3002 loss=2.537, ppl=5.8, wps=5844.3, ups=0.09, wpb=64856, bsz=128, num_updates=9060, lr=9.99355e-05, gnorm=2.294, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104577 2021-06-19 23:41:54 | INFO | train_inner | epoch 004: 113 / 3002 loss=2.569, ppl=5.94, wps=5912, ups=0.09, wpb=64830, bsz=128, num_updates=9061, lr=9.99355e-05, gnorm=2.259, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104588 2021-06-19 23:42:05 | INFO | train_inner | epoch 004: 114 / 3002 loss=2.851, ppl=7.21, wps=5894.1, ups=0.09, wpb=64829, bsz=128, num_updates=9062, lr=9.99355e-05, gnorm=2.429, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104599 2021-06-19 23:42:16 | INFO | train_inner | epoch 004: 115 / 3002 loss=2.477, ppl=5.57, wps=5812.8, ups=0.09, wpb=64912, bsz=128, num_updates=9063, lr=9.99355e-05, gnorm=3.431, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104610 2021-06-19 23:42:27 | INFO | train_inner | epoch 004: 116 / 3002 loss=2.738, ppl=6.67, wps=5960.9, ups=0.09, wpb=64851, bsz=128, num_updates=9064, lr=9.99355e-05, gnorm=3.455, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104621 2021-06-19 23:42:38 | INFO | train_inner | epoch 004: 117 / 3002 loss=2.501, ppl=5.66, wps=5723.4, ups=0.09, wpb=64832, bsz=128, num_updates=9065, lr=9.99355e-05, gnorm=2.389, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104633 2021-06-19 23:42:49 | INFO | train_inner | epoch 004: 118 / 3002 loss=2.518, ppl=5.73, wps=5918.6, ups=0.09, wpb=64902, bsz=128, num_updates=9066, lr=9.99355e-05, gnorm=3.039, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104643 2021-06-19 23:43:00 | INFO | train_inner | epoch 004: 119 / 3002 loss=2.645, ppl=6.25, wps=5943.3, ups=0.09, wpb=64893, bsz=128, num_updates=9067, lr=9.99355e-05, gnorm=2.128, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104654 2021-06-19 23:43:11 | INFO | train_inner | epoch 004: 120 / 3002 loss=2.515, ppl=5.72, wps=5834.2, ups=0.09, wpb=64871, bsz=128, num_updates=9068, lr=9.99355e-05, gnorm=74.549, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104666 2021-06-19 23:43:22 | INFO | train_inner | epoch 004: 121 / 3002 loss=2.523, ppl=5.75, wps=5768.1, ups=0.09, wpb=64816, bsz=128, num_updates=9069, lr=9.99354e-05, gnorm=2.278, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104677 2021-06-19 23:43:33 | INFO | train_inner | epoch 004: 122 / 3002 loss=2.681, ppl=6.41, wps=5937.9, ups=0.09, wpb=64908, bsz=128, num_updates=9070, lr=9.99354e-05, gnorm=3.141, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104688 2021-06-19 23:43:44 | INFO | train_inner | epoch 004: 123 / 3002 loss=2.614, ppl=6.12, wps=5871.5, ups=0.09, wpb=64804, bsz=128, num_updates=9071, lr=9.99354e-05, gnorm=2.481, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104699 2021-06-19 23:43:55 | INFO | train_inner | epoch 004: 124 / 3002 loss=2.618, ppl=6.14, wps=5854.3, ups=0.09, wpb=64825, bsz=128, num_updates=9072, lr=9.99354e-05, gnorm=2.554, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104710 2021-06-19 23:44:07 | INFO | train_inner | epoch 004: 125 / 3002 loss=2.66, ppl=6.32, wps=5804.8, ups=0.09, wpb=64793, bsz=128, num_updates=9073, lr=9.99354e-05, gnorm=2.86, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104721 2021-06-19 23:44:17 | INFO | train_inner | epoch 004: 126 / 3002 loss=2.749, ppl=6.72, wps=5992, ups=0.09, wpb=64830, bsz=128, num_updates=9074, lr=9.99354e-05, gnorm=3.559, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104732 2021-06-19 23:44:29 | INFO | train_inner | epoch 004: 127 / 3002 loss=2.695, ppl=6.47, wps=5759.5, ups=0.09, wpb=64802, bsz=128, num_updates=9075, lr=9.99354e-05, gnorm=3.054, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104743 2021-06-19 23:44:40 | INFO | train_inner | epoch 004: 128 / 3002 loss=2.724, ppl=6.61, wps=5820.2, ups=0.09, wpb=64815, bsz=128, num_updates=9076, lr=9.99354e-05, gnorm=2.782, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104754 2021-06-19 23:44:51 | INFO | train_inner | epoch 004: 129 / 3002 loss=2.654, ppl=6.3, wps=5835, ups=0.09, wpb=64829, bsz=128, num_updates=9077, lr=9.99354e-05, gnorm=2.752, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104765 2021-06-19 23:45:02 | INFO | train_inner | epoch 004: 130 / 3002 loss=2.707, ppl=6.53, wps=5807.8, ups=0.09, wpb=64837, bsz=128, num_updates=9078, lr=9.99354e-05, gnorm=2.458, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104776 2021-06-19 23:45:13 | INFO | train_inner | epoch 004: 131 / 3002 loss=2.657, ppl=6.31, wps=5893.7, ups=0.09, wpb=64789, bsz=128, num_updates=9079, lr=9.99354e-05, gnorm=2.345, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104787 2021-06-19 23:45:24 | INFO | train_inner | epoch 004: 132 / 3002 loss=2.64, ppl=6.23, wps=5729.5, ups=0.09, wpb=64858, bsz=128, num_updates=9080, lr=9.99354e-05, gnorm=2.533, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104799 2021-06-19 23:45:35 | INFO | train_inner | epoch 004: 133 / 3002 loss=2.494, ppl=5.63, wps=5940.2, ups=0.09, wpb=64876, bsz=128, num_updates=9081, lr=9.99353e-05, gnorm=3.12, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104810 2021-06-19 23:45:46 | INFO | train_inner | epoch 004: 134 / 3002 loss=2.83, ppl=7.11, wps=5858.8, ups=0.09, wpb=64811, bsz=128, num_updates=9082, lr=9.99353e-05, gnorm=2.272, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104821 2021-06-19 23:45:58 | INFO | train_inner | epoch 004: 135 / 3002 loss=2.692, ppl=6.46, wps=5784.6, ups=0.09, wpb=64867, bsz=128, num_updates=9083, lr=9.99353e-05, gnorm=2.172, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104832 2021-06-19 23:46:09 | INFO | train_inner | epoch 004: 136 / 3002 loss=2.471, ppl=5.55, wps=5850.2, ups=0.09, wpb=64820, bsz=128, num_updates=9084, lr=9.99353e-05, gnorm=2.187, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104843 2021-06-19 23:46:20 | INFO | train_inner | epoch 004: 137 / 3002 loss=2.624, ppl=6.16, wps=5807.5, ups=0.09, wpb=64914, bsz=128, num_updates=9085, lr=9.99353e-05, gnorm=2.287, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104854 2021-06-19 23:46:31 | INFO | train_inner | epoch 004: 138 / 3002 loss=2.542, ppl=5.82, wps=5858.7, ups=0.09, wpb=64871, bsz=128, num_updates=9086, lr=9.99353e-05, gnorm=2.109, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104865 2021-06-19 23:46:42 | INFO | train_inner | epoch 004: 139 / 3002 loss=2.659, ppl=6.32, wps=5764.8, ups=0.09, wpb=64870, bsz=128, num_updates=9087, lr=9.99353e-05, gnorm=8.641, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104877 2021-06-19 23:46:53 | INFO | train_inner | epoch 004: 140 / 3002 loss=2.694, ppl=6.47, wps=5818.2, ups=0.09, wpb=64762, bsz=128, num_updates=9088, lr=9.99353e-05, gnorm=2.27, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104888 2021-06-19 23:47:04 | INFO | train_inner | epoch 004: 141 / 3002 loss=2.602, ppl=6.07, wps=5960.1, ups=0.09, wpb=64827, bsz=128, num_updates=9089, lr=9.99353e-05, gnorm=2.164, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104899 2021-06-19 23:47:15 | INFO | train_inner | epoch 004: 142 / 3002 loss=2.818, ppl=7.05, wps=5827.7, ups=0.09, wpb=64827, bsz=128, num_updates=9090, lr=9.99353e-05, gnorm=2.791, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104910 2021-06-19 23:47:27 | INFO | train_inner | epoch 004: 143 / 3002 loss=2.608, ppl=6.1, wps=5782.2, ups=0.09, wpb=64844, bsz=128, num_updates=9091, lr=9.99353e-05, gnorm=2.614, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104921 2021-06-19 23:47:38 | INFO | train_inner | epoch 004: 144 / 3002 loss=2.578, ppl=5.97, wps=5886.5, ups=0.09, wpb=64805, bsz=128, num_updates=9092, lr=9.99353e-05, gnorm=5.421, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104932 2021-06-19 23:47:49 | INFO | train_inner | epoch 004: 145 / 3002 loss=2.622, ppl=6.16, wps=5917.9, ups=0.09, wpb=64838, bsz=128, num_updates=9093, lr=9.99353e-05, gnorm=2.109, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104943 2021-06-19 23:48:00 | INFO | train_inner | epoch 004: 146 / 3002 loss=2.581, ppl=5.98, wps=5867.1, ups=0.09, wpb=64838, bsz=128, num_updates=9094, lr=9.99352e-05, gnorm=2.207, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104954 2021-06-19 23:48:11 | INFO | train_inner | epoch 004: 147 / 3002 loss=2.509, ppl=5.69, wps=5768.5, ups=0.09, wpb=64801, bsz=128, num_updates=9095, lr=9.99352e-05, gnorm=3.608, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104965 2021-06-19 23:48:22 | INFO | train_inner | epoch 004: 148 / 3002 loss=2.756, ppl=6.75, wps=5839.5, ups=0.09, wpb=64807, bsz=128, num_updates=9096, lr=9.99352e-05, gnorm=2.577, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104976 2021-06-19 23:48:33 | INFO | train_inner | epoch 004: 149 / 3002 loss=2.57, ppl=5.94, wps=5858.5, ups=0.09, wpb=64762, bsz=128, num_updates=9097, lr=9.99352e-05, gnorm=2.279, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104987 2021-06-19 23:48:44 | INFO | train_inner | epoch 004: 150 / 3002 loss=2.549, ppl=5.85, wps=5808.4, ups=0.09, wpb=64828, bsz=128, num_updates=9098, lr=9.99352e-05, gnorm=2.214, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104998 2021-06-19 23:48:55 | INFO | train_inner | epoch 004: 151 / 3002 loss=2.634, ppl=6.21, wps=5868.7, ups=0.09, wpb=64882, bsz=128, num_updates=9099, lr=9.99352e-05, gnorm=2.117, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105009 2021-06-19 23:49:06 | INFO | train_inner | epoch 004: 152 / 3002 loss=2.593, ppl=6.03, wps=5872.5, ups=0.09, wpb=64775, bsz=128, num_updates=9100, lr=9.99352e-05, gnorm=2.207, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105021 2021-06-19 23:49:17 | INFO | train_inner | epoch 004: 153 / 3002 loss=2.575, ppl=5.96, wps=5870.1, ups=0.09, wpb=64797, bsz=128, num_updates=9101, lr=9.99352e-05, gnorm=2.226, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105032 2021-06-19 23:49:28 | INFO | train_inner | epoch 004: 154 / 3002 loss=2.571, ppl=5.94, wps=5911.1, ups=0.09, wpb=64838, bsz=128, num_updates=9102, lr=9.99352e-05, gnorm=7.422, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105043 2021-06-19 23:49:39 | INFO | train_inner | epoch 004: 155 / 3002 loss=2.662, ppl=6.33, wps=5884.3, ups=0.09, wpb=64847, bsz=128, num_updates=9103, lr=9.99352e-05, gnorm=2.215, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105054 2021-06-19 23:49:50 | INFO | train_inner | epoch 004: 156 / 3002 loss=2.654, ppl=6.3, wps=5885.5, ups=0.09, wpb=64793, bsz=128, num_updates=9104, lr=9.99352e-05, gnorm=2.217, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105065 2021-06-19 23:50:01 | INFO | train_inner | epoch 004: 157 / 3002 loss=2.774, ppl=6.84, wps=5813.1, ups=0.09, wpb=64766, bsz=128, num_updates=9105, lr=9.99352e-05, gnorm=6.37, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105076 2021-06-19 23:50:12 | INFO | train_inner | epoch 004: 158 / 3002 loss=2.71, ppl=6.54, wps=5839.7, ups=0.09, wpb=64847, bsz=128, num_updates=9106, lr=9.99351e-05, gnorm=2.103, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105087 2021-06-19 23:50:24 | INFO | train_inner | epoch 004: 159 / 3002 loss=2.623, ppl=6.16, wps=5868.8, ups=0.09, wpb=64847, bsz=128, num_updates=9107, lr=9.99351e-05, gnorm=2.198, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105098 2021-06-19 23:50:35 | INFO | train_inner | epoch 004: 160 / 3002 loss=2.523, ppl=5.75, wps=5794.5, ups=0.09, wpb=64842, bsz=128, num_updates=9108, lr=9.99351e-05, gnorm=2.066, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105109 2021-06-19 23:50:46 | INFO | train_inner | epoch 004: 161 / 3002 loss=2.539, ppl=5.81, wps=5886.2, ups=0.09, wpb=64863, bsz=128, num_updates=9109, lr=9.99351e-05, gnorm=2.234, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105120 2021-06-19 23:50:57 | INFO | train_inner | epoch 004: 162 / 3002 loss=2.52, ppl=5.73, wps=5753.4, ups=0.09, wpb=64829, bsz=128, num_updates=9110, lr=9.99351e-05, gnorm=2.192, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105131 2021-06-19 23:51:08 | INFO | train_inner | epoch 004: 163 / 3002 loss=2.616, ppl=6.13, wps=5935.8, ups=0.09, wpb=64714, bsz=128, num_updates=9111, lr=9.99351e-05, gnorm=2.27, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105142 2021-06-19 23:51:19 | INFO | train_inner | epoch 004: 164 / 3002 loss=2.663, ppl=6.33, wps=5802.9, ups=0.09, wpb=64795, bsz=128, num_updates=9112, lr=9.99351e-05, gnorm=2.654, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105153 2021-06-19 23:51:30 | INFO | train_inner | epoch 004: 165 / 3002 loss=2.446, ppl=5.45, wps=5805.9, ups=0.09, wpb=64816, bsz=128, num_updates=9113, lr=9.99351e-05, gnorm=13.197, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105165 2021-06-19 23:51:41 | INFO | train_inner | epoch 004: 166 / 3002 loss=2.546, ppl=5.84, wps=5803.3, ups=0.09, wpb=64885, bsz=128, num_updates=9114, lr=9.99351e-05, gnorm=3.237, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105176 2021-06-19 23:51:52 | INFO | train_inner | epoch 004: 167 / 3002 loss=2.727, ppl=6.62, wps=5868.6, ups=0.09, wpb=64811, bsz=128, num_updates=9115, lr=9.99351e-05, gnorm=2.237, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105187 2021-06-19 23:52:04 | INFO | train_inner | epoch 004: 168 / 3002 loss=2.559, ppl=5.89, wps=5818.7, ups=0.09, wpb=64848, bsz=128, num_updates=9116, lr=9.99351e-05, gnorm=2.15, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105198 2021-06-19 23:52:15 | INFO | train_inner | epoch 004: 169 / 3002 loss=2.627, ppl=6.18, wps=5859.9, ups=0.09, wpb=64814, bsz=128, num_updates=9117, lr=9.99351e-05, gnorm=6.975, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105209 2021-06-19 23:52:26 | INFO | train_inner | epoch 004: 170 / 3002 loss=2.57, ppl=5.94, wps=5753.1, ups=0.09, wpb=64784, bsz=128, num_updates=9118, lr=9.99351e-05, gnorm=2.153, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105220 2021-06-19 23:52:37 | INFO | train_inner | epoch 004: 171 / 3002 loss=2.613, ppl=6.12, wps=5902.7, ups=0.09, wpb=64895, bsz=128, num_updates=9119, lr=9.9935e-05, gnorm=2.156, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105231 2021-06-19 23:52:48 | INFO | train_inner | epoch 004: 172 / 3002 loss=2.667, ppl=6.35, wps=5752.7, ups=0.09, wpb=64763, bsz=128, num_updates=9120, lr=9.9935e-05, gnorm=2.391, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105243 2021-06-19 23:52:59 | INFO | train_inner | epoch 004: 173 / 3002 loss=2.543, ppl=5.83, wps=5842.5, ups=0.09, wpb=64862, bsz=128, num_updates=9121, lr=9.9935e-05, gnorm=2.123, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105254 2021-06-19 23:53:10 | INFO | train_inner | epoch 004: 174 / 3002 loss=2.645, ppl=6.26, wps=5962.4, ups=0.09, wpb=64819, bsz=128, num_updates=9122, lr=9.9935e-05, gnorm=2.196, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105264 2021-06-19 23:53:21 | INFO | train_inner | epoch 004: 175 / 3002 loss=2.43, ppl=5.39, wps=5858.5, ups=0.09, wpb=64863, bsz=128, num_updates=9123, lr=9.9935e-05, gnorm=2.217, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105276 2021-06-19 23:53:32 | INFO | train_inner | epoch 004: 176 / 3002 loss=2.513, ppl=5.71, wps=5910.5, ups=0.09, wpb=64856, bsz=128, num_updates=9124, lr=9.9935e-05, gnorm=2.215, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105287 2021-06-19 23:53:43 | INFO | train_inner | epoch 004: 177 / 3002 loss=2.588, ppl=6.01, wps=5837.7, ups=0.09, wpb=64793, bsz=128, num_updates=9125, lr=9.9935e-05, gnorm=2.889, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105298 2021-06-19 23:53:55 | INFO | train_inner | epoch 004: 178 / 3002 loss=2.662, ppl=6.33, wps=5755.7, ups=0.09, wpb=64763, bsz=128, num_updates=9126, lr=9.9935e-05, gnorm=2.177, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105309 2021-06-19 23:54:06 | INFO | train_inner | epoch 004: 179 / 3002 loss=2.641, ppl=6.24, wps=5841.6, ups=0.09, wpb=64830, bsz=128, num_updates=9127, lr=9.9935e-05, gnorm=2.113, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105320 2021-06-19 23:54:17 | INFO | train_inner | epoch 004: 180 / 3002 loss=2.738, ppl=6.67, wps=5878.5, ups=0.09, wpb=64803, bsz=128, num_updates=9128, lr=9.9935e-05, gnorm=2.21, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105331 2021-06-19 23:54:28 | INFO | train_inner | epoch 004: 181 / 3002 loss=2.489, ppl=5.61, wps=5937.8, ups=0.09, wpb=64854, bsz=128, num_updates=9129, lr=9.9935e-05, gnorm=2.077, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105342 2021-06-19 23:54:39 | INFO | train_inner | epoch 004: 182 / 3002 loss=2.5, ppl=5.66, wps=5912.9, ups=0.09, wpb=64861, bsz=128, num_updates=9130, lr=9.9935e-05, gnorm=2.764, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105353 2021-06-19 23:54:50 | INFO | train_inner | epoch 004: 183 / 3002 loss=2.582, ppl=5.99, wps=5874.4, ups=0.09, wpb=64821, bsz=128, num_updates=9131, lr=9.99349e-05, gnorm=2.185, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105364 2021-06-19 23:55:01 | INFO | train_inner | epoch 004: 184 / 3002 loss=2.502, ppl=5.66, wps=5843.9, ups=0.09, wpb=64911, bsz=128, num_updates=9132, lr=9.99349e-05, gnorm=2.171, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105375 2021-06-19 23:55:12 | INFO | train_inner | epoch 004: 185 / 3002 loss=2.412, ppl=5.32, wps=5839.1, ups=0.09, wpb=64831, bsz=128, num_updates=9133, lr=9.99349e-05, gnorm=2.135, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105386 2021-06-19 23:55:23 | INFO | train_inner | epoch 004: 186 / 3002 loss=2.555, ppl=5.88, wps=5902, ups=0.09, wpb=64817, bsz=128, num_updates=9134, lr=9.99349e-05, gnorm=2.151, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105397 2021-06-19 23:55:34 | INFO | train_inner | epoch 004: 187 / 3002 loss=2.709, ppl=6.54, wps=5862.6, ups=0.09, wpb=64722, bsz=128, num_updates=9135, lr=9.99349e-05, gnorm=2.27, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105408 2021-06-19 23:55:45 | INFO | train_inner | epoch 004: 188 / 3002 loss=2.625, ppl=6.17, wps=5822.1, ups=0.09, wpb=64842, bsz=128, num_updates=9136, lr=9.99349e-05, gnorm=2.101, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105419 2021-06-19 23:55:56 | INFO | train_inner | epoch 004: 189 / 3002 loss=2.45, ppl=5.47, wps=5922, ups=0.09, wpb=64821, bsz=128, num_updates=9137, lr=9.99349e-05, gnorm=3.674, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105430 2021-06-19 23:56:07 | INFO | train_inner | epoch 004: 190 / 3002 loss=2.688, ppl=6.45, wps=5890.9, ups=0.09, wpb=64855, bsz=128, num_updates=9138, lr=9.99349e-05, gnorm=2.208, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105441 2021-06-19 23:56:18 | INFO | train_inner | epoch 004: 191 / 3002 loss=2.61, ppl=6.11, wps=5905.1, ups=0.09, wpb=64819, bsz=128, num_updates=9139, lr=9.99349e-05, gnorm=2.089, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105452 2021-06-19 23:56:29 | INFO | train_inner | epoch 004: 192 / 3002 loss=2.567, ppl=5.93, wps=6049.3, ups=0.09, wpb=64900, bsz=128, num_updates=9140, lr=9.99349e-05, gnorm=2.201, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105463 2021-06-19 23:56:40 | INFO | train_inner | epoch 004: 193 / 3002 loss=2.707, ppl=6.53, wps=5812.4, ups=0.09, wpb=64863, bsz=128, num_updates=9141, lr=9.99349e-05, gnorm=2.074, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105474 2021-06-19 23:56:51 | INFO | train_inner | epoch 004: 194 / 3002 loss=2.686, ppl=6.43, wps=5955.1, ups=0.09, wpb=64832, bsz=128, num_updates=9142, lr=9.99349e-05, gnorm=2.079, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105485 2021-06-19 23:57:02 | INFO | train_inner | epoch 004: 195 / 3002 loss=2.619, ppl=6.14, wps=5857.3, ups=0.09, wpb=64826, bsz=128, num_updates=9143, lr=9.99349e-05, gnorm=2.126, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105496 2021-06-19 23:57:13 | INFO | train_inner | epoch 004: 196 / 3002 loss=2.549, ppl=5.85, wps=5897.9, ups=0.09, wpb=64830, bsz=128, num_updates=9144, lr=9.99348e-05, gnorm=2.149, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105507 2021-06-19 23:57:24 | INFO | train_inner | epoch 004: 197 / 3002 loss=2.601, ppl=6.07, wps=5853.1, ups=0.09, wpb=64699, bsz=128, num_updates=9145, lr=9.99348e-05, gnorm=2.066, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105518 2021-06-19 23:57:35 | INFO | train_inner | epoch 004: 198 / 3002 loss=2.611, ppl=6.11, wps=5780.7, ups=0.09, wpb=64870, bsz=128, num_updates=9146, lr=9.99348e-05, gnorm=2.159, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105529 2021-06-19 23:57:46 | INFO | train_inner | epoch 004: 199 / 3002 loss=2.626, ppl=6.17, wps=5836.8, ups=0.09, wpb=64817, bsz=128, num_updates=9147, lr=9.99348e-05, gnorm=2.02, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105540 2021-06-19 23:57:57 | INFO | train_inner | epoch 004: 200 / 3002 loss=2.643, ppl=6.24, wps=5732.1, ups=0.09, wpb=64804, bsz=128, num_updates=9148, lr=9.99348e-05, gnorm=2.219, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105552 2021-06-19 23:58:08 | INFO | train_inner | epoch 004: 201 / 3002 loss=2.696, ppl=6.48, wps=5919.1, ups=0.09, wpb=64843, bsz=128, num_updates=9149, lr=9.99348e-05, gnorm=2.243, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105563 2021-06-19 23:58:19 | INFO | train_inner | epoch 004: 202 / 3002 loss=2.574, ppl=5.96, wps=5920.8, ups=0.09, wpb=64828, bsz=128, num_updates=9150, lr=9.99348e-05, gnorm=2.152, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105574 2021-06-19 23:58:30 | INFO | train_inner | epoch 004: 203 / 3002 loss=2.572, ppl=5.95, wps=5859.2, ups=0.09, wpb=64891, bsz=128, num_updates=9151, lr=9.99348e-05, gnorm=2.121, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105585 2021-06-19 23:58:41 | INFO | train_inner | epoch 004: 204 / 3002 loss=2.514, ppl=5.71, wps=5844, ups=0.09, wpb=64762, bsz=128, num_updates=9152, lr=9.99348e-05, gnorm=2.583, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105596 2021-06-19 23:58:53 | INFO | train_inner | epoch 004: 205 / 3002 loss=2.621, ppl=6.15, wps=5827, ups=0.09, wpb=64835, bsz=128, num_updates=9153, lr=9.99348e-05, gnorm=2.233, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105607 2021-06-19 23:59:03 | INFO | train_inner | epoch 004: 206 / 3002 loss=2.621, ppl=6.15, wps=5995.2, ups=0.09, wpb=64714, bsz=128, num_updates=9154, lr=9.99348e-05, gnorm=2.061, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105618 2021-06-19 23:59:15 | INFO | train_inner | epoch 004: 207 / 3002 loss=2.49, ppl=5.62, wps=5735.6, ups=0.09, wpb=64793, bsz=128, num_updates=9155, lr=9.99348e-05, gnorm=2.189, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105629 2021-06-19 23:59:26 | INFO | train_inner | epoch 004: 208 / 3002 loss=2.561, ppl=5.9, wps=5774.1, ups=0.09, wpb=64871, bsz=128, num_updates=9156, lr=9.99347e-05, gnorm=2.122, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105640 2021-06-19 23:59:37 | INFO | train_inner | epoch 004: 209 / 3002 loss=2.508, ppl=5.69, wps=5771.6, ups=0.09, wpb=64929, bsz=128, num_updates=9157, lr=9.99347e-05, gnorm=2.4, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105652 2021-06-19 23:59:48 | INFO | train_inner | epoch 004: 210 / 3002 loss=2.679, ppl=6.41, wps=5871, ups=0.09, wpb=64847, bsz=128, num_updates=9158, lr=9.99347e-05, gnorm=2.183, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105663 2021-06-19 23:59:59 | INFO | train_inner | epoch 004: 211 / 3002 loss=2.614, ppl=6.12, wps=5787.9, ups=0.09, wpb=64863, bsz=128, num_updates=9159, lr=9.99347e-05, gnorm=2.144, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105674 2021-06-20 00:00:11 | INFO | train_inner | epoch 004: 212 / 3002 loss=2.595, ppl=6.04, wps=5787.9, ups=0.09, wpb=64900, bsz=128, num_updates=9160, lr=9.99347e-05, gnorm=2.141, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105685 2021-06-20 00:00:22 | INFO | train_inner | epoch 004: 213 / 3002 loss=2.563, ppl=5.91, wps=5870.9, ups=0.09, wpb=64911, bsz=128, num_updates=9161, lr=9.99347e-05, gnorm=2.372, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105696 2021-06-20 00:00:33 | INFO | train_inner | epoch 004: 214 / 3002 loss=2.579, ppl=5.98, wps=5780.4, ups=0.09, wpb=64856, bsz=128, num_updates=9162, lr=9.99347e-05, gnorm=2.12, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105707 2021-06-20 00:00:44 | INFO | train_inner | epoch 004: 215 / 3002 loss=2.537, ppl=5.8, wps=5843.6, ups=0.09, wpb=64823, bsz=128, num_updates=9163, lr=9.99347e-05, gnorm=2.042, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105718 2021-06-20 00:00:55 | INFO | train_inner | epoch 004: 216 / 3002 loss=2.567, ppl=5.93, wps=5809.2, ups=0.09, wpb=64910, bsz=128, num_updates=9164, lr=9.99347e-05, gnorm=2.237, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105730 2021-06-20 00:01:06 | INFO | train_inner | epoch 004: 217 / 3002 loss=2.707, ppl=6.53, wps=5966.1, ups=0.09, wpb=64833, bsz=128, num_updates=9165, lr=9.99347e-05, gnorm=2.134, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105740 2021-06-20 00:01:17 | INFO | train_inner | epoch 004: 218 / 3002 loss=2.502, ppl=5.66, wps=5923.6, ups=0.09, wpb=64791, bsz=128, num_updates=9166, lr=9.99347e-05, gnorm=2.296, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105751 2021-06-20 00:01:28 | INFO | train_inner | epoch 004: 219 / 3002 loss=2.484, ppl=5.59, wps=5956, ups=0.09, wpb=64811, bsz=128, num_updates=9167, lr=9.99347e-05, gnorm=2.079, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105762 2021-06-20 00:01:39 | INFO | train_inner | epoch 004: 220 / 3002 loss=2.503, ppl=5.67, wps=5861.6, ups=0.09, wpb=64745, bsz=128, num_updates=9168, lr=9.99347e-05, gnorm=2.165, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105773 2021-06-20 00:01:50 | INFO | train_inner | epoch 004: 221 / 3002 loss=2.69, ppl=6.45, wps=5871.2, ups=0.09, wpb=64823, bsz=128, num_updates=9169, lr=9.99346e-05, gnorm=2.069, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105784 2021-06-20 00:02:01 | INFO | train_inner | epoch 004: 222 / 3002 loss=2.496, ppl=5.64, wps=5859.8, ups=0.09, wpb=64773, bsz=128, num_updates=9170, lr=9.99346e-05, gnorm=2.218, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105795 2021-06-20 00:02:12 | INFO | train_inner | epoch 004: 223 / 3002 loss=2.618, ppl=6.14, wps=5761, ups=0.09, wpb=64850, bsz=128, num_updates=9171, lr=9.99346e-05, gnorm=2.265, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105807 2021-06-20 00:02:23 | INFO | train_inner | epoch 004: 224 / 3002 loss=2.77, ppl=6.82, wps=5914.5, ups=0.09, wpb=64812, bsz=128, num_updates=9172, lr=9.99346e-05, gnorm=2.13, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105818 2021-06-20 00:02:34 | INFO | train_inner | epoch 004: 225 / 3002 loss=2.47, ppl=5.54, wps=5884.1, ups=0.09, wpb=64882, bsz=128, num_updates=9173, lr=9.99346e-05, gnorm=2.026, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105829 2021-06-20 00:02:45 | INFO | train_inner | epoch 004: 226 / 3002 loss=2.576, ppl=5.96, wps=5885.8, ups=0.09, wpb=64834, bsz=128, num_updates=9174, lr=9.99346e-05, gnorm=2.026, loss_scale=1, train_wall=11, gb_free=2.8, wall=105840 2021-06-20 00:02:57 | INFO | train_inner | epoch 004: 227 / 3002 loss=2.651, ppl=6.28, wps=5788.2, ups=0.09, wpb=64903, bsz=128, num_updates=9175, lr=9.99346e-05, gnorm=2.095, loss_scale=1, train_wall=11, gb_free=2.8, wall=105851 2021-06-20 00:03:07 | INFO | train_inner | epoch 004: 228 / 3002 loss=2.468, ppl=5.53, wps=6010.3, ups=0.09, wpb=64812, bsz=128, num_updates=9176, lr=9.99346e-05, gnorm=2.174, loss_scale=1, train_wall=10, gb_free=2.8, wall=105862 2021-06-20 00:03:19 | INFO | train_inner | epoch 004: 229 / 3002 loss=2.5, ppl=5.66, wps=5759.2, ups=0.09, wpb=64826, bsz=128, num_updates=9177, lr=9.99346e-05, gnorm=2.714, loss_scale=1, train_wall=11, gb_free=2.8, wall=105873 2021-06-20 00:03:30 | INFO | train_inner | epoch 004: 230 / 3002 loss=2.629, ppl=6.19, wps=5815.7, ups=0.09, wpb=64791, bsz=128, num_updates=9178, lr=9.99346e-05, gnorm=2.208, loss_scale=1, train_wall=11, gb_free=2.8, wall=105884 2021-06-20 00:03:41 | INFO | train_inner | epoch 004: 231 / 3002 loss=2.544, ppl=5.83, wps=5819.2, ups=0.09, wpb=64832, bsz=128, num_updates=9179, lr=9.99346e-05, gnorm=2.163, loss_scale=1, train_wall=11, gb_free=2.8, wall=105895 2021-06-20 00:03:52 | INFO | train_inner | epoch 004: 232 / 3002 loss=2.638, ppl=6.22, wps=5813.5, ups=0.09, wpb=64756, bsz=128, num_updates=9180, lr=9.99346e-05, gnorm=2.234, loss_scale=1, train_wall=11, gb_free=2.8, wall=105906 2021-06-20 00:04:03 | INFO | train_inner | epoch 004: 233 / 3002 loss=2.556, ppl=5.88, wps=5855.5, ups=0.09, wpb=64863, bsz=128, num_updates=9181, lr=9.99345e-05, gnorm=2.203, loss_scale=1, train_wall=11, gb_free=2.8, wall=105917 2021-06-20 00:04:14 | INFO | train_inner | epoch 004: 234 / 3002 loss=2.723, ppl=6.6, wps=5840.9, ups=0.09, wpb=64888, bsz=128, num_updates=9182, lr=9.99345e-05, gnorm=2.23, loss_scale=1, train_wall=11, gb_free=2.8, wall=105928 2021-06-20 00:04:25 | INFO | train_inner | epoch 004: 235 / 3002 loss=2.607, ppl=6.09, wps=5724.1, ups=0.09, wpb=64888, bsz=128, num_updates=9183, lr=9.99345e-05, gnorm=2.141, loss_scale=1, train_wall=11, gb_free=2.8, wall=105940 2021-06-20 00:04:37 | INFO | train_inner | epoch 004: 236 / 3002 loss=2.504, ppl=5.67, wps=5791.6, ups=0.09, wpb=64892, bsz=128, num_updates=9184, lr=9.99345e-05, gnorm=2.238, loss_scale=1, train_wall=11, gb_free=2.8, wall=105951 2021-06-20 00:04:48 | INFO | train_inner | epoch 004: 237 / 3002 loss=2.707, ppl=6.53, wps=5820.5, ups=0.09, wpb=64825, bsz=128, num_updates=9185, lr=9.99345e-05, gnorm=2.281, loss_scale=1, train_wall=11, gb_free=2.8, wall=105962 2021-06-20 00:04:59 | INFO | train_inner | epoch 004: 238 / 3002 loss=2.58, ppl=5.98, wps=5854.3, ups=0.09, wpb=64826, bsz=128, num_updates=9186, lr=9.99345e-05, gnorm=2.102, loss_scale=1, train_wall=11, gb_free=2.8, wall=105973 2021-06-20 00:05:10 | INFO | train_inner | epoch 004: 239 / 3002 loss=2.649, ppl=6.27, wps=5849.7, ups=0.09, wpb=64822, bsz=128, num_updates=9187, lr=9.99345e-05, gnorm=2.462, loss_scale=1, train_wall=11, gb_free=2.8, wall=105984 2021-06-20 00:05:21 | INFO | train_inner | epoch 004: 240 / 3002 loss=2.518, ppl=5.73, wps=5787.5, ups=0.09, wpb=64780, bsz=128, num_updates=9188, lr=9.99345e-05, gnorm=2.064, loss_scale=1, train_wall=11, gb_free=2.8, wall=105996 2021-06-20 00:05:32 | INFO | train_inner | epoch 004: 241 / 3002 loss=2.585, ppl=6, wps=5795.5, ups=0.09, wpb=64792, bsz=128, num_updates=9189, lr=9.99345e-05, gnorm=2.285, loss_scale=1, train_wall=11, gb_free=2.8, wall=106007 2021-06-20 00:05:43 | INFO | train_inner | epoch 004: 242 / 3002 loss=2.498, ppl=5.65, wps=5860.6, ups=0.09, wpb=64859, bsz=128, num_updates=9190, lr=9.99345e-05, gnorm=2.116, loss_scale=1, train_wall=11, gb_free=2.8, wall=106018 2021-06-20 00:05:54 | INFO | train_inner | epoch 004: 243 / 3002 loss=2.476, ppl=5.56, wps=5899.3, ups=0.09, wpb=64857, bsz=128, num_updates=9191, lr=9.99345e-05, gnorm=2.086, loss_scale=1, train_wall=11, gb_free=2.8, wall=106029 2021-06-20 00:06:05 | INFO | train_inner | epoch 004: 244 / 3002 loss=2.482, ppl=5.59, wps=5865.5, ups=0.09, wpb=64830, bsz=128, num_updates=9192, lr=9.99345e-05, gnorm=5.791, loss_scale=1, train_wall=11, gb_free=2.8, wall=106040 2021-06-20 00:06:17 | INFO | train_inner | epoch 004: 245 / 3002 loss=2.514, ppl=5.71, wps=5798.1, ups=0.09, wpb=64776, bsz=128, num_updates=9193, lr=9.99345e-05, gnorm=2.148, loss_scale=1, train_wall=11, gb_free=2.8, wall=106051 2021-06-20 00:06:27 | INFO | train_inner | epoch 004: 246 / 3002 loss=2.606, ppl=6.09, wps=6002.4, ups=0.09, wpb=64864, bsz=128, num_updates=9194, lr=9.99344e-05, gnorm=2.286, loss_scale=1, train_wall=10, gb_free=2.8, wall=106062 2021-06-20 00:06:38 | INFO | train_inner | epoch 004: 247 / 3002 loss=2.662, ppl=6.33, wps=5904, ups=0.09, wpb=64839, bsz=128, num_updates=9195, lr=9.99344e-05, gnorm=4.868, loss_scale=1, train_wall=11, gb_free=2.8, wall=106073 2021-06-20 00:06:50 | INFO | train_inner | epoch 004: 248 / 3002 loss=2.529, ppl=5.77, wps=5826, ups=0.09, wpb=64792, bsz=128, num_updates=9196, lr=9.99344e-05, gnorm=2.099, loss_scale=1, train_wall=11, gb_free=2.8, wall=106084 2021-06-20 00:07:01 | INFO | train_inner | epoch 004: 249 / 3002 loss=2.667, ppl=6.35, wps=5842.3, ups=0.09, wpb=64771, bsz=128, num_updates=9197, lr=9.99344e-05, gnorm=2.239, loss_scale=1, train_wall=11, gb_free=2.8, wall=106095 2021-06-20 00:07:11 | INFO | train_inner | epoch 004: 250 / 3002 loss=2.586, ppl=6.01, wps=5991, ups=0.09, wpb=64889, bsz=128, num_updates=9198, lr=9.99344e-05, gnorm=2.136, loss_scale=1, train_wall=10, gb_free=2.8, wall=106106 2021-06-20 00:07:23 | INFO | train_inner | epoch 004: 251 / 3002 loss=2.583, ppl=5.99, wps=5816.1, ups=0.09, wpb=64884, bsz=128, num_updates=9199, lr=9.99344e-05, gnorm=9.029, loss_scale=1, train_wall=11, gb_free=2.8, wall=106117 2021-06-20 00:07:34 | INFO | train_inner | epoch 004: 252 / 3002 loss=2.513, ppl=5.71, wps=5760.1, ups=0.09, wpb=64876, bsz=128, num_updates=9200, lr=9.99344e-05, gnorm=2.105, loss_scale=1, train_wall=11, gb_free=2.8, wall=106128 2021-06-20 00:07:45 | INFO | train_inner | epoch 004: 253 / 3002 loss=2.74, ppl=6.68, wps=5780.8, ups=0.09, wpb=64871, bsz=128, num_updates=9201, lr=9.99344e-05, gnorm=2.164, loss_scale=1, train_wall=11, gb_free=2.8, wall=106139 2021-06-20 00:07:56 | INFO | train_inner | epoch 004: 254 / 3002 loss=2.587, ppl=6.01, wps=5852.7, ups=0.09, wpb=64879, bsz=128, num_updates=9202, lr=9.99344e-05, gnorm=2.01, loss_scale=1, train_wall=11, gb_free=2.8, wall=106151 2021-06-20 00:08:07 | INFO | train_inner | epoch 004: 255 / 3002 loss=2.643, ppl=6.24, wps=5901.5, ups=0.09, wpb=64807, bsz=128, num_updates=9203, lr=9.99344e-05, gnorm=2.075, loss_scale=1, train_wall=11, gb_free=2.8, wall=106162 2021-06-20 00:08:18 | INFO | train_inner | epoch 004: 256 / 3002 loss=2.598, ppl=6.05, wps=5751.4, ups=0.09, wpb=64773, bsz=128, num_updates=9204, lr=9.99344e-05, gnorm=2.206, loss_scale=1, train_wall=11, gb_free=2.8, wall=106173 2021-06-20 00:08:30 | INFO | train_inner | epoch 004: 257 / 3002 loss=2.533, ppl=5.79, wps=5855.4, ups=0.09, wpb=64862, bsz=128, num_updates=9205, lr=9.99344e-05, gnorm=2.072, loss_scale=1, train_wall=11, gb_free=2.8, wall=106184 2021-06-20 00:08:41 | INFO | train_inner | epoch 004: 258 / 3002 loss=2.64, ppl=6.23, wps=5861.3, ups=0.09, wpb=64792, bsz=128, num_updates=9206, lr=9.99343e-05, gnorm=2.153, loss_scale=1, train_wall=11, gb_free=2.8, wall=106195 2021-06-20 00:08:52 | INFO | train_inner | epoch 004: 259 / 3002 loss=2.74, ppl=6.68, wps=5899.7, ups=0.09, wpb=64816, bsz=128, num_updates=9207, lr=9.99343e-05, gnorm=3.521, loss_scale=1, train_wall=11, gb_free=2.8, wall=106206 2021-06-20 00:09:03 | INFO | train_inner | epoch 004: 260 / 3002 loss=2.61, ppl=6.11, wps=5843.6, ups=0.09, wpb=64771, bsz=128, num_updates=9208, lr=9.99343e-05, gnorm=2.215, loss_scale=1, train_wall=11, gb_free=2.8, wall=106217 2021-06-20 00:09:14 | INFO | train_inner | epoch 004: 261 / 3002 loss=2.507, ppl=5.69, wps=5732.9, ups=0.09, wpb=64805, bsz=128, num_updates=9209, lr=9.99343e-05, gnorm=2.022, loss_scale=1, train_wall=11, gb_free=2.8, wall=106228 2021-06-20 00:09:25 | INFO | train_inner | epoch 004: 262 / 3002 loss=2.492, ppl=5.63, wps=5857.4, ups=0.09, wpb=64939, bsz=128, num_updates=9210, lr=9.99343e-05, gnorm=2.335, loss_scale=1, train_wall=11, gb_free=2.8, wall=106239 2021-06-20 00:09:36 | INFO | train_inner | epoch 004: 263 / 3002 loss=2.521, ppl=5.74, wps=5843.2, ups=0.09, wpb=64833, bsz=128, num_updates=9211, lr=9.99343e-05, gnorm=2.17, loss_scale=1, train_wall=11, gb_free=2.8, wall=106250 2021-06-20 00:09:47 | INFO | train_inner | epoch 004: 264 / 3002 loss=2.646, ppl=6.26, wps=5716.9, ups=0.09, wpb=64837, bsz=128, num_updates=9212, lr=9.99343e-05, gnorm=20.779, loss_scale=1, train_wall=11, gb_free=2.8, wall=106262 2021-06-20 00:09:58 | INFO | train_inner | epoch 004: 265 / 3002 loss=2.627, ppl=6.18, wps=5919.3, ups=0.09, wpb=64901, bsz=128, num_updates=9213, lr=9.99343e-05, gnorm=2.156, loss_scale=1, train_wall=11, gb_free=2.8, wall=106273 2021-06-20 00:10:09 | INFO | train_inner | epoch 004: 266 / 3002 loss=2.551, ppl=5.86, wps=5868.8, ups=0.09, wpb=64849, bsz=128, num_updates=9214, lr=9.99343e-05, gnorm=2.213, loss_scale=1, train_wall=11, gb_free=2.8, wall=106284 2021-06-20 00:10:20 | INFO | train_inner | epoch 004: 267 / 3002 loss=2.655, ppl=6.3, wps=5946.8, ups=0.09, wpb=64873, bsz=128, num_updates=9215, lr=9.99343e-05, gnorm=2.71, loss_scale=1, train_wall=10, gb_free=2.8, wall=106295 2021-06-20 00:10:31 | INFO | train_inner | epoch 004: 268 / 3002 loss=2.589, ppl=6.02, wps=5942.5, ups=0.09, wpb=64811, bsz=128, num_updates=9216, lr=9.99343e-05, gnorm=2.077, loss_scale=1, train_wall=10, gb_free=2.8, wall=106306 2021-06-20 00:10:43 | INFO | train_inner | epoch 004: 269 / 3002 loss=2.666, ppl=6.35, wps=5764.2, ups=0.09, wpb=64834, bsz=128, num_updates=9217, lr=9.99343e-05, gnorm=2.629, loss_scale=1, train_wall=11, gb_free=2.8, wall=106317 2021-06-20 00:10:54 | INFO | train_inner | epoch 004: 270 / 3002 loss=2.61, ppl=6.11, wps=5787.6, ups=0.09, wpb=64826, bsz=128, num_updates=9218, lr=9.99343e-05, gnorm=2.9, loss_scale=1, train_wall=11, gb_free=2.8, wall=106328 2021-06-20 00:11:05 | INFO | train_inner | epoch 004: 271 / 3002 loss=2.502, ppl=5.66, wps=5780.6, ups=0.09, wpb=64810, bsz=128, num_updates=9219, lr=9.99342e-05, gnorm=2.14, loss_scale=1, train_wall=11, gb_free=2.8, wall=106339 2021-06-20 00:11:16 | INFO | train_inner | epoch 004: 272 / 3002 loss=2.555, ppl=5.88, wps=5881.2, ups=0.09, wpb=64822, bsz=128, num_updates=9220, lr=9.99342e-05, gnorm=2.238, loss_scale=1, train_wall=11, gb_free=2.8, wall=106350 2021-06-20 00:11:27 | INFO | train_inner | epoch 004: 273 / 3002 loss=2.687, ppl=6.44, wps=5997.5, ups=0.09, wpb=64854, bsz=128, num_updates=9221, lr=9.99342e-05, gnorm=2.767, loss_scale=1, train_wall=10, gb_free=2.8, wall=106361 2021-06-20 00:11:38 | INFO | train_inner | epoch 004: 274 / 3002 loss=2.481, ppl=5.58, wps=5913.6, ups=0.09, wpb=64860, bsz=128, num_updates=9222, lr=9.99342e-05, gnorm=2.259, loss_scale=1, train_wall=11, gb_free=2.8, wall=106372 2021-06-20 00:11:49 | INFO | train_inner | epoch 004: 275 / 3002 loss=2.718, ppl=6.58, wps=5939.1, ups=0.09, wpb=64849, bsz=128, num_updates=9223, lr=9.99342e-05, gnorm=2.287, loss_scale=1, train_wall=10, gb_free=2.8, wall=106383 2021-06-20 00:12:00 | INFO | train_inner | epoch 004: 276 / 3002 loss=2.504, ppl=5.67, wps=5783.2, ups=0.09, wpb=64808, bsz=128, num_updates=9224, lr=9.99342e-05, gnorm=2.177, loss_scale=1, train_wall=11, gb_free=2.8, wall=106394 2021-06-20 00:12:11 | INFO | train_inner | epoch 004: 277 / 3002 loss=2.662, ppl=6.33, wps=5905.8, ups=0.09, wpb=64814, bsz=128, num_updates=9225, lr=9.99342e-05, gnorm=5.048, loss_scale=1, train_wall=11, gb_free=2.8, wall=106405 2021-06-20 00:12:22 | INFO | train_inner | epoch 004: 278 / 3002 loss=2.63, ppl=6.19, wps=5879.4, ups=0.09, wpb=64889, bsz=128, num_updates=9226, lr=9.99342e-05, gnorm=2.088, loss_scale=1, train_wall=11, gb_free=2.8, wall=106416 2021-06-20 00:12:33 | INFO | train_inner | epoch 004: 279 / 3002 loss=2.489, ppl=5.61, wps=5839.4, ups=0.09, wpb=64875, bsz=128, num_updates=9227, lr=9.99342e-05, gnorm=2.123, loss_scale=1, train_wall=11, gb_free=2.8, wall=106427 2021-06-20 00:12:44 | INFO | train_inner | epoch 004: 280 / 3002 loss=2.533, ppl=5.79, wps=5813.8, ups=0.09, wpb=64821, bsz=128, num_updates=9228, lr=9.99342e-05, gnorm=2.236, loss_scale=1, train_wall=11, gb_free=2.8, wall=106439 2021-06-20 00:12:55 | INFO | train_inner | epoch 004: 281 / 3002 loss=2.713, ppl=6.56, wps=5769.3, ups=0.09, wpb=64823, bsz=128, num_updates=9229, lr=9.99342e-05, gnorm=2.226, loss_scale=1, train_wall=11, gb_free=2.8, wall=106450 2021-06-20 00:13:06 | INFO | train_inner | epoch 004: 282 / 3002 loss=2.691, ppl=6.46, wps=5891.9, ups=0.09, wpb=64861, bsz=128, num_updates=9230, lr=9.99342e-05, gnorm=2.17, loss_scale=1, train_wall=11, gb_free=2.8, wall=106461 2021-06-20 00:13:18 | INFO | train_inner | epoch 004: 283 / 3002 loss=2.637, ppl=6.22, wps=5771.2, ups=0.09, wpb=64895, bsz=128, num_updates=9231, lr=9.99341e-05, gnorm=2.309, loss_scale=1, train_wall=11, gb_free=2.8, wall=106472 2021-06-20 00:13:29 | INFO | train_inner | epoch 004: 284 / 3002 loss=2.497, ppl=5.64, wps=5976, ups=0.09, wpb=64936, bsz=128, num_updates=9232, lr=9.99341e-05, gnorm=2.154, loss_scale=1, train_wall=10, gb_free=2.8, wall=106483 2021-06-20 00:13:40 | INFO | train_inner | epoch 004: 285 / 3002 loss=2.727, ppl=6.62, wps=5872.2, ups=0.09, wpb=64827, bsz=128, num_updates=9233, lr=9.99341e-05, gnorm=2.156, loss_scale=1, train_wall=11, gb_free=2.8, wall=106494 2021-06-20 00:13:51 | INFO | train_inner | epoch 004: 286 / 3002 loss=2.512, ppl=5.7, wps=5763, ups=0.09, wpb=64817, bsz=128, num_updates=9234, lr=9.99341e-05, gnorm=2.266, loss_scale=1, train_wall=11, gb_free=2.8, wall=106505 2021-06-20 00:14:02 | INFO | train_inner | epoch 004: 287 / 3002 loss=2.508, ppl=5.69, wps=5867.8, ups=0.09, wpb=64824, bsz=128, num_updates=9235, lr=9.99341e-05, gnorm=3.325, loss_scale=1, train_wall=11, gb_free=2.8, wall=106516 2021-06-20 00:14:13 | INFO | train_inner | epoch 004: 288 / 3002 loss=2.57, ppl=5.94, wps=5797.9, ups=0.09, wpb=64840, bsz=128, num_updates=9236, lr=9.99341e-05, gnorm=2.412, loss_scale=1, train_wall=11, gb_free=2.8, wall=106527 2021-06-20 00:14:24 | INFO | train_inner | epoch 004: 289 / 3002 loss=2.574, ppl=5.95, wps=5911.9, ups=0.09, wpb=64810, bsz=128, num_updates=9237, lr=9.99341e-05, gnorm=2.231, loss_scale=1, train_wall=11, gb_free=2.8, wall=106538 2021-06-20 00:14:35 | INFO | train_inner | epoch 004: 290 / 3002 loss=2.751, ppl=6.73, wps=5894.2, ups=0.09, wpb=64847, bsz=128, num_updates=9238, lr=9.99341e-05, gnorm=2.229, loss_scale=1, train_wall=11, gb_free=2.8, wall=106549 2021-06-20 00:14:46 | INFO | train_inner | epoch 004: 291 / 3002 loss=2.647, ppl=6.26, wps=5860.9, ups=0.09, wpb=64823, bsz=128, num_updates=9239, lr=9.99341e-05, gnorm=2.318, loss_scale=1, train_wall=11, gb_free=2.8, wall=106560 2021-06-20 00:14:57 | INFO | train_inner | epoch 004: 292 / 3002 loss=2.548, ppl=5.85, wps=5774.7, ups=0.09, wpb=64723, bsz=128, num_updates=9240, lr=9.99341e-05, gnorm=2.073, loss_scale=1, train_wall=11, gb_free=2.8, wall=106572 2021-06-20 00:15:08 | INFO | train_inner | epoch 004: 293 / 3002 loss=2.507, ppl=5.68, wps=5886.4, ups=0.09, wpb=64795, bsz=128, num_updates=9241, lr=9.99341e-05, gnorm=3.227, loss_scale=1, train_wall=11, gb_free=2.8, wall=106583 2021-06-20 00:15:19 | INFO | train_inner | epoch 004: 294 / 3002 loss=2.623, ppl=6.16, wps=5873.8, ups=0.09, wpb=64892, bsz=128, num_updates=9242, lr=9.99341e-05, gnorm=2.615, loss_scale=1, train_wall=11, gb_free=2.8, wall=106594 2021-06-20 00:15:30 | INFO | train_inner | epoch 004: 295 / 3002 loss=2.721, ppl=6.59, wps=5838, ups=0.09, wpb=64911, bsz=128, num_updates=9243, lr=9.99341e-05, gnorm=2.33, loss_scale=1, train_wall=11, gb_free=2.8, wall=106605 2021-06-20 00:15:42 | INFO | train_inner | epoch 004: 296 / 3002 loss=2.611, ppl=6.11, wps=5845.6, ups=0.09, wpb=64843, bsz=128, num_updates=9244, lr=9.9934e-05, gnorm=2.386, loss_scale=1, train_wall=11, gb_free=2.8, wall=106616 2021-06-20 00:15:53 | INFO | train_inner | epoch 004: 297 / 3002 loss=2.581, ppl=5.98, wps=5851.2, ups=0.09, wpb=64800, bsz=128, num_updates=9245, lr=9.9934e-05, gnorm=2.149, loss_scale=1, train_wall=11, gb_free=2.8, wall=106627 2021-06-20 00:16:04 | INFO | train_inner | epoch 004: 298 / 3002 loss=2.606, ppl=6.09, wps=5884.7, ups=0.09, wpb=64817, bsz=128, num_updates=9246, lr=9.9934e-05, gnorm=2.19, loss_scale=1, train_wall=11, gb_free=2.8, wall=106638 2021-06-20 00:16:15 | INFO | train_inner | epoch 004: 299 / 3002 loss=2.737, ppl=6.67, wps=5814.6, ups=0.09, wpb=64812, bsz=128, num_updates=9247, lr=9.9934e-05, gnorm=2.096, loss_scale=1, train_wall=11, gb_free=2.8, wall=106649 2021-06-20 00:16:26 | INFO | train_inner | epoch 004: 300 / 3002 loss=2.782, ppl=6.88, wps=5983.1, ups=0.09, wpb=64857, bsz=128, num_updates=9248, lr=9.9934e-05, gnorm=2.222, loss_scale=1, train_wall=10, gb_free=2.8, wall=106660 2021-06-20 00:16:37 | INFO | train_inner | epoch 004: 301 / 3002 loss=2.599, ppl=6.06, wps=5842.6, ups=0.09, wpb=64831, bsz=128, num_updates=9249, lr=9.9934e-05, gnorm=2.13, loss_scale=1, train_wall=11, gb_free=2.8, wall=106671 2021-06-20 00:16:48 | INFO | train_inner | epoch 004: 302 / 3002 loss=2.601, ppl=6.07, wps=5802.6, ups=0.09, wpb=64789, bsz=128, num_updates=9250, lr=9.9934e-05, gnorm=2.195, loss_scale=1, train_wall=11, gb_free=2.8, wall=106682 2021-06-20 00:16:59 | INFO | train_inner | epoch 004: 303 / 3002 loss=2.703, ppl=6.51, wps=5937.5, ups=0.09, wpb=64758, bsz=128, num_updates=9251, lr=9.9934e-05, gnorm=2.878, loss_scale=1, train_wall=10, gb_free=2.8, wall=106693 2021-06-20 00:17:10 | INFO | train_inner | epoch 004: 304 / 3002 loss=2.566, ppl=5.92, wps=5866.9, ups=0.09, wpb=64889, bsz=128, num_updates=9252, lr=9.9934e-05, gnorm=2.275, loss_scale=1, train_wall=11, gb_free=2.8, wall=106704 2021-06-20 00:17:21 | INFO | train_inner | epoch 004: 305 / 3002 loss=2.626, ppl=6.17, wps=5957.6, ups=0.09, wpb=64819, bsz=128, num_updates=9253, lr=9.9934e-05, gnorm=2.153, loss_scale=1, train_wall=10, gb_free=2.8, wall=106715 2021-06-20 00:17:32 | INFO | train_inner | epoch 004: 306 / 3002 loss=2.642, ppl=6.24, wps=5949.1, ups=0.09, wpb=64818, bsz=128, num_updates=9254, lr=9.9934e-05, gnorm=2.195, loss_scale=1, train_wall=10, gb_free=2.8, wall=106726 2021-06-20 00:17:43 | INFO | train_inner | epoch 004: 307 / 3002 loss=2.438, ppl=5.42, wps=5854.7, ups=0.09, wpb=64808, bsz=128, num_updates=9255, lr=9.9934e-05, gnorm=2.094, loss_scale=1, train_wall=11, gb_free=2.8, wall=106737 2021-06-20 00:17:54 | INFO | train_inner | epoch 004: 308 / 3002 loss=2.673, ppl=6.38, wps=5931.7, ups=0.09, wpb=64860, bsz=128, num_updates=9256, lr=9.99339e-05, gnorm=2.033, loss_scale=1, train_wall=10, gb_free=2.8, wall=106748 2021-06-20 00:18:05 | INFO | train_inner | epoch 004: 309 / 3002 loss=2.583, ppl=5.99, wps=5875.8, ups=0.09, wpb=64804, bsz=128, num_updates=9257, lr=9.99339e-05, gnorm=2.152, loss_scale=1, train_wall=11, gb_free=2.8, wall=106759 2021-06-20 00:18:16 | INFO | train_inner | epoch 004: 310 / 3002 loss=2.573, ppl=5.95, wps=5961.6, ups=0.09, wpb=64763, bsz=128, num_updates=9258, lr=9.99339e-05, gnorm=2.224, loss_scale=1, train_wall=10, gb_free=2.8, wall=106770 2021-06-20 00:18:26 | INFO | train_inner | epoch 004: 311 / 3002 loss=2.657, ppl=6.31, wps=5906.2, ups=0.09, wpb=64787, bsz=128, num_updates=9259, lr=9.99339e-05, gnorm=2.082, loss_scale=1, train_wall=10, gb_free=2.8, wall=106781 2021-06-20 00:18:38 | INFO | train_inner | epoch 004: 312 / 3002 loss=2.649, ppl=6.27, wps=5794, ups=0.09, wpb=64861, bsz=128, num_updates=9260, lr=9.99339e-05, gnorm=3.414, loss_scale=1, train_wall=11, gb_free=2.8, wall=106792 2021-06-20 00:18:49 | INFO | train_inner | epoch 004: 313 / 3002 loss=2.756, ppl=6.76, wps=5751.8, ups=0.09, wpb=64853, bsz=128, num_updates=9261, lr=9.99339e-05, gnorm=2.28, loss_scale=1, train_wall=11, gb_free=2.8, wall=106803 2021-06-20 00:19:00 | INFO | train_inner | epoch 004: 314 / 3002 loss=2.802, ppl=6.97, wps=5800.6, ups=0.09, wpb=64796, bsz=128, num_updates=9262, lr=9.99339e-05, gnorm=2.51, loss_scale=1, train_wall=11, gb_free=2.8, wall=106814 2021-06-20 00:19:11 | INFO | train_inner | epoch 004: 315 / 3002 loss=2.686, ppl=6.43, wps=5782.1, ups=0.09, wpb=64896, bsz=128, num_updates=9263, lr=9.99339e-05, gnorm=2.225, loss_scale=1, train_wall=11, gb_free=2.8, wall=106826 2021-06-20 00:19:22 | INFO | train_inner | epoch 004: 316 / 3002 loss=2.601, ppl=6.07, wps=5823.6, ups=0.09, wpb=64843, bsz=128, num_updates=9264, lr=9.99339e-05, gnorm=2.935, loss_scale=1, train_wall=11, gb_free=2.8, wall=106837 2021-06-20 00:19:33 | INFO | train_inner | epoch 004: 317 / 3002 loss=2.414, ppl=5.33, wps=5899.3, ups=0.09, wpb=64813, bsz=128, num_updates=9265, lr=9.99339e-05, gnorm=2.124, loss_scale=1, train_wall=11, gb_free=2.8, wall=106848 2021-06-20 00:19:45 | INFO | train_inner | epoch 004: 318 / 3002 loss=2.496, ppl=5.64, wps=5846.7, ups=0.09, wpb=64796, bsz=128, num_updates=9266, lr=9.99339e-05, gnorm=15.59, loss_scale=1, train_wall=11, gb_free=2.8, wall=106859 2021-06-20 00:19:56 | INFO | train_inner | epoch 004: 319 / 3002 loss=2.582, ppl=5.99, wps=5844.5, ups=0.09, wpb=64833, bsz=128, num_updates=9267, lr=9.99339e-05, gnorm=2.537, loss_scale=1, train_wall=11, gb_free=2.8, wall=106870 2021-06-20 00:20:07 | INFO | train_inner | epoch 004: 320 / 3002 loss=2.781, ppl=6.87, wps=5857.1, ups=0.09, wpb=64792, bsz=128, num_updates=9268, lr=9.99339e-05, gnorm=5.251, loss_scale=1, train_wall=11, gb_free=2.8, wall=106881 2021-06-20 00:20:18 | INFO | train_inner | epoch 004: 321 / 3002 loss=2.766, ppl=6.8, wps=5917.4, ups=0.09, wpb=64777, bsz=128, num_updates=9269, lr=9.99338e-05, gnorm=2.287, loss_scale=1, train_wall=10, gb_free=2.8, wall=106892 2021-06-20 00:20:29 | INFO | train_inner | epoch 004: 322 / 3002 loss=2.646, ppl=6.26, wps=5814.4, ups=0.09, wpb=64860, bsz=128, num_updates=9270, lr=9.99338e-05, gnorm=2.685, loss_scale=1, train_wall=11, gb_free=2.8, wall=106903 2021-06-20 00:20:40 | INFO | train_inner | epoch 004: 323 / 3002 loss=2.581, ppl=5.99, wps=5862.4, ups=0.09, wpb=64809, bsz=128, num_updates=9271, lr=9.99338e-05, gnorm=2.499, loss_scale=1, train_wall=11, gb_free=2.8, wall=106914 2021-06-20 00:20:51 | INFO | train_inner | epoch 004: 324 / 3002 loss=2.537, ppl=5.8, wps=5926.2, ups=0.09, wpb=64937, bsz=128, num_updates=9272, lr=9.99338e-05, gnorm=2.517, loss_scale=1, train_wall=10, gb_free=2.8, wall=106925 2021-06-20 00:21:02 | INFO | train_inner | epoch 004: 325 / 3002 loss=2.71, ppl=6.55, wps=5845.4, ups=0.09, wpb=64833, bsz=128, num_updates=9273, lr=9.99338e-05, gnorm=21.364, loss_scale=1, train_wall=11, gb_free=2.8, wall=106936 2021-06-20 00:21:13 | INFO | train_inner | epoch 004: 326 / 3002 loss=2.727, ppl=6.62, wps=5846.4, ups=0.09, wpb=64849, bsz=128, num_updates=9274, lr=9.99338e-05, gnorm=2.289, loss_scale=1, train_wall=11, gb_free=2.8, wall=106947 2021-06-20 00:21:24 | INFO | train_inner | epoch 004: 327 / 3002 loss=2.655, ppl=6.3, wps=5838.5, ups=0.09, wpb=64854, bsz=128, num_updates=9275, lr=9.99338e-05, gnorm=4.001, loss_scale=1, train_wall=11, gb_free=2.8, wall=106958 2021-06-20 00:21:35 | INFO | train_inner | epoch 004: 328 / 3002 loss=2.563, ppl=5.91, wps=5892.5, ups=0.09, wpb=64855, bsz=128, num_updates=9276, lr=9.99338e-05, gnorm=2.384, loss_scale=1, train_wall=11, gb_free=2.8, wall=106969 2021-06-20 00:21:46 | INFO | train_inner | epoch 004: 329 / 3002 loss=2.822, ppl=7.07, wps=5871.6, ups=0.09, wpb=64815, bsz=128, num_updates=9277, lr=9.99338e-05, gnorm=3.1, loss_scale=1, train_wall=11, gb_free=2.8, wall=106981 2021-06-20 00:21:57 | INFO | train_inner | epoch 004: 330 / 3002 loss=2.505, ppl=5.68, wps=5881.6, ups=0.09, wpb=64882, bsz=128, num_updates=9278, lr=9.99338e-05, gnorm=2.454, loss_scale=1, train_wall=11, gb_free=2.8, wall=106992 2021-06-20 00:22:08 | INFO | train_inner | epoch 004: 331 / 3002 loss=2.556, ppl=5.88, wps=5892.8, ups=0.09, wpb=64895, bsz=128, num_updates=9279, lr=9.99338e-05, gnorm=7.276, loss_scale=1, train_wall=11, gb_free=2.8, wall=107003 2021-06-20 00:22:19 | INFO | train_inner | epoch 004: 332 / 3002 loss=2.674, ppl=6.38, wps=5881, ups=0.09, wpb=64760, bsz=128, num_updates=9280, lr=9.99338e-05, gnorm=2.383, loss_scale=1, train_wall=11, gb_free=2.8, wall=107014 2021-06-20 00:22:30 | INFO | train_inner | epoch 004: 333 / 3002 loss=2.772, ppl=6.83, wps=5910.3, ups=0.09, wpb=64662, bsz=128, num_updates=9281, lr=9.99337e-05, gnorm=2.817, loss_scale=1, train_wall=10, gb_free=2.8, wall=107025 2021-06-20 00:22:41 | INFO | train_inner | epoch 004: 334 / 3002 loss=2.514, ppl=5.71, wps=5811.7, ups=0.09, wpb=64817, bsz=128, num_updates=9282, lr=9.99337e-05, gnorm=2.359, loss_scale=1, train_wall=11, gb_free=2.8, wall=107036 2021-06-20 00:22:52 | INFO | train_inner | epoch 004: 335 / 3002 loss=2.629, ppl=6.18, wps=5853.6, ups=0.09, wpb=64889, bsz=128, num_updates=9283, lr=9.99337e-05, gnorm=2.326, loss_scale=1, train_wall=11, gb_free=2.8, wall=107047 2021-06-20 00:23:04 | INFO | train_inner | epoch 004: 336 / 3002 loss=2.67, ppl=6.36, wps=5810.6, ups=0.09, wpb=64805, bsz=128, num_updates=9284, lr=9.99337e-05, gnorm=2.533, loss_scale=1, train_wall=11, gb_free=2.8, wall=107058 2021-06-20 00:23:15 | INFO | train_inner | epoch 004: 337 / 3002 loss=2.684, ppl=6.43, wps=5917.5, ups=0.09, wpb=64818, bsz=128, num_updates=9285, lr=9.99337e-05, gnorm=2.359, loss_scale=1, train_wall=10, gb_free=2.8, wall=107069 2021-06-20 00:23:26 | INFO | train_inner | epoch 004: 338 / 3002 loss=2.512, ppl=5.7, wps=5848.6, ups=0.09, wpb=64841, bsz=128, num_updates=9286, lr=9.99337e-05, gnorm=2.196, loss_scale=1, train_wall=11, gb_free=2.8, wall=107080 2021-06-20 00:23:37 | INFO | train_inner | epoch 004: 339 / 3002 loss=2.568, ppl=5.93, wps=5906.9, ups=0.09, wpb=64837, bsz=128, num_updates=9287, lr=9.99337e-05, gnorm=2.309, loss_scale=1, train_wall=11, gb_free=2.8, wall=107091 2021-06-20 00:23:48 | INFO | train_inner | epoch 004: 340 / 3002 loss=2.721, ppl=6.59, wps=5847.7, ups=0.09, wpb=64871, bsz=128, num_updates=9288, lr=9.99337e-05, gnorm=2.432, loss_scale=1, train_wall=11, gb_free=2.8, wall=107102 2021-06-20 00:23:59 | INFO | train_inner | epoch 004: 341 / 3002 loss=2.764, ppl=6.79, wps=5802.9, ups=0.09, wpb=64777, bsz=128, num_updates=9289, lr=9.99337e-05, gnorm=2.156, loss_scale=1, train_wall=11, gb_free=2.8, wall=107113 2021-06-20 00:24:10 | INFO | train_inner | epoch 004: 342 / 3002 loss=2.529, ppl=5.77, wps=5786.4, ups=0.09, wpb=64905, bsz=128, num_updates=9290, lr=9.99337e-05, gnorm=25.173, loss_scale=1, train_wall=11, gb_free=2.8, wall=107124 2021-06-20 00:24:21 | INFO | train_inner | epoch 004: 343 / 3002 loss=2.655, ppl=6.3, wps=5892.1, ups=0.09, wpb=64910, bsz=128, num_updates=9291, lr=9.99337e-05, gnorm=2.137, loss_scale=1, train_wall=11, gb_free=2.8, wall=107135 2021-06-20 00:24:32 | INFO | train_inner | epoch 004: 344 / 3002 loss=2.686, ppl=6.43, wps=5845.7, ups=0.09, wpb=64852, bsz=128, num_updates=9292, lr=9.99337e-05, gnorm=4.135, loss_scale=1, train_wall=11, gb_free=2.8, wall=107147 2021-06-20 00:24:43 | INFO | train_inner | epoch 004: 345 / 3002 loss=2.425, ppl=5.37, wps=5947.3, ups=0.09, wpb=64865, bsz=128, num_updates=9293, lr=9.99337e-05, gnorm=2.792, loss_scale=1, train_wall=10, gb_free=2.8, wall=107157 2021-06-20 00:24:54 | INFO | train_inner | epoch 004: 346 / 3002 loss=2.696, ppl=6.48, wps=5832, ups=0.09, wpb=64831, bsz=128, num_updates=9294, lr=9.99336e-05, gnorm=2.638, loss_scale=1, train_wall=11, gb_free=2.8, wall=107169 2021-06-20 00:25:05 | INFO | train_inner | epoch 004: 347 / 3002 loss=2.562, ppl=5.9, wps=5893.1, ups=0.09, wpb=64902, bsz=128, num_updates=9295, lr=9.99336e-05, gnorm=3.358, loss_scale=1, train_wall=11, gb_free=2.8, wall=107180 2021-06-20 00:25:16 | INFO | train_inner | epoch 004: 348 / 3002 loss=2.643, ppl=6.25, wps=5867, ups=0.09, wpb=64768, bsz=128, num_updates=9296, lr=9.99336e-05, gnorm=4.069, loss_scale=1, train_wall=11, gb_free=2.8, wall=107191 2021-06-20 00:25:27 | INFO | train_inner | epoch 004: 349 / 3002 loss=2.813, ppl=7.03, wps=5893.6, ups=0.09, wpb=64849, bsz=128, num_updates=9297, lr=9.99336e-05, gnorm=2.208, loss_scale=1, train_wall=11, gb_free=2.8, wall=107202 2021-06-20 00:25:38 | INFO | train_inner | epoch 004: 350 / 3002 loss=2.674, ppl=6.38, wps=5892.2, ups=0.09, wpb=64910, bsz=128, num_updates=9298, lr=9.99336e-05, gnorm=5.482, loss_scale=1, train_wall=11, gb_free=2.8, wall=107213 2021-06-20 00:25:49 | INFO | train_inner | epoch 004: 351 / 3002 loss=2.564, ppl=5.92, wps=5801.4, ups=0.09, wpb=64867, bsz=128, num_updates=9299, lr=9.99336e-05, gnorm=2.459, loss_scale=1, train_wall=11, gb_free=2.8, wall=107224 2021-06-20 00:26:00 | INFO | train_inner | epoch 004: 352 / 3002 loss=2.606, ppl=6.09, wps=5870.9, ups=0.09, wpb=64824, bsz=128, num_updates=9300, lr=9.99336e-05, gnorm=2.322, loss_scale=1, train_wall=11, gb_free=2.8, wall=107235 2021-06-20 00:26:11 | INFO | train_inner | epoch 004: 353 / 3002 loss=2.62, ppl=6.15, wps=5966.7, ups=0.09, wpb=64836, bsz=128, num_updates=9301, lr=9.99336e-05, gnorm=2.191, loss_scale=1, train_wall=10, gb_free=2.8, wall=107246 2021-06-20 00:26:22 | INFO | train_inner | epoch 004: 354 / 3002 loss=2.871, ppl=7.32, wps=5808.5, ups=0.09, wpb=64715, bsz=128, num_updates=9302, lr=9.99336e-05, gnorm=2.916, loss_scale=2, train_wall=11, gb_free=2.8, wall=107257 2021-06-20 00:26:34 | INFO | train_inner | epoch 004: 355 / 3002 loss=2.696, ppl=6.48, wps=5830.8, ups=0.09, wpb=64916, bsz=128, num_updates=9303, lr=9.99336e-05, gnorm=2.359, loss_scale=2, train_wall=11, gb_free=2.8, wall=107268 2021-06-20 00:26:45 | INFO | train_inner | epoch 004: 356 / 3002 loss=2.626, ppl=6.17, wps=5906.1, ups=0.09, wpb=64891, bsz=128, num_updates=9304, lr=9.99336e-05, gnorm=2.282, loss_scale=2, train_wall=11, gb_free=2.8, wall=107279 2021-06-20 00:26:56 | INFO | train_inner | epoch 004: 357 / 3002 loss=2.704, ppl=6.51, wps=5823.7, ups=0.09, wpb=64894, bsz=128, num_updates=9305, lr=9.99336e-05, gnorm=2.428, loss_scale=2, train_wall=11, gb_free=2.8, wall=107290 2021-06-20 00:27:07 | INFO | train_inner | epoch 004: 358 / 3002 loss=2.682, ppl=6.42, wps=5765.2, ups=0.09, wpb=64845, bsz=128, num_updates=9306, lr=9.99335e-05, gnorm=5.246, loss_scale=2, train_wall=11, gb_free=2.8, wall=107301 2021-06-20 00:27:18 | INFO | train_inner | epoch 004: 359 / 3002 loss=2.491, ppl=5.62, wps=5848, ups=0.09, wpb=64836, bsz=128, num_updates=9307, lr=9.99335e-05, gnorm=2.689, loss_scale=2, train_wall=11, gb_free=2.8, wall=107312 2021-06-20 00:27:29 | INFO | train_inner | epoch 004: 360 / 3002 loss=2.553, ppl=5.87, wps=5795.7, ups=0.09, wpb=64818, bsz=128, num_updates=9308, lr=9.99335e-05, gnorm=2.206, loss_scale=2, train_wall=11, gb_free=2.8, wall=107324 2021-06-20 00:27:40 | INFO | train_inner | epoch 004: 361 / 3002 loss=2.65, ppl=6.28, wps=5802.2, ups=0.09, wpb=64831, bsz=128, num_updates=9309, lr=9.99335e-05, gnorm=3.325, loss_scale=2, train_wall=11, gb_free=2.8, wall=107335 2021-06-20 00:27:51 | INFO | train_inner | epoch 004: 362 / 3002 loss=2.657, ppl=6.31, wps=5940.6, ups=0.09, wpb=64802, bsz=128, num_updates=9310, lr=9.99335e-05, gnorm=2.641, loss_scale=2, train_wall=10, gb_free=2.8, wall=107346 2021-06-20 00:28:02 | INFO | train_inner | epoch 004: 363 / 3002 loss=2.698, ppl=6.49, wps=5890.8, ups=0.09, wpb=64851, bsz=128, num_updates=9311, lr=9.99335e-05, gnorm=2.84, loss_scale=2, train_wall=11, gb_free=2.8, wall=107357 2021-06-20 00:28:13 | INFO | train_inner | epoch 004: 364 / 3002 loss=2.768, ppl=6.81, wps=5880.2, ups=0.09, wpb=64812, bsz=128, num_updates=9312, lr=9.99335e-05, gnorm=3.446, loss_scale=2, train_wall=11, gb_free=2.8, wall=107368 2021-06-20 00:28:24 | INFO | train_inner | epoch 004: 365 / 3002 loss=2.666, ppl=6.35, wps=5942.6, ups=0.09, wpb=64775, bsz=128, num_updates=9313, lr=9.99335e-05, gnorm=20.947, loss_scale=2, train_wall=10, gb_free=2.8, wall=107379 2021-06-20 00:28:35 | INFO | train_inner | epoch 004: 366 / 3002 loss=2.624, ppl=6.17, wps=5954.5, ups=0.09, wpb=64891, bsz=128, num_updates=9314, lr=9.99335e-05, gnorm=3.057, loss_scale=2, train_wall=10, gb_free=2.8, wall=107390 2021-06-20 00:28:46 | INFO | train_inner | epoch 004: 367 / 3002 loss=2.679, ppl=6.4, wps=5867.6, ups=0.09, wpb=64832, bsz=128, num_updates=9315, lr=9.99335e-05, gnorm=2.472, loss_scale=2, train_wall=11, gb_free=2.8, wall=107401 2021-06-20 00:28:57 | INFO | train_inner | epoch 004: 368 / 3002 loss=2.609, ppl=6.1, wps=5846.5, ups=0.09, wpb=64773, bsz=128, num_updates=9316, lr=9.99335e-05, gnorm=2.887, loss_scale=2, train_wall=11, gb_free=2.8, wall=107412 2021-06-20 00:29:08 | INFO | train_inner | epoch 004: 369 / 3002 loss=2.513, ppl=5.71, wps=5975.2, ups=0.09, wpb=64882, bsz=128, num_updates=9317, lr=9.99335e-05, gnorm=2.295, loss_scale=2, train_wall=10, gb_free=2.8, wall=107423 2021-06-20 00:29:19 | INFO | train_inner | epoch 004: 370 / 3002 loss=2.725, ppl=6.61, wps=5820.6, ups=0.09, wpb=64755, bsz=128, num_updates=9318, lr=9.99335e-05, gnorm=2.659, loss_scale=2, train_wall=11, gb_free=2.8, wall=107434 2021-06-20 00:29:31 | INFO | train_inner | epoch 004: 371 / 3002 loss=2.763, ppl=6.79, wps=5780.2, ups=0.09, wpb=64846, bsz=128, num_updates=9319, lr=9.99334e-05, gnorm=5.275, loss_scale=2, train_wall=11, gb_free=2.8, wall=107445 2021-06-20 00:29:42 | INFO | train_inner | epoch 004: 372 / 3002 loss=2.594, ppl=6.04, wps=5734.8, ups=0.09, wpb=64750, bsz=128, num_updates=9320, lr=9.99334e-05, gnorm=13.491, loss_scale=2, train_wall=11, gb_free=2.8, wall=107456 2021-06-20 00:29:53 | INFO | train_inner | epoch 004: 373 / 3002 loss=2.78, ppl=6.87, wps=5857.3, ups=0.09, wpb=64851, bsz=128, num_updates=9321, lr=9.99334e-05, gnorm=2.396, loss_scale=2, train_wall=11, gb_free=2.8, wall=107467 2021-06-20 00:30:04 | INFO | train_inner | epoch 004: 374 / 3002 loss=2.732, ppl=6.64, wps=5918.3, ups=0.09, wpb=64815, bsz=128, num_updates=9322, lr=9.99334e-05, gnorm=3.121, loss_scale=2, train_wall=10, gb_free=2.8, wall=107478 2021-06-20 00:30:15 | INFO | train_inner | epoch 004: 375 / 3002 loss=2.679, ppl=6.4, wps=5930.8, ups=0.09, wpb=64831, bsz=128, num_updates=9323, lr=9.99334e-05, gnorm=2.283, loss_scale=2, train_wall=10, gb_free=2.8, wall=107489 2021-06-20 00:30:26 | INFO | train_inner | epoch 004: 376 / 3002 loss=2.607, ppl=6.09, wps=5785.4, ups=0.09, wpb=64838, bsz=128, num_updates=9324, lr=9.99334e-05, gnorm=2.979, loss_scale=2, train_wall=11, gb_free=2.8, wall=107500 2021-06-20 00:30:37 | INFO | train_inner | epoch 004: 377 / 3002 loss=2.781, ppl=6.88, wps=5758, ups=0.09, wpb=64804, bsz=128, num_updates=9325, lr=9.99334e-05, gnorm=2.377, loss_scale=2, train_wall=11, gb_free=2.8, wall=107512 2021-06-20 00:30:48 | INFO | train_inner | epoch 004: 378 / 3002 loss=2.734, ppl=6.65, wps=5805.4, ups=0.09, wpb=64732, bsz=128, num_updates=9326, lr=9.99334e-05, gnorm=2.322, loss_scale=2, train_wall=11, gb_free=2.8, wall=107523 2021-06-20 00:30:59 | INFO | train_inner | epoch 004: 379 / 3002 loss=2.553, ppl=5.87, wps=5849.7, ups=0.09, wpb=64810, bsz=128, num_updates=9327, lr=9.99334e-05, gnorm=2.589, loss_scale=2, train_wall=11, gb_free=2.8, wall=107534 2021-06-20 00:31:11 | INFO | train_inner | epoch 004: 380 / 3002 loss=2.606, ppl=6.09, wps=5807.3, ups=0.09, wpb=64868, bsz=128, num_updates=9328, lr=9.99334e-05, gnorm=2.242, loss_scale=2, train_wall=11, gb_free=2.8, wall=107545 2021-06-20 00:31:22 | INFO | train_inner | epoch 004: 381 / 3002 loss=2.737, ppl=6.67, wps=5839.6, ups=0.09, wpb=64764, bsz=128, num_updates=9329, lr=9.99334e-05, gnorm=2.244, loss_scale=2, train_wall=11, gb_free=2.8, wall=107556 2021-06-20 00:31:33 | INFO | train_inner | epoch 004: 382 / 3002 loss=2.554, ppl=5.87, wps=5841.2, ups=0.09, wpb=64780, bsz=128, num_updates=9330, lr=9.99334e-05, gnorm=2.335, loss_scale=2, train_wall=11, gb_free=2.8, wall=107567 2021-06-20 00:31:44 | INFO | train_inner | epoch 004: 383 / 3002 loss=2.58, ppl=5.98, wps=5936.2, ups=0.09, wpb=64861, bsz=128, num_updates=9331, lr=9.99333e-05, gnorm=2.184, loss_scale=2, train_wall=10, gb_free=2.8, wall=107578 2021-06-20 00:31:55 | INFO | train_inner | epoch 004: 384 / 3002 loss=2.568, ppl=5.93, wps=5753.3, ups=0.09, wpb=64828, bsz=128, num_updates=9332, lr=9.99333e-05, gnorm=2.153, loss_scale=2, train_wall=11, gb_free=2.8, wall=107589 2021-06-20 00:32:06 | INFO | train_inner | epoch 004: 385 / 3002 loss=2.541, ppl=5.82, wps=5802, ups=0.09, wpb=64783, bsz=128, num_updates=9333, lr=9.99333e-05, gnorm=2.303, loss_scale=2, train_wall=11, gb_free=2.8, wall=107601 2021-06-20 00:32:17 | INFO | train_inner | epoch 004: 386 / 3002 loss=2.678, ppl=6.4, wps=5911, ups=0.09, wpb=64889, bsz=128, num_updates=9334, lr=9.99333e-05, gnorm=2.325, loss_scale=2, train_wall=11, gb_free=2.8, wall=107611 2021-06-20 00:32:28 | INFO | train_inner | epoch 004: 387 / 3002 loss=2.755, ppl=6.75, wps=5855.9, ups=0.09, wpb=64795, bsz=128, num_updates=9335, lr=9.99333e-05, gnorm=2.223, loss_scale=2, train_wall=11, gb_free=2.8, wall=107623 2021-06-20 00:32:39 | INFO | train_inner | epoch 004: 388 / 3002 loss=2.551, ppl=5.86, wps=5816.3, ups=0.09, wpb=64813, bsz=128, num_updates=9336, lr=9.99333e-05, gnorm=3.02, loss_scale=2, train_wall=11, gb_free=2.8, wall=107634 2021-06-20 00:32:50 | INFO | train_inner | epoch 004: 389 / 3002 loss=2.436, ppl=5.41, wps=5844.6, ups=0.09, wpb=64766, bsz=128, num_updates=9337, lr=9.99333e-05, gnorm=2.139, loss_scale=2, train_wall=11, gb_free=2.8, wall=107645 2021-06-20 00:33:02 | INFO | train_inner | epoch 004: 390 / 3002 loss=2.482, ppl=5.59, wps=5820.7, ups=0.09, wpb=64867, bsz=128, num_updates=9338, lr=9.99333e-05, gnorm=2.143, loss_scale=2, train_wall=11, gb_free=2.8, wall=107656 2021-06-20 00:33:13 | INFO | train_inner | epoch 004: 391 / 3002 loss=2.581, ppl=5.98, wps=5916.7, ups=0.09, wpb=64862, bsz=128, num_updates=9339, lr=9.99333e-05, gnorm=2.453, loss_scale=2, train_wall=10, gb_free=2.8, wall=107667 2021-06-20 00:33:24 | INFO | train_inner | epoch 004: 392 / 3002 loss=2.475, ppl=5.56, wps=5902.5, ups=0.09, wpb=64813, bsz=128, num_updates=9340, lr=9.99333e-05, gnorm=2.32, loss_scale=2, train_wall=11, gb_free=2.8, wall=107678 2021-06-20 00:33:35 | INFO | train_inner | epoch 004: 393 / 3002 loss=2.53, ppl=5.77, wps=5745.3, ups=0.09, wpb=64864, bsz=128, num_updates=9341, lr=9.99333e-05, gnorm=2.185, loss_scale=2, train_wall=11, gb_free=2.8, wall=107689 2021-06-20 00:33:46 | INFO | train_inner | epoch 004: 394 / 3002 loss=2.655, ppl=6.3, wps=5958.9, ups=0.09, wpb=64814, bsz=128, num_updates=9342, lr=9.99333e-05, gnorm=2.125, loss_scale=2, train_wall=10, gb_free=2.8, wall=107700 2021-06-20 00:33:57 | INFO | train_inner | epoch 004: 395 / 3002 loss=2.717, ppl=6.58, wps=5985.2, ups=0.09, wpb=64749, bsz=128, num_updates=9343, lr=9.99333e-05, gnorm=2.165, loss_scale=2, train_wall=10, gb_free=2.8, wall=107711 2021-06-20 00:34:08 | INFO | train_inner | epoch 004: 396 / 3002 loss=2.617, ppl=6.13, wps=5836.9, ups=0.09, wpb=64932, bsz=128, num_updates=9344, lr=9.99332e-05, gnorm=2.252, loss_scale=2, train_wall=11, gb_free=2.8, wall=107722 2021-06-20 00:34:19 | INFO | train_inner | epoch 004: 397 / 3002 loss=2.828, ppl=7.1, wps=5849.8, ups=0.09, wpb=64849, bsz=128, num_updates=9345, lr=9.99332e-05, gnorm=2.151, loss_scale=2, train_wall=11, gb_free=2.8, wall=107733 2021-06-20 00:34:30 | INFO | train_inner | epoch 004: 398 / 3002 loss=2.556, ppl=5.88, wps=5799.9, ups=0.09, wpb=64875, bsz=128, num_updates=9346, lr=9.99332e-05, gnorm=2.047, loss_scale=2, train_wall=11, gb_free=2.8, wall=107744 2021-06-20 00:34:41 | INFO | train_inner | epoch 004: 399 / 3002 loss=2.768, ppl=6.81, wps=5842, ups=0.09, wpb=64734, bsz=128, num_updates=9347, lr=9.99332e-05, gnorm=2.449, loss_scale=2, train_wall=11, gb_free=2.8, wall=107755 2021-06-20 00:34:52 | INFO | train_inner | epoch 004: 400 / 3002 loss=2.607, ppl=6.09, wps=5851.4, ups=0.09, wpb=64834, bsz=128, num_updates=9348, lr=9.99332e-05, gnorm=2.22, loss_scale=2, train_wall=11, gb_free=2.8, wall=107766 2021-06-20 00:35:03 | INFO | train_inner | epoch 004: 401 / 3002 loss=2.424, ppl=5.37, wps=5796.2, ups=0.09, wpb=64770, bsz=128, num_updates=9349, lr=9.99332e-05, gnorm=2.299, loss_scale=2, train_wall=11, gb_free=2.8, wall=107778 2021-06-20 00:35:14 | INFO | train_inner | epoch 004: 402 / 3002 loss=2.615, ppl=6.13, wps=5847.4, ups=0.09, wpb=64710, bsz=128, num_updates=9350, lr=9.99332e-05, gnorm=2.303, loss_scale=2, train_wall=11, gb_free=2.8, wall=107789 2021-06-20 00:35:25 | INFO | train_inner | epoch 004: 403 / 3002 loss=2.548, ppl=5.85, wps=5903, ups=0.09, wpb=64834, bsz=128, num_updates=9351, lr=9.99332e-05, gnorm=2.05, loss_scale=2, train_wall=11, gb_free=2.8, wall=107800 2021-06-20 00:35:36 | INFO | train_inner | epoch 004: 404 / 3002 loss=2.546, ppl=5.84, wps=5932.3, ups=0.09, wpb=64842, bsz=128, num_updates=9352, lr=9.99332e-05, gnorm=2.168, loss_scale=2, train_wall=10, gb_free=2.8, wall=107811 2021-06-20 00:35:47 | INFO | train_inner | epoch 004: 405 / 3002 loss=2.712, ppl=6.55, wps=5785.2, ups=0.09, wpb=64820, bsz=128, num_updates=9353, lr=9.99332e-05, gnorm=2.09, loss_scale=2, train_wall=11, gb_free=2.8, wall=107822 2021-06-20 00:35:58 | INFO | train_inner | epoch 004: 406 / 3002 loss=2.583, ppl=5.99, wps=5903.1, ups=0.09, wpb=64868, bsz=128, num_updates=9354, lr=9.99332e-05, gnorm=2.206, loss_scale=2, train_wall=11, gb_free=2.8, wall=107833 2021-06-20 00:36:09 | INFO | train_inner | epoch 004: 407 / 3002 loss=2.532, ppl=5.78, wps=5969.4, ups=0.09, wpb=64887, bsz=128, num_updates=9355, lr=9.99332e-05, gnorm=2.12, loss_scale=2, train_wall=10, gb_free=2.8, wall=107844 2021-06-20 00:36:20 | INFO | train_inner | epoch 004: 408 / 3002 loss=2.549, ppl=5.85, wps=5861.1, ups=0.09, wpb=64852, bsz=128, num_updates=9356, lr=9.99331e-05, gnorm=2.281, loss_scale=2, train_wall=11, gb_free=2.8, wall=107855 2021-06-20 00:36:32 | INFO | train_inner | epoch 004: 409 / 3002 loss=2.643, ppl=6.25, wps=5784.2, ups=0.09, wpb=64830, bsz=128, num_updates=9357, lr=9.99331e-05, gnorm=2.081, loss_scale=2, train_wall=11, gb_free=2.8, wall=107866 2021-06-20 00:36:43 | INFO | train_inner | epoch 004: 410 / 3002 loss=2.6, ppl=6.06, wps=5802, ups=0.09, wpb=64760, bsz=128, num_updates=9358, lr=9.99331e-05, gnorm=2.386, loss_scale=2, train_wall=11, gb_free=2.8, wall=107877 2021-06-20 00:36:54 | INFO | train_inner | epoch 004: 411 / 3002 loss=2.669, ppl=6.36, wps=5886.4, ups=0.09, wpb=64808, bsz=128, num_updates=9359, lr=9.99331e-05, gnorm=2.582, loss_scale=2, train_wall=11, gb_free=2.8, wall=107888 2021-06-20 00:37:05 | INFO | train_inner | epoch 004: 412 / 3002 loss=2.575, ppl=5.96, wps=5936.7, ups=0.09, wpb=64836, bsz=128, num_updates=9360, lr=9.99331e-05, gnorm=2.317, loss_scale=2, train_wall=10, gb_free=2.8, wall=107899 2021-06-20 00:37:16 | INFO | train_inner | epoch 004: 413 / 3002 loss=2.545, ppl=5.84, wps=5765.5, ups=0.09, wpb=64856, bsz=128, num_updates=9361, lr=9.99331e-05, gnorm=2.075, loss_scale=2, train_wall=11, gb_free=2.8, wall=107910 2021-06-20 00:37:27 | INFO | train_inner | epoch 004: 414 / 3002 loss=2.574, ppl=5.96, wps=5969.7, ups=0.09, wpb=64817, bsz=128, num_updates=9362, lr=9.99331e-05, gnorm=5.134, loss_scale=2, train_wall=10, gb_free=2.8, wall=107921 2021-06-20 00:37:38 | INFO | train_inner | epoch 004: 415 / 3002 loss=2.566, ppl=5.92, wps=5823.1, ups=0.09, wpb=64859, bsz=128, num_updates=9363, lr=9.99331e-05, gnorm=2.103, loss_scale=2, train_wall=11, gb_free=2.8, wall=107932 2021-06-20 00:37:49 | INFO | train_inner | epoch 004: 416 / 3002 loss=2.58, ppl=5.98, wps=5851.6, ups=0.09, wpb=64872, bsz=128, num_updates=9364, lr=9.99331e-05, gnorm=2.404, loss_scale=2, train_wall=11, gb_free=2.8, wall=107943 2021-06-20 00:38:00 | INFO | train_inner | epoch 004: 417 / 3002 loss=2.48, ppl=5.58, wps=5932.9, ups=0.09, wpb=64907, bsz=128, num_updates=9365, lr=9.99331e-05, gnorm=2.599, loss_scale=2, train_wall=11, gb_free=2.8, wall=107954 2021-06-20 00:38:11 | INFO | train_inner | epoch 004: 418 / 3002 loss=2.57, ppl=5.94, wps=5821.9, ups=0.09, wpb=64835, bsz=128, num_updates=9366, lr=9.99331e-05, gnorm=2.474, loss_scale=2, train_wall=11, gb_free=2.8, wall=107965 2021-06-20 00:38:22 | INFO | train_inner | epoch 004: 419 / 3002 loss=2.568, ppl=5.93, wps=5851.9, ups=0.09, wpb=64803, bsz=128, num_updates=9367, lr=9.99331e-05, gnorm=2.468, loss_scale=2, train_wall=11, gb_free=2.8, wall=107976 2021-06-20 00:38:33 | INFO | train_inner | epoch 004: 420 / 3002 loss=2.65, ppl=6.28, wps=5859.2, ups=0.09, wpb=64892, bsz=128, num_updates=9368, lr=9.99331e-05, gnorm=2.246, loss_scale=2, train_wall=11, gb_free=2.8, wall=107988 2021-06-20 00:38:44 | INFO | train_inner | epoch 004: 421 / 3002 loss=2.669, ppl=6.36, wps=5807.9, ups=0.09, wpb=64916, bsz=128, num_updates=9369, lr=9.9933e-05, gnorm=9.313, loss_scale=2, train_wall=11, gb_free=2.8, wall=107999 2021-06-20 00:38:55 | INFO | train_inner | epoch 004: 422 / 3002 loss=2.524, ppl=5.75, wps=5981, ups=0.09, wpb=64777, bsz=128, num_updates=9370, lr=9.9933e-05, gnorm=2.195, loss_scale=2, train_wall=10, gb_free=2.8, wall=108010 2021-06-20 00:39:06 | INFO | train_inner | epoch 004: 423 / 3002 loss=2.499, ppl=5.65, wps=5792.4, ups=0.09, wpb=64754, bsz=128, num_updates=9371, lr=9.9933e-05, gnorm=2.384, loss_scale=2, train_wall=11, gb_free=2.8, wall=108021 2021-06-20 00:39:17 | INFO | train_inner | epoch 004: 424 / 3002 loss=2.648, ppl=6.27, wps=5981.6, ups=0.09, wpb=64867, bsz=128, num_updates=9372, lr=9.9933e-05, gnorm=2.388, loss_scale=2, train_wall=10, gb_free=2.8, wall=108032 2021-06-20 00:39:28 | INFO | train_inner | epoch 004: 425 / 3002 loss=2.585, ppl=6, wps=5762.3, ups=0.09, wpb=64784, bsz=128, num_updates=9373, lr=9.9933e-05, gnorm=2.591, loss_scale=2, train_wall=11, gb_free=2.8, wall=108043 2021-06-20 00:39:39 | INFO | train_inner | epoch 004: 426 / 3002 loss=2.585, ppl=6, wps=5925.3, ups=0.09, wpb=64860, bsz=128, num_updates=9374, lr=9.9933e-05, gnorm=2.444, loss_scale=2, train_wall=10, gb_free=2.8, wall=108054 2021-06-20 00:39:50 | INFO | train_inner | epoch 004: 427 / 3002 loss=2.395, ppl=5.26, wps=5922.1, ups=0.09, wpb=64877, bsz=128, num_updates=9375, lr=9.9933e-05, gnorm=3.233, loss_scale=2, train_wall=11, gb_free=2.8, wall=108065 2021-06-20 00:40:01 | INFO | train_inner | epoch 004: 428 / 3002 loss=2.66, ppl=6.32, wps=5870.3, ups=0.09, wpb=64830, bsz=128, num_updates=9376, lr=9.9933e-05, gnorm=2.18, loss_scale=2, train_wall=11, gb_free=2.8, wall=108076 2021-06-20 00:40:13 | INFO | train_inner | epoch 004: 429 / 3002 loss=2.678, ppl=6.4, wps=5811.5, ups=0.09, wpb=64812, bsz=128, num_updates=9377, lr=9.9933e-05, gnorm=2.061, loss_scale=2, train_wall=11, gb_free=2.8, wall=108087 2021-06-20 00:40:24 | INFO | train_inner | epoch 004: 430 / 3002 loss=2.65, ppl=6.28, wps=5832, ups=0.09, wpb=64856, bsz=128, num_updates=9378, lr=9.9933e-05, gnorm=2.148, loss_scale=2, train_wall=11, gb_free=2.8, wall=108098 2021-06-20 00:40:35 | INFO | train_inner | epoch 004: 431 / 3002 loss=2.737, ppl=6.67, wps=5788.4, ups=0.09, wpb=64766, bsz=128, num_updates=9379, lr=9.9933e-05, gnorm=6.462, loss_scale=2, train_wall=11, gb_free=2.8, wall=108109 2021-06-20 00:40:46 | INFO | train_inner | epoch 004: 432 / 3002 loss=2.688, ppl=6.45, wps=5860.1, ups=0.09, wpb=64783, bsz=128, num_updates=9380, lr=9.9933e-05, gnorm=2.185, loss_scale=2, train_wall=11, gb_free=2.8, wall=108120 2021-06-20 00:40:57 | INFO | train_inner | epoch 004: 433 / 3002 loss=2.748, ppl=6.72, wps=5934.6, ups=0.09, wpb=64836, bsz=128, num_updates=9381, lr=9.99329e-05, gnorm=3.639, loss_scale=2, train_wall=10, gb_free=2.8, wall=108131 2021-06-20 00:41:08 | INFO | train_inner | epoch 004: 434 / 3002 loss=2.613, ppl=6.12, wps=5868.9, ups=0.09, wpb=64903, bsz=128, num_updates=9382, lr=9.99329e-05, gnorm=3.113, loss_scale=2, train_wall=11, gb_free=2.8, wall=108142 2021-06-20 00:41:19 | INFO | train_inner | epoch 004: 435 / 3002 loss=2.591, ppl=6.02, wps=5916.5, ups=0.09, wpb=64831, bsz=128, num_updates=9383, lr=9.99329e-05, gnorm=2.123, loss_scale=2, train_wall=10, gb_free=2.8, wall=108153 2021-06-20 00:41:30 | INFO | train_inner | epoch 004: 436 / 3002 loss=2.71, ppl=6.55, wps=5738.5, ups=0.09, wpb=64789, bsz=128, num_updates=9384, lr=9.99329e-05, gnorm=2.233, loss_scale=2, train_wall=11, gb_free=2.8, wall=108165 2021-06-20 00:41:41 | INFO | train_inner | epoch 004: 437 / 3002 loss=2.611, ppl=6.11, wps=5866.7, ups=0.09, wpb=64779, bsz=128, num_updates=9385, lr=9.99329e-05, gnorm=2.858, loss_scale=2, train_wall=11, gb_free=2.8, wall=108176 2021-06-20 00:41:52 | INFO | train_inner | epoch 004: 438 / 3002 loss=2.736, ppl=6.66, wps=5856.1, ups=0.09, wpb=64869, bsz=128, num_updates=9386, lr=9.99329e-05, gnorm=2.357, loss_scale=2, train_wall=11, gb_free=2.8, wall=108187 2021-06-20 00:42:03 | INFO | train_inner | epoch 004: 439 / 3002 loss=2.698, ppl=6.49, wps=5982.2, ups=0.09, wpb=64829, bsz=128, num_updates=9387, lr=9.99329e-05, gnorm=6.58, loss_scale=2, train_wall=10, gb_free=2.8, wall=108197 2021-06-20 00:42:14 | INFO | train_inner | epoch 004: 440 / 3002 loss=2.632, ppl=6.2, wps=5965.1, ups=0.09, wpb=64857, bsz=128, num_updates=9388, lr=9.99329e-05, gnorm=2.228, loss_scale=2, train_wall=10, gb_free=2.8, wall=108208 2021-06-20 00:42:25 | INFO | train_inner | epoch 004: 441 / 3002 loss=2.547, ppl=5.84, wps=5771, ups=0.09, wpb=64724, bsz=128, num_updates=9389, lr=9.99329e-05, gnorm=2.184, loss_scale=2, train_wall=11, gb_free=2.8, wall=108220 2021-06-20 00:42:36 | INFO | train_inner | epoch 004: 442 / 3002 loss=2.648, ppl=6.27, wps=5872.4, ups=0.09, wpb=64729, bsz=128, num_updates=9390, lr=9.99329e-05, gnorm=2.202, loss_scale=2, train_wall=11, gb_free=2.8, wall=108231 2021-06-20 00:42:47 | INFO | train_inner | epoch 004: 443 / 3002 loss=2.62, ppl=6.15, wps=5914.7, ups=0.09, wpb=64846, bsz=128, num_updates=9391, lr=9.99329e-05, gnorm=2.105, loss_scale=2, train_wall=11, gb_free=2.8, wall=108242 2021-06-20 00:42:58 | INFO | train_inner | epoch 004: 444 / 3002 loss=2.709, ppl=6.54, wps=5883.8, ups=0.09, wpb=64715, bsz=128, num_updates=9392, lr=9.99329e-05, gnorm=2.952, loss_scale=2, train_wall=11, gb_free=2.8, wall=108253 2021-06-20 00:43:09 | INFO | train_inner | epoch 004: 445 / 3002 loss=2.51, ppl=5.7, wps=5851, ups=0.09, wpb=64812, bsz=128, num_updates=9393, lr=9.99329e-05, gnorm=2.148, loss_scale=2, train_wall=11, gb_free=2.8, wall=108264 2021-06-20 00:43:21 | INFO | train_inner | epoch 004: 446 / 3002 loss=2.544, ppl=5.83, wps=5771, ups=0.09, wpb=64839, bsz=128, num_updates=9394, lr=9.99328e-05, gnorm=2.166, loss_scale=2, train_wall=11, gb_free=2.8, wall=108275 2021-06-20 00:43:31 | INFO | train_inner | epoch 004: 447 / 3002 loss=2.444, ppl=5.44, wps=5968.1, ups=0.09, wpb=64875, bsz=128, num_updates=9395, lr=9.99328e-05, gnorm=2.279, loss_scale=2, train_wall=10, gb_free=2.8, wall=108286 2021-06-20 00:43:42 | INFO | train_inner | epoch 004: 448 / 3002 loss=2.611, ppl=6.11, wps=5893.2, ups=0.09, wpb=64816, bsz=128, num_updates=9396, lr=9.99328e-05, gnorm=2.589, loss_scale=2, train_wall=11, gb_free=2.8, wall=108297 2021-06-20 00:43:54 | INFO | train_inner | epoch 004: 449 / 3002 loss=2.524, ppl=5.75, wps=5748.1, ups=0.09, wpb=64793, bsz=128, num_updates=9397, lr=9.99328e-05, gnorm=2.135, loss_scale=2, train_wall=11, gb_free=2.8, wall=108308 2021-06-20 00:44:05 | INFO | train_inner | epoch 004: 450 / 3002 loss=2.45, ppl=5.46, wps=5862.7, ups=0.09, wpb=64851, bsz=128, num_updates=9398, lr=9.99328e-05, gnorm=3.454, loss_scale=2, train_wall=11, gb_free=2.8, wall=108319 2021-06-20 00:44:16 | INFO | train_inner | epoch 004: 451 / 3002 loss=2.631, ppl=6.2, wps=5818, ups=0.09, wpb=64846, bsz=128, num_updates=9399, lr=9.99328e-05, gnorm=2.468, loss_scale=2, train_wall=11, gb_free=2.8, wall=108330 2021-06-20 00:44:27 | INFO | train_inner | epoch 004: 452 / 3002 loss=2.519, ppl=5.73, wps=5807.6, ups=0.09, wpb=64814, bsz=128, num_updates=9400, lr=9.99328e-05, gnorm=2.271, loss_scale=2, train_wall=11, gb_free=2.8, wall=108341 2021-06-20 00:44:38 | INFO | train_inner | epoch 004: 453 / 3002 loss=2.61, ppl=6.1, wps=5761.7, ups=0.09, wpb=64836, bsz=128, num_updates=9401, lr=9.99328e-05, gnorm=4.066, loss_scale=2, train_wall=11, gb_free=2.8, wall=108353 2021-06-20 00:44:49 | INFO | train_inner | epoch 004: 454 / 3002 loss=2.45, ppl=5.47, wps=5845.3, ups=0.09, wpb=64775, bsz=128, num_updates=9402, lr=9.99328e-05, gnorm=2.146, loss_scale=2, train_wall=11, gb_free=2.8, wall=108364 2021-06-20 00:45:01 | INFO | train_inner | epoch 004: 455 / 3002 loss=2.581, ppl=5.98, wps=5815.7, ups=0.09, wpb=64903, bsz=128, num_updates=9403, lr=9.99328e-05, gnorm=2.731, loss_scale=2, train_wall=11, gb_free=2.8, wall=108375 2021-06-20 00:45:11 | INFO | train_inner | epoch 004: 456 / 3002 loss=2.681, ppl=6.41, wps=5942.4, ups=0.09, wpb=64798, bsz=128, num_updates=9404, lr=9.99328e-05, gnorm=2.104, loss_scale=2, train_wall=10, gb_free=2.8, wall=108386 2021-06-20 00:45:23 | INFO | train_inner | epoch 004: 457 / 3002 loss=2.647, ppl=6.26, wps=5811, ups=0.09, wpb=64764, bsz=128, num_updates=9405, lr=9.99328e-05, gnorm=2.165, loss_scale=2, train_wall=11, gb_free=2.8, wall=108397 2021-06-20 00:45:33 | INFO | train_inner | epoch 004: 458 / 3002 loss=2.551, ppl=5.86, wps=5951.2, ups=0.09, wpb=64852, bsz=128, num_updates=9406, lr=9.99327e-05, gnorm=2.111, loss_scale=2, train_wall=10, gb_free=2.8, wall=108408 2021-06-20 00:45:45 | INFO | train_inner | epoch 004: 459 / 3002 loss=2.645, ppl=6.26, wps=5840.1, ups=0.09, wpb=64777, bsz=128, num_updates=9407, lr=9.99327e-05, gnorm=2.175, loss_scale=2, train_wall=11, gb_free=2.8, wall=108419 2021-06-20 00:45:56 | INFO | train_inner | epoch 004: 460 / 3002 loss=2.614, ppl=6.12, wps=5928, ups=0.09, wpb=64733, bsz=128, num_updates=9408, lr=9.99327e-05, gnorm=2.419, loss_scale=2, train_wall=10, gb_free=2.8, wall=108430 2021-06-20 00:46:07 | INFO | train_inner | epoch 004: 461 / 3002 loss=2.586, ppl=6.01, wps=5869.3, ups=0.09, wpb=64859, bsz=128, num_updates=9409, lr=9.99327e-05, gnorm=2.094, loss_scale=2, train_wall=11, gb_free=2.8, wall=108441 2021-06-20 00:46:18 | INFO | train_inner | epoch 004: 462 / 3002 loss=2.422, ppl=5.36, wps=5881, ups=0.09, wpb=64792, bsz=128, num_updates=9410, lr=9.99327e-05, gnorm=2.321, loss_scale=2, train_wall=11, gb_free=2.8, wall=108452 2021-06-20 00:46:29 | INFO | train_inner | epoch 004: 463 / 3002 loss=2.672, ppl=6.37, wps=5868.4, ups=0.09, wpb=64779, bsz=128, num_updates=9411, lr=9.99327e-05, gnorm=2.778, loss_scale=2, train_wall=11, gb_free=2.8, wall=108463 2021-06-20 00:46:40 | INFO | train_inner | epoch 004: 464 / 3002 loss=2.576, ppl=5.96, wps=5906.3, ups=0.09, wpb=64747, bsz=128, num_updates=9412, lr=9.99327e-05, gnorm=17.074, loss_scale=2, train_wall=10, gb_free=2.8, wall=108474 2021-06-20 00:46:51 | INFO | train_inner | epoch 004: 465 / 3002 loss=2.735, ppl=6.66, wps=5813.8, ups=0.09, wpb=64809, bsz=128, num_updates=9413, lr=9.99327e-05, gnorm=2.185, loss_scale=2, train_wall=11, gb_free=2.8, wall=108485 2021-06-20 00:47:02 | INFO | train_inner | epoch 004: 466 / 3002 loss=2.605, ppl=6.09, wps=5964.9, ups=0.09, wpb=64804, bsz=128, num_updates=9414, lr=9.99327e-05, gnorm=2.099, loss_scale=2, train_wall=10, gb_free=2.8, wall=108496 2021-06-20 00:47:13 | INFO | train_inner | epoch 004: 467 / 3002 loss=2.465, ppl=5.52, wps=5852.3, ups=0.09, wpb=64881, bsz=128, num_updates=9415, lr=9.99327e-05, gnorm=2.13, loss_scale=2, train_wall=11, gb_free=2.8, wall=108507 2021-06-20 00:47:24 | INFO | train_inner | epoch 004: 468 / 3002 loss=2.512, ppl=5.71, wps=5929.9, ups=0.09, wpb=64814, bsz=128, num_updates=9416, lr=9.99327e-05, gnorm=2.241, loss_scale=2, train_wall=10, gb_free=2.8, wall=108518 2021-06-20 00:47:35 | INFO | train_inner | epoch 004: 469 / 3002 loss=2.731, ppl=6.64, wps=5805.7, ups=0.09, wpb=64814, bsz=128, num_updates=9417, lr=9.99327e-05, gnorm=2.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=108529 2021-06-20 00:47:46 | INFO | train_inner | epoch 004: 470 / 3002 loss=2.599, ppl=6.06, wps=5704.1, ups=0.09, wpb=64774, bsz=128, num_updates=9418, lr=9.99327e-05, gnorm=2.182, loss_scale=2, train_wall=11, gb_free=2.8, wall=108540 2021-06-20 00:47:57 | INFO | train_inner | epoch 004: 471 / 3002 loss=2.794, ppl=6.94, wps=5873.3, ups=0.09, wpb=64849, bsz=128, num_updates=9419, lr=9.99326e-05, gnorm=2.822, loss_scale=2, train_wall=11, gb_free=2.8, wall=108552 2021-06-20 00:48:08 | INFO | train_inner | epoch 004: 472 / 3002 loss=2.554, ppl=5.87, wps=5780, ups=0.09, wpb=64874, bsz=128, num_updates=9420, lr=9.99326e-05, gnorm=2.333, loss_scale=2, train_wall=11, gb_free=2.8, wall=108563 2021-06-20 00:48:19 | INFO | train_inner | epoch 004: 473 / 3002 loss=2.52, ppl=5.73, wps=5856.4, ups=0.09, wpb=64894, bsz=128, num_updates=9421, lr=9.99326e-05, gnorm=3.897, loss_scale=2, train_wall=11, gb_free=2.8, wall=108574 2021-06-20 00:48:31 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 2021-06-20 00:48:42 | INFO | train_inner | epoch 004: 475 / 3002 loss=2.563, ppl=5.91, wps=2906.8, ups=0.04, wpb=64802, bsz=128, num_updates=9422, lr=9.99326e-05, gnorm=2.925, loss_scale=1, train_wall=21, gb_free=2.8, wall=108596 2021-06-20 00:48:53 | INFO | train_inner | epoch 004: 476 / 3002 loss=2.609, ppl=6.1, wps=5844.3, ups=0.09, wpb=64838, bsz=128, num_updates=9423, lr=9.99326e-05, gnorm=2.412, loss_scale=1, train_wall=11, gb_free=2.8, wall=108607 2021-06-20 00:49:04 | INFO | train_inner | epoch 004: 477 / 3002 loss=2.596, ppl=6.04, wps=5906.6, ups=0.09, wpb=64897, bsz=128, num_updates=9424, lr=9.99326e-05, gnorm=2.413, loss_scale=1, train_wall=11, gb_free=2.8, wall=108618 2021-06-20 00:49:15 | INFO | train_inner | epoch 004: 478 / 3002 loss=2.552, ppl=5.87, wps=5925.5, ups=0.09, wpb=64868, bsz=128, num_updates=9425, lr=9.99326e-05, gnorm=2.152, loss_scale=1, train_wall=11, gb_free=2.8, wall=108629 2021-06-20 00:49:26 | INFO | train_inner | epoch 004: 479 / 3002 loss=2.864, ppl=7.28, wps=5720.3, ups=0.09, wpb=64705, bsz=128, num_updates=9426, lr=9.99326e-05, gnorm=2.305, loss_scale=1, train_wall=11, gb_free=2.8, wall=108640 2021-06-20 00:49:37 | INFO | train_inner | epoch 004: 480 / 3002 loss=2.588, ppl=6.01, wps=5708.6, ups=0.09, wpb=64851, bsz=128, num_updates=9427, lr=9.99326e-05, gnorm=3.71, loss_scale=1, train_wall=11, gb_free=2.8, wall=108652 2021-06-20 00:49:49 | INFO | train_inner | epoch 004: 481 / 3002 loss=2.762, ppl=6.78, wps=5832.4, ups=0.09, wpb=64773, bsz=128, num_updates=9428, lr=9.99326e-05, gnorm=2.371, loss_scale=1, train_wall=11, gb_free=2.8, wall=108663 2021-06-20 00:50:00 | INFO | train_inner | epoch 004: 482 / 3002 loss=2.726, ppl=6.62, wps=5906, ups=0.09, wpb=64835, bsz=128, num_updates=9429, lr=9.99326e-05, gnorm=2.681, loss_scale=1, train_wall=11, gb_free=2.8, wall=108674 2021-06-20 00:50:11 | INFO | train_inner | epoch 004: 483 / 3002 loss=2.673, ppl=6.38, wps=5810.1, ups=0.09, wpb=64737, bsz=128, num_updates=9430, lr=9.99326e-05, gnorm=2.098, loss_scale=1, train_wall=11, gb_free=2.8, wall=108685 2021-06-20 00:50:22 | INFO | train_inner | epoch 004: 484 / 3002 loss=2.723, ppl=6.6, wps=5734.2, ups=0.09, wpb=64842, bsz=128, num_updates=9431, lr=9.99325e-05, gnorm=2.158, loss_scale=1, train_wall=11, gb_free=2.8, wall=108696 2021-06-20 00:50:33 | INFO | train_inner | epoch 004: 485 / 3002 loss=2.644, ppl=6.25, wps=5757.7, ups=0.09, wpb=64868, bsz=128, num_updates=9432, lr=9.99325e-05, gnorm=2.072, loss_scale=1, train_wall=11, gb_free=2.8, wall=108708 2021-06-20 00:50:45 | INFO | train_inner | epoch 004: 486 / 3002 loss=2.683, ppl=6.42, wps=5759.8, ups=0.09, wpb=64775, bsz=128, num_updates=9433, lr=9.99325e-05, gnorm=2.416, loss_scale=1, train_wall=11, gb_free=2.8, wall=108719 2021-06-20 00:50:56 | INFO | train_inner | epoch 004: 487 / 3002 loss=2.572, ppl=5.95, wps=5811.7, ups=0.09, wpb=64811, bsz=128, num_updates=9434, lr=9.99325e-05, gnorm=2.278, loss_scale=1, train_wall=11, gb_free=2.8, wall=108730 2021-06-20 00:51:07 | INFO | train_inner | epoch 004: 488 / 3002 loss=2.74, ppl=6.68, wps=5777.6, ups=0.09, wpb=64739, bsz=128, num_updates=9435, lr=9.99325e-05, gnorm=2.353, loss_scale=1, train_wall=11, gb_free=2.8, wall=108741 2021-06-20 00:51:18 | INFO | train_inner | epoch 004: 489 / 3002 loss=2.465, ppl=5.52, wps=5911.1, ups=0.09, wpb=64802, bsz=128, num_updates=9436, lr=9.99325e-05, gnorm=2.628, loss_scale=1, train_wall=11, gb_free=2.8, wall=108752 2021-06-20 00:51:29 | INFO | train_inner | epoch 004: 490 / 3002 loss=2.597, ppl=6.05, wps=5843.9, ups=0.09, wpb=64847, bsz=128, num_updates=9437, lr=9.99325e-05, gnorm=2.042, loss_scale=1, train_wall=11, gb_free=2.8, wall=108763 2021-06-20 00:51:40 | INFO | train_inner | epoch 004: 491 / 3002 loss=2.581, ppl=5.98, wps=5914.9, ups=0.09, wpb=64837, bsz=128, num_updates=9438, lr=9.99325e-05, gnorm=2.652, loss_scale=1, train_wall=10, gb_free=2.8, wall=108774 2021-06-20 00:51:51 | INFO | train_inner | epoch 004: 492 / 3002 loss=2.628, ppl=6.18, wps=5792.4, ups=0.09, wpb=64842, bsz=128, num_updates=9439, lr=9.99325e-05, gnorm=2.129, loss_scale=1, train_wall=11, gb_free=2.8, wall=108785 2021-06-20 00:52:02 | INFO | train_inner | epoch 004: 493 / 3002 loss=2.531, ppl=5.78, wps=5740.1, ups=0.09, wpb=64809, bsz=128, num_updates=9440, lr=9.99325e-05, gnorm=2.08, loss_scale=1, train_wall=11, gb_free=2.8, wall=108797 2021-06-20 00:52:13 | INFO | train_inner | epoch 004: 494 / 3002 loss=2.607, ppl=6.09, wps=5859.9, ups=0.09, wpb=64869, bsz=128, num_updates=9441, lr=9.99325e-05, gnorm=2.317, loss_scale=1, train_wall=11, gb_free=2.8, wall=108808 2021-06-20 00:52:24 | INFO | train_inner | epoch 004: 495 / 3002 loss=2.55, ppl=5.86, wps=5916, ups=0.09, wpb=64890, bsz=128, num_updates=9442, lr=9.99325e-05, gnorm=2.223, loss_scale=1, train_wall=11, gb_free=2.8, wall=108819 2021-06-20 00:52:36 | INFO | train_inner | epoch 004: 496 / 3002 loss=2.623, ppl=6.16, wps=5776.4, ups=0.09, wpb=64811, bsz=128, num_updates=9443, lr=9.99325e-05, gnorm=2.219, loss_scale=1, train_wall=11, gb_free=2.8, wall=108830 2021-06-20 00:52:47 | INFO | train_inner | epoch 004: 497 / 3002 loss=2.709, ppl=6.54, wps=5878.8, ups=0.09, wpb=64814, bsz=128, num_updates=9444, lr=9.99324e-05, gnorm=2.212, loss_scale=1, train_wall=11, gb_free=2.8, wall=108841 2021-06-20 00:52:58 | INFO | train_inner | epoch 004: 498 / 3002 loss=2.621, ppl=6.15, wps=5829.4, ups=0.09, wpb=64828, bsz=128, num_updates=9445, lr=9.99324e-05, gnorm=2.178, loss_scale=1, train_wall=11, gb_free=2.8, wall=108852 2021-06-20 00:53:09 | INFO | train_inner | epoch 004: 499 / 3002 loss=2.684, ppl=6.43, wps=5880.4, ups=0.09, wpb=64836, bsz=128, num_updates=9446, lr=9.99324e-05, gnorm=2.232, loss_scale=1, train_wall=11, gb_free=2.8, wall=108863 2021-06-20 00:53:20 | INFO | train_inner | epoch 004: 500 / 3002 loss=2.641, ppl=6.24, wps=5850, ups=0.09, wpb=64809, bsz=128, num_updates=9447, lr=9.99324e-05, gnorm=2.764, loss_scale=1, train_wall=11, gb_free=2.8, wall=108874 2021-06-20 00:53:31 | INFO | train_inner | epoch 004: 501 / 3002 loss=2.654, ppl=6.29, wps=5855.3, ups=0.09, wpb=64826, bsz=128, num_updates=9448, lr=9.99324e-05, gnorm=2.214, loss_scale=1, train_wall=11, gb_free=2.8, wall=108885 2021-06-20 00:53:42 | INFO | train_inner | epoch 004: 502 / 3002 loss=2.61, ppl=6.1, wps=5848.4, ups=0.09, wpb=64765, bsz=128, num_updates=9449, lr=9.99324e-05, gnorm=2.153, loss_scale=1, train_wall=11, gb_free=2.8, wall=108896 2021-06-20 00:53:53 | INFO | train_inner | epoch 004: 503 / 3002 loss=2.731, ppl=6.64, wps=5871.6, ups=0.09, wpb=64799, bsz=128, num_updates=9450, lr=9.99324e-05, gnorm=2.19, loss_scale=1, train_wall=11, gb_free=2.8, wall=108907 2021-06-20 00:54:04 | INFO | train_inner | epoch 004: 504 / 3002 loss=2.69, ppl=6.45, wps=5860.5, ups=0.09, wpb=64825, bsz=128, num_updates=9451, lr=9.99324e-05, gnorm=3.985, loss_scale=1, train_wall=11, gb_free=2.8, wall=108918 2021-06-20 00:54:15 | INFO | train_inner | epoch 004: 505 / 3002 loss=2.637, ppl=6.22, wps=5820.1, ups=0.09, wpb=64867, bsz=128, num_updates=9452, lr=9.99324e-05, gnorm=2.226, loss_scale=1, train_wall=11, gb_free=2.8, wall=108930 2021-06-20 00:54:26 | INFO | train_inner | epoch 004: 506 / 3002 loss=2.753, ppl=6.74, wps=5867, ups=0.09, wpb=64789, bsz=128, num_updates=9453, lr=9.99324e-05, gnorm=2.084, loss_scale=1, train_wall=11, gb_free=2.8, wall=108941 2021-06-20 00:54:37 | INFO | train_inner | epoch 004: 507 / 3002 loss=2.337, ppl=5.05, wps=5948.5, ups=0.09, wpb=64905, bsz=128, num_updates=9454, lr=9.99324e-05, gnorm=2.104, loss_scale=1, train_wall=10, gb_free=2.8, wall=108952 2021-06-20 00:54:48 | INFO | train_inner | epoch 004: 508 / 3002 loss=2.605, ppl=6.08, wps=5927.8, ups=0.09, wpb=64763, bsz=128, num_updates=9455, lr=9.99324e-05, gnorm=2.106, loss_scale=1, train_wall=10, gb_free=2.8, wall=108962 2021-06-20 00:54:59 | INFO | train_inner | epoch 004: 509 / 3002 loss=2.699, ppl=6.49, wps=5898.3, ups=0.09, wpb=64873, bsz=128, num_updates=9456, lr=9.99323e-05, gnorm=2.228, loss_scale=1, train_wall=11, gb_free=2.8, wall=108973 2021-06-20 00:55:10 | INFO | train_inner | epoch 004: 510 / 3002 loss=2.473, ppl=5.55, wps=5877.8, ups=0.09, wpb=64794, bsz=128, num_updates=9457, lr=9.99323e-05, gnorm=2.045, loss_scale=1, train_wall=11, gb_free=2.8, wall=108985 2021-06-20 00:55:21 | INFO | train_inner | epoch 004: 511 / 3002 loss=2.654, ppl=6.3, wps=5764.4, ups=0.09, wpb=64870, bsz=128, num_updates=9458, lr=9.99323e-05, gnorm=2.084, loss_scale=1, train_wall=11, gb_free=2.8, wall=108996 2021-06-20 00:55:33 | INFO | train_inner | epoch 004: 512 / 3002 loss=2.558, ppl=5.89, wps=5833.2, ups=0.09, wpb=64916, bsz=128, num_updates=9459, lr=9.99323e-05, gnorm=2.059, loss_scale=1, train_wall=11, gb_free=2.8, wall=109007 2021-06-20 00:55:44 | INFO | train_inner | epoch 004: 513 / 3002 loss=2.678, ppl=6.4, wps=5816.8, ups=0.09, wpb=64774, bsz=128, num_updates=9460, lr=9.99323e-05, gnorm=2.136, loss_scale=1, train_wall=11, gb_free=2.8, wall=109018 2021-06-20 00:55:55 | INFO | train_inner | epoch 004: 514 / 3002 loss=2.603, ppl=6.08, wps=5792.9, ups=0.09, wpb=64895, bsz=128, num_updates=9461, lr=9.99323e-05, gnorm=4.072, loss_scale=1, train_wall=11, gb_free=2.8, wall=109029 2021-06-20 00:56:06 | INFO | train_inner | epoch 004: 515 / 3002 loss=2.543, ppl=5.83, wps=5734.6, ups=0.09, wpb=64787, bsz=128, num_updates=9462, lr=9.99323e-05, gnorm=1.995, loss_scale=1, train_wall=11, gb_free=2.8, wall=109041 2021-06-20 00:56:17 | INFO | train_inner | epoch 004: 516 / 3002 loss=2.674, ppl=6.38, wps=5848.9, ups=0.09, wpb=64796, bsz=128, num_updates=9463, lr=9.99323e-05, gnorm=2.221, loss_scale=1, train_wall=11, gb_free=2.8, wall=109052 2021-06-20 00:56:28 | INFO | train_inner | epoch 004: 517 / 3002 loss=2.483, ppl=5.59, wps=5835.2, ups=0.09, wpb=64829, bsz=128, num_updates=9464, lr=9.99323e-05, gnorm=2.166, loss_scale=1, train_wall=11, gb_free=2.8, wall=109063 2021-06-20 00:56:40 | INFO | train_inner | epoch 004: 518 / 3002 loss=2.696, ppl=6.48, wps=5825.4, ups=0.09, wpb=64810, bsz=128, num_updates=9465, lr=9.99323e-05, gnorm=2.347, loss_scale=1, train_wall=11, gb_free=2.8, wall=109074 2021-06-20 00:56:51 | INFO | train_inner | epoch 004: 519 / 3002 loss=2.496, ppl=5.64, wps=5722.3, ups=0.09, wpb=64876, bsz=128, num_updates=9466, lr=9.99323e-05, gnorm=2.125, loss_scale=1, train_wall=11, gb_free=2.8, wall=109085 2021-06-20 00:57:02 | INFO | train_inner | epoch 004: 520 / 3002 loss=2.603, ppl=6.08, wps=5752.4, ups=0.09, wpb=64804, bsz=128, num_updates=9467, lr=9.99323e-05, gnorm=2.138, loss_scale=1, train_wall=11, gb_free=2.8, wall=109096 2021-06-20 00:57:13 | INFO | train_inner | epoch 004: 521 / 3002 loss=2.557, ppl=5.89, wps=5964.1, ups=0.09, wpb=64860, bsz=128, num_updates=9468, lr=9.99323e-05, gnorm=2.081, loss_scale=1, train_wall=10, gb_free=2.8, wall=109107 2021-06-20 00:57:24 | INFO | train_inner | epoch 004: 522 / 3002 loss=2.401, ppl=5.28, wps=5941.9, ups=0.09, wpb=64808, bsz=128, num_updates=9469, lr=9.99322e-05, gnorm=2.144, loss_scale=1, train_wall=10, gb_free=2.8, wall=109118 2021-06-20 00:57:35 | INFO | train_inner | epoch 004: 523 / 3002 loss=2.448, ppl=5.46, wps=5862.6, ups=0.09, wpb=64927, bsz=128, num_updates=9470, lr=9.99322e-05, gnorm=2.867, loss_scale=1, train_wall=11, gb_free=2.8, wall=109129 2021-06-20 00:57:46 | INFO | train_inner | epoch 004: 524 / 3002 loss=2.689, ppl=6.45, wps=5864.8, ups=0.09, wpb=64847, bsz=128, num_updates=9471, lr=9.99322e-05, gnorm=2.076, loss_scale=1, train_wall=11, gb_free=2.8, wall=109140 2021-06-20 00:57:57 | INFO | train_inner | epoch 004: 525 / 3002 loss=2.629, ppl=6.19, wps=5819.8, ups=0.09, wpb=64784, bsz=128, num_updates=9472, lr=9.99322e-05, gnorm=2.322, loss_scale=1, train_wall=11, gb_free=2.8, wall=109152 2021-06-20 00:58:08 | INFO | train_inner | epoch 004: 526 / 3002 loss=2.519, ppl=5.73, wps=5770.6, ups=0.09, wpb=64835, bsz=128, num_updates=9473, lr=9.99322e-05, gnorm=25.692, loss_scale=1, train_wall=11, gb_free=2.8, wall=109163 2021-06-20 00:58:19 | INFO | train_inner | epoch 004: 527 / 3002 loss=2.746, ppl=6.71, wps=5960.7, ups=0.09, wpb=64764, bsz=128, num_updates=9474, lr=9.99322e-05, gnorm=4.611, loss_scale=1, train_wall=10, gb_free=2.8, wall=109174 2021-06-20 00:58:30 | INFO | train_inner | epoch 004: 528 / 3002 loss=2.572, ppl=5.94, wps=5821.3, ups=0.09, wpb=64884, bsz=128, num_updates=9475, lr=9.99322e-05, gnorm=3.616, loss_scale=1, train_wall=11, gb_free=2.8, wall=109185 2021-06-20 00:58:42 | INFO | train_inner | epoch 004: 529 / 3002 loss=2.598, ppl=6.05, wps=5801.6, ups=0.09, wpb=64872, bsz=128, num_updates=9476, lr=9.99322e-05, gnorm=2.261, loss_scale=1, train_wall=11, gb_free=2.8, wall=109196 2021-06-20 00:58:53 | INFO | train_inner | epoch 004: 530 / 3002 loss=2.679, ppl=6.41, wps=5855.3, ups=0.09, wpb=64767, bsz=128, num_updates=9477, lr=9.99322e-05, gnorm=2.277, loss_scale=1, train_wall=11, gb_free=2.8, wall=109207 2021-06-20 00:59:04 | INFO | train_inner | epoch 004: 531 / 3002 loss=2.613, ppl=6.12, wps=5771.1, ups=0.09, wpb=64802, bsz=128, num_updates=9478, lr=9.99322e-05, gnorm=2.278, loss_scale=1, train_wall=11, gb_free=2.8, wall=109218 2021-06-20 00:59:15 | INFO | train_inner | epoch 004: 532 / 3002 loss=2.664, ppl=6.34, wps=5824.4, ups=0.09, wpb=64888, bsz=128, num_updates=9479, lr=9.99322e-05, gnorm=2.038, loss_scale=1, train_wall=11, gb_free=2.8, wall=109229 2021-06-20 00:59:26 | INFO | train_inner | epoch 004: 533 / 3002 loss=2.559, ppl=5.89, wps=5871, ups=0.09, wpb=64806, bsz=128, num_updates=9480, lr=9.99322e-05, gnorm=2.041, loss_scale=1, train_wall=11, gb_free=2.8, wall=109240 2021-06-20 00:59:37 | INFO | train_inner | epoch 004: 534 / 3002 loss=2.672, ppl=6.37, wps=5929.1, ups=0.09, wpb=64792, bsz=128, num_updates=9481, lr=9.99321e-05, gnorm=2.248, loss_scale=1, train_wall=10, gb_free=2.8, wall=109251 2021-06-20 00:59:48 | INFO | train_inner | epoch 004: 535 / 3002 loss=2.646, ppl=6.26, wps=5869.8, ups=0.09, wpb=64855, bsz=128, num_updates=9482, lr=9.99321e-05, gnorm=2.104, loss_scale=1, train_wall=11, gb_free=2.8, wall=109262 2021-06-20 00:59:59 | INFO | train_inner | epoch 004: 536 / 3002 loss=2.542, ppl=5.82, wps=5921.9, ups=0.09, wpb=64800, bsz=128, num_updates=9483, lr=9.99321e-05, gnorm=2.177, loss_scale=1, train_wall=10, gb_free=2.8, wall=109273 2021-06-20 01:00:10 | INFO | train_inner | epoch 004: 537 / 3002 loss=2.575, ppl=5.96, wps=5891.8, ups=0.09, wpb=64805, bsz=128, num_updates=9484, lr=9.99321e-05, gnorm=2.744, loss_scale=1, train_wall=11, gb_free=2.8, wall=109284 2021-06-20 01:00:21 | INFO | train_inner | epoch 004: 538 / 3002 loss=2.611, ppl=6.11, wps=5842.7, ups=0.09, wpb=64834, bsz=128, num_updates=9485, lr=9.99321e-05, gnorm=2.564, loss_scale=1, train_wall=11, gb_free=2.8, wall=109295 2021-06-20 01:00:32 | INFO | train_inner | epoch 004: 539 / 3002 loss=2.554, ppl=5.87, wps=5865, ups=0.09, wpb=64746, bsz=128, num_updates=9486, lr=9.99321e-05, gnorm=2.951, loss_scale=1, train_wall=11, gb_free=2.8, wall=109306 2021-06-20 01:00:43 | INFO | train_inner | epoch 004: 540 / 3002 loss=2.51, ppl=5.69, wps=5864.8, ups=0.09, wpb=64884, bsz=128, num_updates=9487, lr=9.99321e-05, gnorm=2.489, loss_scale=1, train_wall=11, gb_free=2.8, wall=109318 2021-06-20 01:00:54 | INFO | train_inner | epoch 004: 541 / 3002 loss=2.668, ppl=6.36, wps=5951.5, ups=0.09, wpb=64888, bsz=128, num_updates=9488, lr=9.99321e-05, gnorm=2.395, loss_scale=1, train_wall=10, gb_free=2.8, wall=109328 2021-06-20 01:01:05 | INFO | train_inner | epoch 004: 542 / 3002 loss=2.621, ppl=6.15, wps=5915.1, ups=0.09, wpb=64865, bsz=128, num_updates=9489, lr=9.99321e-05, gnorm=2.049, loss_scale=1, train_wall=11, gb_free=2.8, wall=109339 2021-06-20 01:01:16 | INFO | train_inner | epoch 004: 543 / 3002 loss=2.549, ppl=5.85, wps=5850.2, ups=0.09, wpb=64805, bsz=128, num_updates=9490, lr=9.99321e-05, gnorm=2.11, loss_scale=1, train_wall=11, gb_free=2.8, wall=109350 2021-06-20 01:01:27 | INFO | train_inner | epoch 004: 544 / 3002 loss=2.632, ppl=6.2, wps=5814.5, ups=0.09, wpb=64783, bsz=128, num_updates=9491, lr=9.99321e-05, gnorm=2.348, loss_scale=1, train_wall=11, gb_free=2.8, wall=109362 2021-06-20 01:01:38 | INFO | train_inner | epoch 004: 545 / 3002 loss=2.697, ppl=6.49, wps=5881.4, ups=0.09, wpb=64831, bsz=128, num_updates=9492, lr=9.99321e-05, gnorm=2.625, loss_scale=1, train_wall=11, gb_free=2.8, wall=109373 2021-06-20 01:01:49 | INFO | train_inner | epoch 004: 546 / 3002 loss=2.533, ppl=5.79, wps=5915.6, ups=0.09, wpb=64899, bsz=128, num_updates=9493, lr=9.99321e-05, gnorm=2.371, loss_scale=1, train_wall=11, gb_free=2.8, wall=109384 2021-06-20 01:02:00 | INFO | train_inner | epoch 004: 547 / 3002 loss=2.475, ppl=5.56, wps=5833.2, ups=0.09, wpb=64886, bsz=128, num_updates=9494, lr=9.9932e-05, gnorm=2.185, loss_scale=1, train_wall=11, gb_free=2.8, wall=109395 2021-06-20 01:02:11 | INFO | train_inner | epoch 004: 548 / 3002 loss=2.591, ppl=6.03, wps=5848.7, ups=0.09, wpb=64861, bsz=128, num_updates=9495, lr=9.9932e-05, gnorm=2.741, loss_scale=1, train_wall=11, gb_free=2.8, wall=109406 2021-06-20 01:02:23 | INFO | train_inner | epoch 004: 549 / 3002 loss=2.666, ppl=6.35, wps=5846.4, ups=0.09, wpb=64863, bsz=128, num_updates=9496, lr=9.9932e-05, gnorm=2.097, loss_scale=1, train_wall=11, gb_free=2.8, wall=109417 2021-06-20 01:02:34 | INFO | train_inner | epoch 004: 550 / 3002 loss=2.586, ppl=6, wps=5868.5, ups=0.09, wpb=64891, bsz=128, num_updates=9497, lr=9.9932e-05, gnorm=2.447, loss_scale=1, train_wall=11, gb_free=2.8, wall=109428 2021-06-20 01:02:45 | INFO | train_inner | epoch 004: 551 / 3002 loss=2.529, ppl=5.77, wps=5786.6, ups=0.09, wpb=64758, bsz=128, num_updates=9498, lr=9.9932e-05, gnorm=2.175, loss_scale=1, train_wall=11, gb_free=2.8, wall=109439 2021-06-20 01:02:56 | INFO | train_inner | epoch 004: 552 / 3002 loss=2.525, ppl=5.75, wps=5931.1, ups=0.09, wpb=64766, bsz=128, num_updates=9499, lr=9.9932e-05, gnorm=2.228, loss_scale=1, train_wall=10, gb_free=2.8, wall=109450 2021-06-20 01:03:07 | INFO | train_inner | epoch 004: 553 / 3002 loss=2.479, ppl=5.57, wps=5815.1, ups=0.09, wpb=64828, bsz=128, num_updates=9500, lr=9.9932e-05, gnorm=2.169, loss_scale=1, train_wall=11, gb_free=2.8, wall=109461 2021-06-20 01:03:18 | INFO | train_inner | epoch 004: 554 / 3002 loss=2.63, ppl=6.19, wps=5874.3, ups=0.09, wpb=64848, bsz=128, num_updates=9501, lr=9.9932e-05, gnorm=2.349, loss_scale=1, train_wall=11, gb_free=2.8, wall=109472 2021-06-20 01:03:29 | INFO | train_inner | epoch 004: 555 / 3002 loss=2.452, ppl=5.47, wps=5754, ups=0.09, wpb=64805, bsz=128, num_updates=9502, lr=9.9932e-05, gnorm=3.079, loss_scale=1, train_wall=11, gb_free=2.8, wall=109484 2021-06-20 01:03:40 | INFO | train_inner | epoch 004: 556 / 3002 loss=2.559, ppl=5.89, wps=5853, ups=0.09, wpb=64772, bsz=128, num_updates=9503, lr=9.9932e-05, gnorm=2.155, loss_scale=1, train_wall=11, gb_free=2.8, wall=109495 2021-06-20 01:03:52 | INFO | train_inner | epoch 004: 557 / 3002 loss=2.582, ppl=5.99, wps=5748.7, ups=0.09, wpb=64803, bsz=128, num_updates=9504, lr=9.9932e-05, gnorm=2.987, loss_scale=1, train_wall=11, gb_free=2.8, wall=109506 2021-06-20 01:04:03 | INFO | train_inner | epoch 004: 558 / 3002 loss=2.642, ppl=6.24, wps=5901.7, ups=0.09, wpb=64849, bsz=128, num_updates=9505, lr=9.9932e-05, gnorm=6.128, loss_scale=1, train_wall=11, gb_free=2.8, wall=109517 2021-06-20 01:04:14 | INFO | train_inner | epoch 004: 559 / 3002 loss=2.656, ppl=6.3, wps=5792.9, ups=0.09, wpb=64857, bsz=128, num_updates=9506, lr=9.99319e-05, gnorm=2.51, loss_scale=1, train_wall=11, gb_free=2.8, wall=109528 2021-06-20 01:04:25 | INFO | train_inner | epoch 004: 560 / 3002 loss=2.597, ppl=6.05, wps=5715.5, ups=0.09, wpb=64800, bsz=128, num_updates=9507, lr=9.99319e-05, gnorm=2.158, loss_scale=1, train_wall=11, gb_free=2.8, wall=109539 2021-06-20 01:04:36 | INFO | train_inner | epoch 004: 561 / 3002 loss=2.473, ppl=5.55, wps=5913.1, ups=0.09, wpb=64913, bsz=128, num_updates=9508, lr=9.99319e-05, gnorm=2.16, loss_scale=1, train_wall=11, gb_free=2.8, wall=109550 2021-06-20 01:04:47 | INFO | train_inner | epoch 004: 562 / 3002 loss=2.599, ppl=6.06, wps=5945.3, ups=0.09, wpb=64891, bsz=128, num_updates=9509, lr=9.99319e-05, gnorm=2.155, loss_scale=1, train_wall=10, gb_free=2.8, wall=109561 2021-06-20 01:04:58 | INFO | train_inner | epoch 004: 563 / 3002 loss=2.671, ppl=6.37, wps=5797.3, ups=0.09, wpb=64884, bsz=128, num_updates=9510, lr=9.99319e-05, gnorm=2.105, loss_scale=1, train_wall=11, gb_free=2.8, wall=109572 2021-06-20 01:05:09 | INFO | train_inner | epoch 004: 564 / 3002 loss=2.613, ppl=6.12, wps=5847.3, ups=0.09, wpb=64772, bsz=128, num_updates=9511, lr=9.99319e-05, gnorm=2.314, loss_scale=1, train_wall=11, gb_free=2.8, wall=109584 2021-06-20 01:05:20 | INFO | train_inner | epoch 004: 565 / 3002 loss=2.698, ppl=6.49, wps=5878.7, ups=0.09, wpb=64824, bsz=128, num_updates=9512, lr=9.99319e-05, gnorm=2.211, loss_scale=1, train_wall=11, gb_free=2.8, wall=109595 2021-06-20 01:05:31 | INFO | train_inner | epoch 004: 566 / 3002 loss=2.461, ppl=5.51, wps=5810, ups=0.09, wpb=64868, bsz=128, num_updates=9513, lr=9.99319e-05, gnorm=3.937, loss_scale=1, train_wall=11, gb_free=2.8, wall=109606 2021-06-20 01:05:43 | INFO | train_inner | epoch 004: 567 / 3002 loss=2.607, ppl=6.09, wps=5796.7, ups=0.09, wpb=64802, bsz=128, num_updates=9514, lr=9.99319e-05, gnorm=2.937, loss_scale=1, train_wall=11, gb_free=2.8, wall=109617 2021-06-20 01:05:53 | INFO | train_inner | epoch 004: 568 / 3002 loss=2.581, ppl=5.98, wps=5947.4, ups=0.09, wpb=64839, bsz=128, num_updates=9515, lr=9.99319e-05, gnorm=2.191, loss_scale=1, train_wall=10, gb_free=2.8, wall=109628 2021-06-20 01:06:05 | INFO | train_inner | epoch 004: 569 / 3002 loss=2.612, ppl=6.11, wps=5773.6, ups=0.09, wpb=64766, bsz=128, num_updates=9516, lr=9.99319e-05, gnorm=2.248, loss_scale=1, train_wall=11, gb_free=2.8, wall=109639 2021-06-20 01:06:16 | INFO | train_inner | epoch 004: 570 / 3002 loss=2.708, ppl=6.54, wps=5881, ups=0.09, wpb=64950, bsz=128, num_updates=9517, lr=9.99319e-05, gnorm=2.136, loss_scale=1, train_wall=11, gb_free=2.8, wall=109650 2021-06-20 01:06:27 | INFO | train_inner | epoch 004: 571 / 3002 loss=2.538, ppl=5.81, wps=5922.4, ups=0.09, wpb=64902, bsz=128, num_updates=9518, lr=9.99319e-05, gnorm=2.244, loss_scale=1, train_wall=11, gb_free=2.8, wall=109661 2021-06-20 01:06:38 | INFO | train_inner | epoch 004: 572 / 3002 loss=2.518, ppl=5.73, wps=5862.2, ups=0.09, wpb=64932, bsz=128, num_updates=9519, lr=9.99318e-05, gnorm=2.486, loss_scale=1, train_wall=11, gb_free=2.8, wall=109672 2021-06-20 01:06:49 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 2021-06-20 01:07:00 | INFO | train_inner | epoch 004: 574 / 3002 loss=2.528, ppl=5.77, wps=2901.3, ups=0.04, wpb=64786, bsz=128, num_updates=9520, lr=9.99318e-05, gnorm=2.404, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=109694 2021-06-20 01:07:11 | INFO | train_inner | epoch 004: 575 / 3002 loss=2.584, ppl=6, wps=5923.9, ups=0.09, wpb=64902, bsz=128, num_updates=9521, lr=9.99318e-05, gnorm=2.253, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=109705 2021-06-20 01:07:22 | INFO | train_inner | epoch 004: 576 / 3002 loss=2.693, ppl=6.47, wps=6041.5, ups=0.09, wpb=64812, bsz=128, num_updates=9522, lr=9.99318e-05, gnorm=2.223, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=109716 2021-06-20 01:07:33 | INFO | train_inner | epoch 004: 577 / 3002 loss=2.61, ppl=6.11, wps=5842.1, ups=0.09, wpb=64818, bsz=128, num_updates=9523, lr=9.99318e-05, gnorm=2.299, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109727 2021-06-20 01:07:44 | INFO | train_inner | epoch 004: 578 / 3002 loss=2.471, ppl=5.55, wps=5806.1, ups=0.09, wpb=64842, bsz=128, num_updates=9524, lr=9.99318e-05, gnorm=18.141, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109738 2021-06-20 01:07:55 | INFO | train_inner | epoch 004: 579 / 3002 loss=2.508, ppl=5.69, wps=5963.8, ups=0.09, wpb=64832, bsz=128, num_updates=9525, lr=9.99318e-05, gnorm=2.172, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=109749 2021-06-20 01:08:06 | INFO | train_inner | epoch 004: 580 / 3002 loss=2.622, ppl=6.16, wps=5847.1, ups=0.09, wpb=64884, bsz=128, num_updates=9526, lr=9.99318e-05, gnorm=2.046, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109760 2021-06-20 01:08:17 | INFO | train_inner | epoch 004: 581 / 3002 loss=2.487, ppl=5.6, wps=5797.9, ups=0.09, wpb=64848, bsz=128, num_updates=9527, lr=9.99318e-05, gnorm=2.317, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109772 2021-06-20 01:08:28 | INFO | train_inner | epoch 004: 582 / 3002 loss=2.526, ppl=5.76, wps=5848.9, ups=0.09, wpb=64783, bsz=128, num_updates=9528, lr=9.99318e-05, gnorm=2.064, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109783 2021-06-20 01:08:39 | INFO | train_inner | epoch 004: 583 / 3002 loss=2.596, ppl=6.04, wps=5887.7, ups=0.09, wpb=64784, bsz=128, num_updates=9529, lr=9.99318e-05, gnorm=2.175, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109794 2021-06-20 01:08:50 | INFO | train_inner | epoch 004: 584 / 3002 loss=2.566, ppl=5.92, wps=5853.1, ups=0.09, wpb=64817, bsz=128, num_updates=9530, lr=9.99318e-05, gnorm=5.772, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109805 2021-06-20 01:09:01 | INFO | train_inner | epoch 004: 585 / 3002 loss=2.597, ppl=6.05, wps=5841.1, ups=0.09, wpb=64829, bsz=128, num_updates=9531, lr=9.99317e-05, gnorm=2.134, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109816 2021-06-20 01:09:13 | INFO | train_inner | epoch 004: 586 / 3002 loss=2.501, ppl=5.66, wps=5847.1, ups=0.09, wpb=64793, bsz=128, num_updates=9532, lr=9.99317e-05, gnorm=2.101, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109827 2021-06-20 01:09:24 | INFO | train_inner | epoch 004: 587 / 3002 loss=2.539, ppl=5.81, wps=5855.5, ups=0.09, wpb=64870, bsz=128, num_updates=9533, lr=9.99317e-05, gnorm=2.098, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109838 2021-06-20 01:09:35 | INFO | train_inner | epoch 004: 588 / 3002 loss=2.549, ppl=5.85, wps=5779.9, ups=0.09, wpb=64838, bsz=128, num_updates=9534, lr=9.99317e-05, gnorm=2.146, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109849 2021-06-20 01:09:46 | INFO | train_inner | epoch 004: 589 / 3002 loss=2.638, ppl=6.23, wps=5856.2, ups=0.09, wpb=64866, bsz=128, num_updates=9535, lr=9.99317e-05, gnorm=2.124, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109860 2021-06-20 01:09:57 | INFO | train_inner | epoch 004: 590 / 3002 loss=2.739, ppl=6.68, wps=5940, ups=0.09, wpb=64813, bsz=128, num_updates=9536, lr=9.99317e-05, gnorm=2.199, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=109871 2021-06-20 01:10:08 | INFO | train_inner | epoch 004: 591 / 3002 loss=2.594, ppl=6.04, wps=5958.9, ups=0.09, wpb=64772, bsz=128, num_updates=9537, lr=9.99317e-05, gnorm=3.385, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=109882 2021-06-20 01:10:19 | INFO | train_inner | epoch 004: 592 / 3002 loss=2.424, ppl=5.37, wps=5904.7, ups=0.09, wpb=64911, bsz=128, num_updates=9538, lr=9.99317e-05, gnorm=2.98, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109893 2021-06-20 01:10:30 | INFO | train_inner | epoch 004: 593 / 3002 loss=2.622, ppl=6.16, wps=5808.2, ups=0.09, wpb=64852, bsz=128, num_updates=9539, lr=9.99317e-05, gnorm=2.562, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109904 2021-06-20 01:10:41 | INFO | train_inner | epoch 004: 594 / 3002 loss=2.622, ppl=6.16, wps=5929.5, ups=0.09, wpb=64816, bsz=128, num_updates=9540, lr=9.99317e-05, gnorm=2.122, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=109915 2021-06-20 01:10:52 | INFO | train_inner | epoch 004: 595 / 3002 loss=2.613, ppl=6.12, wps=5779.7, ups=0.09, wpb=64860, bsz=128, num_updates=9541, lr=9.99317e-05, gnorm=1.988, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109926 2021-06-20 01:11:03 | INFO | train_inner | epoch 004: 596 / 3002 loss=2.535, ppl=5.8, wps=5853, ups=0.09, wpb=64844, bsz=128, num_updates=9542, lr=9.99317e-05, gnorm=2.109, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109937 2021-06-20 01:11:14 | INFO | train_inner | epoch 004: 597 / 3002 loss=2.731, ppl=6.64, wps=5870.4, ups=0.09, wpb=64756, bsz=128, num_updates=9543, lr=9.99317e-05, gnorm=2.137, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109948 2021-06-20 01:11:25 | INFO | train_inner | epoch 004: 598 / 3002 loss=2.571, ppl=5.94, wps=5859, ups=0.09, wpb=64816, bsz=128, num_updates=9544, lr=9.99316e-05, gnorm=2.162, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109960 2021-06-20 01:11:36 | INFO | train_inner | epoch 004: 599 / 3002 loss=2.669, ppl=6.36, wps=5789.7, ups=0.09, wpb=64833, bsz=128, num_updates=9545, lr=9.99316e-05, gnorm=3.397, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109971 2021-06-20 01:11:47 | INFO | train_inner | epoch 004: 600 / 3002 loss=2.521, ppl=5.74, wps=5923, ups=0.09, wpb=64868, bsz=128, num_updates=9546, lr=9.99316e-05, gnorm=2.169, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=109982 2021-06-20 01:11:58 | INFO | train_inner | epoch 004: 601 / 3002 loss=2.754, ppl=6.75, wps=5955.5, ups=0.09, wpb=64871, bsz=128, num_updates=9547, lr=9.99316e-05, gnorm=2.444, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=109993 2021-06-20 01:12:09 | INFO | train_inner | epoch 004: 602 / 3002 loss=2.483, ppl=5.59, wps=5931.4, ups=0.09, wpb=64882, bsz=128, num_updates=9548, lr=9.99316e-05, gnorm=2.037, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110004 2021-06-20 01:12:20 | INFO | train_inner | epoch 004: 603 / 3002 loss=2.542, ppl=5.82, wps=5786.9, ups=0.09, wpb=64872, bsz=128, num_updates=9549, lr=9.99316e-05, gnorm=2.102, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110015 2021-06-20 01:12:31 | INFO | train_inner | epoch 004: 604 / 3002 loss=2.469, ppl=5.53, wps=5917.8, ups=0.09, wpb=64786, bsz=128, num_updates=9550, lr=9.99316e-05, gnorm=2.115, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110026 2021-06-20 01:12:42 | INFO | train_inner | epoch 004: 605 / 3002 loss=2.703, ppl=6.51, wps=5912.9, ups=0.09, wpb=64756, bsz=128, num_updates=9551, lr=9.99316e-05, gnorm=2.169, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110037 2021-06-20 01:12:53 | INFO | train_inner | epoch 004: 606 / 3002 loss=2.495, ppl=5.64, wps=5893.4, ups=0.09, wpb=64855, bsz=128, num_updates=9552, lr=9.99316e-05, gnorm=2.088, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110048 2021-06-20 01:13:04 | INFO | train_inner | epoch 004: 607 / 3002 loss=2.679, ppl=6.4, wps=6003.4, ups=0.09, wpb=64830, bsz=128, num_updates=9553, lr=9.99316e-05, gnorm=2.101, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110058 2021-06-20 01:13:15 | INFO | train_inner | epoch 004: 608 / 3002 loss=2.542, ppl=5.82, wps=5911.7, ups=0.09, wpb=64872, bsz=128, num_updates=9554, lr=9.99316e-05, gnorm=2.535, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110069 2021-06-20 01:13:26 | INFO | train_inner | epoch 004: 609 / 3002 loss=2.586, ppl=6, wps=5927.5, ups=0.09, wpb=64836, bsz=128, num_updates=9555, lr=9.99316e-05, gnorm=2.646, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110080 2021-06-20 01:13:37 | INFO | train_inner | epoch 004: 610 / 3002 loss=2.566, ppl=5.92, wps=5766.8, ups=0.09, wpb=64859, bsz=128, num_updates=9556, lr=9.99315e-05, gnorm=2.171, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110092 2021-06-20 01:13:49 | INFO | train_inner | epoch 004: 611 / 3002 loss=2.562, ppl=5.91, wps=5727.8, ups=0.09, wpb=64848, bsz=128, num_updates=9557, lr=9.99315e-05, gnorm=4.397, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110103 2021-06-20 01:14:00 | INFO | train_inner | epoch 004: 612 / 3002 loss=2.695, ppl=6.47, wps=5800.3, ups=0.09, wpb=64752, bsz=128, num_updates=9558, lr=9.99315e-05, gnorm=2.121, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110114 2021-06-20 01:14:11 | INFO | train_inner | epoch 004: 613 / 3002 loss=2.72, ppl=6.59, wps=5892.7, ups=0.09, wpb=64829, bsz=128, num_updates=9559, lr=9.99315e-05, gnorm=2.205, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110125 2021-06-20 01:14:22 | INFO | train_inner | epoch 004: 614 / 3002 loss=2.659, ppl=6.32, wps=5813.2, ups=0.09, wpb=64678, bsz=128, num_updates=9560, lr=9.99315e-05, gnorm=2.408, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110136 2021-06-20 01:14:33 | INFO | train_inner | epoch 004: 615 / 3002 loss=2.679, ppl=6.41, wps=5934, ups=0.09, wpb=64913, bsz=128, num_updates=9561, lr=9.99315e-05, gnorm=2.098, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110147 2021-06-20 01:14:44 | INFO | train_inner | epoch 004: 616 / 3002 loss=2.542, ppl=5.82, wps=5852.1, ups=0.09, wpb=64930, bsz=128, num_updates=9562, lr=9.99315e-05, gnorm=2.323, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110158 2021-06-20 01:14:55 | INFO | train_inner | epoch 004: 617 / 3002 loss=2.529, ppl=5.77, wps=5825.1, ups=0.09, wpb=64810, bsz=128, num_updates=9563, lr=9.99315e-05, gnorm=2.188, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110169 2021-06-20 01:15:06 | INFO | train_inner | epoch 004: 618 / 3002 loss=2.602, ppl=6.07, wps=5958.6, ups=0.09, wpb=64908, bsz=128, num_updates=9564, lr=9.99315e-05, gnorm=2.055, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110180 2021-06-20 01:15:17 | INFO | train_inner | epoch 004: 619 / 3002 loss=2.632, ppl=6.2, wps=5852.4, ups=0.09, wpb=64858, bsz=128, num_updates=9565, lr=9.99315e-05, gnorm=2.244, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110191 2021-06-20 01:15:28 | INFO | train_inner | epoch 004: 620 / 3002 loss=2.582, ppl=5.99, wps=5966.5, ups=0.09, wpb=64810, bsz=128, num_updates=9566, lr=9.99315e-05, gnorm=2.112, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110202 2021-06-20 01:15:39 | INFO | train_inner | epoch 004: 621 / 3002 loss=2.692, ppl=6.46, wps=6001.8, ups=0.09, wpb=64864, bsz=128, num_updates=9567, lr=9.99315e-05, gnorm=2.818, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110213 2021-06-20 01:15:50 | INFO | train_inner | epoch 004: 622 / 3002 loss=2.435, ppl=5.41, wps=5886.3, ups=0.09, wpb=64766, bsz=128, num_updates=9568, lr=9.99315e-05, gnorm=2.068, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110224 2021-06-20 01:16:01 | INFO | train_inner | epoch 004: 623 / 3002 loss=2.556, ppl=5.88, wps=5814.8, ups=0.09, wpb=64819, bsz=128, num_updates=9569, lr=9.99314e-05, gnorm=2.047, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110235 2021-06-20 01:16:12 | INFO | train_inner | epoch 004: 624 / 3002 loss=2.601, ppl=6.07, wps=5862.3, ups=0.09, wpb=64815, bsz=128, num_updates=9570, lr=9.99314e-05, gnorm=2.151, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110246 2021-06-20 01:16:23 | INFO | train_inner | epoch 004: 625 / 3002 loss=2.52, ppl=5.73, wps=5832.5, ups=0.09, wpb=64910, bsz=128, num_updates=9571, lr=9.99314e-05, gnorm=2.333, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110257 2021-06-20 01:16:34 | INFO | train_inner | epoch 004: 626 / 3002 loss=2.666, ppl=6.35, wps=6008.6, ups=0.09, wpb=64796, bsz=128, num_updates=9572, lr=9.99314e-05, gnorm=2.249, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110268 2021-06-20 01:16:45 | INFO | train_inner | epoch 004: 627 / 3002 loss=2.605, ppl=6.08, wps=5786.4, ups=0.09, wpb=64840, bsz=128, num_updates=9573, lr=9.99314e-05, gnorm=2.084, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110279 2021-06-20 01:16:56 | INFO | train_inner | epoch 004: 628 / 3002 loss=2.575, ppl=5.96, wps=5917.8, ups=0.09, wpb=64909, bsz=128, num_updates=9574, lr=9.99314e-05, gnorm=2.266, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110290 2021-06-20 01:17:07 | INFO | train_inner | epoch 004: 629 / 3002 loss=2.724, ppl=6.61, wps=5881.9, ups=0.09, wpb=64811, bsz=128, num_updates=9575, lr=9.99314e-05, gnorm=2.3, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110301 2021-06-20 01:17:18 | INFO | train_inner | epoch 004: 630 / 3002 loss=2.591, ppl=6.02, wps=5836.7, ups=0.09, wpb=64822, bsz=128, num_updates=9576, lr=9.99314e-05, gnorm=2.139, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110312 2021-06-20 01:17:29 | INFO | train_inner | epoch 004: 631 / 3002 loss=2.679, ppl=6.41, wps=5825.4, ups=0.09, wpb=64791, bsz=128, num_updates=9577, lr=9.99314e-05, gnorm=2.134, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110324 2021-06-20 01:17:40 | INFO | train_inner | epoch 004: 632 / 3002 loss=2.578, ppl=5.97, wps=5847.8, ups=0.09, wpb=64840, bsz=128, num_updates=9578, lr=9.99314e-05, gnorm=2.121, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110335 2021-06-20 01:17:51 | INFO | train_inner | epoch 004: 633 / 3002 loss=2.62, ppl=6.15, wps=5862.4, ups=0.09, wpb=64822, bsz=128, num_updates=9579, lr=9.99314e-05, gnorm=2.467, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110346 2021-06-20 01:18:02 | INFO | train_inner | epoch 004: 634 / 3002 loss=2.499, ppl=5.65, wps=5903.8, ups=0.09, wpb=64826, bsz=128, num_updates=9580, lr=9.99314e-05, gnorm=2.174, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110357 2021-06-20 01:18:13 | INFO | train_inner | epoch 004: 635 / 3002 loss=2.656, ppl=6.3, wps=5876.4, ups=0.09, wpb=64847, bsz=128, num_updates=9581, lr=9.99313e-05, gnorm=2.178, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110368 2021-06-20 01:18:24 | INFO | train_inner | epoch 004: 636 / 3002 loss=2.521, ppl=5.74, wps=5906.6, ups=0.09, wpb=64854, bsz=128, num_updates=9582, lr=9.99313e-05, gnorm=2.176, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110379 2021-06-20 01:18:35 | INFO | train_inner | epoch 004: 637 / 3002 loss=2.55, ppl=5.86, wps=6029.4, ups=0.09, wpb=64851, bsz=128, num_updates=9583, lr=9.99313e-05, gnorm=2.337, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110389 2021-06-20 01:18:46 | INFO | train_inner | epoch 004: 638 / 3002 loss=2.767, ppl=6.81, wps=5820.7, ups=0.09, wpb=64791, bsz=128, num_updates=9584, lr=9.99313e-05, gnorm=2.477, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110401 2021-06-20 01:18:57 | INFO | train_inner | epoch 004: 639 / 3002 loss=2.625, ppl=6.17, wps=5942.6, ups=0.09, wpb=64872, bsz=128, num_updates=9585, lr=9.99313e-05, gnorm=2.108, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110412 2021-06-20 01:19:08 | INFO | train_inner | epoch 004: 640 / 3002 loss=2.613, ppl=6.12, wps=5972.1, ups=0.09, wpb=64840, bsz=128, num_updates=9586, lr=9.99313e-05, gnorm=2.15, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110422 2021-06-20 01:19:19 | INFO | train_inner | epoch 004: 641 / 3002 loss=2.528, ppl=5.77, wps=5958.4, ups=0.09, wpb=64781, bsz=128, num_updates=9587, lr=9.99313e-05, gnorm=2.773, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110433 2021-06-20 01:19:30 | INFO | train_inner | epoch 004: 642 / 3002 loss=2.68, ppl=6.41, wps=5792.3, ups=0.09, wpb=64735, bsz=128, num_updates=9588, lr=9.99313e-05, gnorm=2.065, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110444 2021-06-20 01:19:41 | INFO | train_inner | epoch 004: 643 / 3002 loss=2.355, ppl=5.11, wps=5749, ups=0.09, wpb=64846, bsz=128, num_updates=9589, lr=9.99313e-05, gnorm=2.204, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110456 2021-06-20 01:19:52 | INFO | train_inner | epoch 004: 644 / 3002 loss=2.567, ppl=5.92, wps=5864.7, ups=0.09, wpb=64844, bsz=128, num_updates=9590, lr=9.99313e-05, gnorm=4.867, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110467 2021-06-20 01:20:04 | INFO | train_inner | epoch 004: 645 / 3002 loss=2.47, ppl=5.54, wps=5750.9, ups=0.09, wpb=64779, bsz=128, num_updates=9591, lr=9.99313e-05, gnorm=2.108, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110478 2021-06-20 01:20:15 | INFO | train_inner | epoch 004: 646 / 3002 loss=2.595, ppl=6.04, wps=5845.1, ups=0.09, wpb=64860, bsz=128, num_updates=9592, lr=9.99313e-05, gnorm=2.19, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110489 2021-06-20 01:20:26 | INFO | train_inner | epoch 004: 647 / 3002 loss=2.567, ppl=5.93, wps=5925.5, ups=0.09, wpb=64917, bsz=128, num_updates=9593, lr=9.99313e-05, gnorm=2.133, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110500 2021-06-20 01:20:37 | INFO | train_inner | epoch 004: 648 / 3002 loss=2.551, ppl=5.86, wps=5890.3, ups=0.09, wpb=64822, bsz=128, num_updates=9594, lr=9.99312e-05, gnorm=2.105, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110511 2021-06-20 01:20:48 | INFO | train_inner | epoch 004: 649 / 3002 loss=2.646, ppl=6.26, wps=5974.8, ups=0.09, wpb=64865, bsz=128, num_updates=9595, lr=9.99312e-05, gnorm=2.161, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110522 2021-06-20 01:20:59 | INFO | train_inner | epoch 004: 650 / 3002 loss=2.696, ppl=6.48, wps=5896.7, ups=0.09, wpb=64828, bsz=128, num_updates=9596, lr=9.99312e-05, gnorm=2.09, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110533 2021-06-20 01:21:10 | INFO | train_inner | epoch 004: 651 / 3002 loss=2.526, ppl=5.76, wps=5811.7, ups=0.09, wpb=64842, bsz=128, num_updates=9597, lr=9.99312e-05, gnorm=2.02, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110544 2021-06-20 01:21:21 | INFO | train_inner | epoch 004: 652 / 3002 loss=2.485, ppl=5.6, wps=5734.2, ups=0.09, wpb=64834, bsz=128, num_updates=9598, lr=9.99312e-05, gnorm=2.307, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110555 2021-06-20 01:21:32 | INFO | train_inner | epoch 004: 653 / 3002 loss=2.55, ppl=5.86, wps=5833.1, ups=0.09, wpb=64848, bsz=128, num_updates=9599, lr=9.99312e-05, gnorm=2.556, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110567 2021-06-20 01:21:43 | INFO | train_inner | epoch 004: 654 / 3002 loss=2.511, ppl=5.7, wps=5826.2, ups=0.09, wpb=64873, bsz=128, num_updates=9600, lr=9.99312e-05, gnorm=4.799, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110578 2021-06-20 01:21:54 | INFO | train_inner | epoch 004: 655 / 3002 loss=2.668, ppl=6.36, wps=5844.4, ups=0.09, wpb=64796, bsz=128, num_updates=9601, lr=9.99312e-05, gnorm=2.185, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110589 2021-06-20 01:22:06 | INFO | train_inner | epoch 004: 656 / 3002 loss=2.604, ppl=6.08, wps=5745.8, ups=0.09, wpb=64748, bsz=128, num_updates=9602, lr=9.99312e-05, gnorm=2.52, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110600 2021-06-20 01:22:17 | INFO | train_inner | epoch 004: 657 / 3002 loss=2.579, ppl=5.98, wps=5890.5, ups=0.09, wpb=64846, bsz=128, num_updates=9603, lr=9.99312e-05, gnorm=2.325, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110611 2021-06-20 01:22:28 | INFO | train_inner | epoch 004: 658 / 3002 loss=2.571, ppl=5.94, wps=5858.2, ups=0.09, wpb=64859, bsz=128, num_updates=9604, lr=9.99312e-05, gnorm=2.068, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110622 2021-06-20 01:22:39 | INFO | train_inner | epoch 004: 659 / 3002 loss=2.647, ppl=6.26, wps=5795.4, ups=0.09, wpb=64771, bsz=128, num_updates=9605, lr=9.99312e-05, gnorm=2.039, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110633 2021-06-20 01:22:50 | INFO | train_inner | epoch 004: 660 / 3002 loss=2.483, ppl=5.59, wps=5832.9, ups=0.09, wpb=64730, bsz=128, num_updates=9606, lr=9.99311e-05, gnorm=2.67, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110644 2021-06-20 01:23:01 | INFO | train_inner | epoch 004: 661 / 3002 loss=2.687, ppl=6.44, wps=6034.4, ups=0.09, wpb=64834, bsz=128, num_updates=9607, lr=9.99311e-05, gnorm=3.469, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110655 2021-06-20 01:23:12 | INFO | train_inner | epoch 004: 662 / 3002 loss=2.566, ppl=5.92, wps=5851.6, ups=0.09, wpb=64785, bsz=128, num_updates=9608, lr=9.99311e-05, gnorm=2.219, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110666 2021-06-20 01:23:23 | INFO | train_inner | epoch 004: 663 / 3002 loss=2.428, ppl=5.38, wps=5896.7, ups=0.09, wpb=64835, bsz=128, num_updates=9609, lr=9.99311e-05, gnorm=2.333, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110677 2021-06-20 01:23:34 | INFO | train_inner | epoch 004: 664 / 3002 loss=2.597, ppl=6.05, wps=5800.6, ups=0.09, wpb=64845, bsz=128, num_updates=9610, lr=9.99311e-05, gnorm=4.445, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110688 2021-06-20 01:23:45 | INFO | train_inner | epoch 004: 665 / 3002 loss=2.684, ppl=6.43, wps=5871, ups=0.09, wpb=64867, bsz=128, num_updates=9611, lr=9.99311e-05, gnorm=4.749, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110699 2021-06-20 01:23:56 | INFO | train_inner | epoch 004: 666 / 3002 loss=2.644, ppl=6.25, wps=5845.4, ups=0.09, wpb=64872, bsz=128, num_updates=9612, lr=9.99311e-05, gnorm=2.05, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110710 2021-06-20 01:24:07 | INFO | train_inner | epoch 004: 667 / 3002 loss=2.493, ppl=5.63, wps=5809.8, ups=0.09, wpb=64817, bsz=128, num_updates=9613, lr=9.99311e-05, gnorm=2.561, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110722 2021-06-20 01:24:18 | INFO | train_inner | epoch 004: 668 / 3002 loss=2.632, ppl=6.2, wps=6025.7, ups=0.09, wpb=64777, bsz=128, num_updates=9614, lr=9.99311e-05, gnorm=2.173, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110732 2021-06-20 01:24:29 | INFO | train_inner | epoch 004: 669 / 3002 loss=2.591, ppl=6.03, wps=5871.3, ups=0.09, wpb=64844, bsz=128, num_updates=9615, lr=9.99311e-05, gnorm=2.102, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110743 2021-06-20 01:24:40 | INFO | train_inner | epoch 004: 670 / 3002 loss=2.688, ppl=6.45, wps=5698.4, ups=0.09, wpb=64771, bsz=128, num_updates=9616, lr=9.99311e-05, gnorm=2.184, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110755 2021-06-20 01:24:52 | INFO | train_inner | epoch 004: 671 / 3002 loss=2.587, ppl=6.01, wps=5811.4, ups=0.09, wpb=64813, bsz=128, num_updates=9617, lr=9.99311e-05, gnorm=2.725, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110766 2021-06-20 01:25:03 | INFO | train_inner | epoch 004: 672 / 3002 loss=2.518, ppl=5.73, wps=5838.5, ups=0.09, wpb=64870, bsz=128, num_updates=9618, lr=9.99311e-05, gnorm=2.14, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110777 2021-06-20 01:25:14 | INFO | train_inner | epoch 004: 673 / 3002 loss=2.5, ppl=5.66, wps=5856, ups=0.09, wpb=64879, bsz=128, num_updates=9619, lr=9.9931e-05, gnorm=2.461, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110788 2021-06-20 01:25:25 | INFO | train_inner | epoch 004: 674 / 3002 loss=2.563, ppl=5.91, wps=5895, ups=0.09, wpb=64846, bsz=128, num_updates=9620, lr=9.9931e-05, gnorm=2.099, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110799 2021-06-20 01:25:36 | INFO | train_inner | epoch 004: 675 / 3002 loss=2.521, ppl=5.74, wps=5849.9, ups=0.09, wpb=64837, bsz=128, num_updates=9621, lr=9.9931e-05, gnorm=3.071, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110810 2021-06-20 01:25:47 | INFO | train_inner | epoch 004: 676 / 3002 loss=2.654, ppl=6.29, wps=5878.8, ups=0.09, wpb=64774, bsz=128, num_updates=9622, lr=9.9931e-05, gnorm=2.312, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110821 2021-06-20 01:25:58 | INFO | train_inner | epoch 004: 677 / 3002 loss=2.839, ppl=7.15, wps=5823.4, ups=0.09, wpb=64706, bsz=128, num_updates=9623, lr=9.9931e-05, gnorm=2.494, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110832 2021-06-20 01:26:09 | INFO | train_inner | epoch 004: 678 / 3002 loss=2.549, ppl=5.85, wps=5836.2, ups=0.09, wpb=64800, bsz=128, num_updates=9624, lr=9.9931e-05, gnorm=2.897, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110843 2021-06-20 01:26:20 | INFO | train_inner | epoch 004: 679 / 3002 loss=2.648, ppl=6.27, wps=5725.7, ups=0.09, wpb=64841, bsz=128, num_updates=9625, lr=9.9931e-05, gnorm=2.232, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110855 2021-06-20 01:26:32 | INFO | train_inner | epoch 004: 680 / 3002 loss=2.57, ppl=5.94, wps=5804.8, ups=0.09, wpb=64838, bsz=128, num_updates=9626, lr=9.9931e-05, gnorm=2.3, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110866 2021-06-20 01:26:43 | INFO | train_inner | epoch 004: 681 / 3002 loss=2.593, ppl=6.03, wps=5854, ups=0.09, wpb=64832, bsz=128, num_updates=9627, lr=9.9931e-05, gnorm=2.19, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110877 2021-06-20 01:26:54 | INFO | train_inner | epoch 004: 682 / 3002 loss=2.495, ppl=5.64, wps=5700.6, ups=0.09, wpb=64805, bsz=128, num_updates=9628, lr=9.9931e-05, gnorm=3.7, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110888 2021-06-20 01:27:05 | INFO | train_inner | epoch 004: 683 / 3002 loss=2.76, ppl=6.77, wps=5996.5, ups=0.09, wpb=64767, bsz=128, num_updates=9629, lr=9.9931e-05, gnorm=2.272, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110899 2021-06-20 01:27:16 | INFO | train_inner | epoch 004: 684 / 3002 loss=2.531, ppl=5.78, wps=5851.8, ups=0.09, wpb=64780, bsz=128, num_updates=9630, lr=9.9931e-05, gnorm=2.249, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110910 2021-06-20 01:27:27 | INFO | train_inner | epoch 004: 685 / 3002 loss=2.647, ppl=6.26, wps=5853.1, ups=0.09, wpb=64838, bsz=128, num_updates=9631, lr=9.99309e-05, gnorm=2.02, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110921 2021-06-20 01:27:38 | INFO | train_inner | epoch 004: 686 / 3002 loss=2.621, ppl=6.15, wps=5806.1, ups=0.09, wpb=64789, bsz=128, num_updates=9632, lr=9.99309e-05, gnorm=2.027, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110933 2021-06-20 01:27:49 | INFO | train_inner | epoch 004: 687 / 3002 loss=2.517, ppl=5.72, wps=5943.4, ups=0.09, wpb=64793, bsz=128, num_updates=9633, lr=9.99309e-05, gnorm=3.404, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110943 2021-06-20 01:28:00 | INFO | train_inner | epoch 004: 688 / 3002 loss=2.54, ppl=5.81, wps=5811.4, ups=0.09, wpb=64868, bsz=128, num_updates=9634, lr=9.99309e-05, gnorm=2.158, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110955 2021-06-20 01:28:12 | INFO | train_inner | epoch 004: 689 / 3002 loss=2.498, ppl=5.65, wps=5740, ups=0.09, wpb=64922, bsz=128, num_updates=9635, lr=9.99309e-05, gnorm=2.09, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110966 2021-06-20 01:28:23 | INFO | train_inner | epoch 004: 690 / 3002 loss=2.431, ppl=5.39, wps=5797.5, ups=0.09, wpb=64912, bsz=128, num_updates=9636, lr=9.99309e-05, gnorm=2.013, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110977 2021-06-20 01:28:34 | INFO | train_inner | epoch 004: 691 / 3002 loss=2.652, ppl=6.29, wps=5924.3, ups=0.09, wpb=64861, bsz=128, num_updates=9637, lr=9.99309e-05, gnorm=23.92, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110988 2021-06-20 01:28:45 | INFO | train_inner | epoch 004: 692 / 3002 loss=2.484, ppl=5.59, wps=5833.6, ups=0.09, wpb=64883, bsz=128, num_updates=9638, lr=9.99309e-05, gnorm=2.229, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110999 2021-06-20 01:28:56 | INFO | train_inner | epoch 004: 693 / 3002 loss=2.755, ppl=6.75, wps=5892.3, ups=0.09, wpb=64818, bsz=128, num_updates=9639, lr=9.99309e-05, gnorm=2.42, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=111010 2021-06-20 01:29:07 | INFO | train_inner | epoch 004: 694 / 3002 loss=2.486, ppl=5.6, wps=5854.8, ups=0.09, wpb=64846, bsz=128, num_updates=9640, lr=9.99309e-05, gnorm=2.106, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=111021 2021-06-20 01:29:18 | INFO | train_inner | epoch 004: 695 / 3002 loss=2.496, ppl=5.64, wps=5914.5, ups=0.09, wpb=64763, bsz=128, num_updates=9641, lr=9.99309e-05, gnorm=1.965, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=111032 2021-06-20 01:29:29 | INFO | train_inner | epoch 004: 696 / 3002 loss=2.658, ppl=6.31, wps=5681.5, ups=0.09, wpb=64771, bsz=128, num_updates=9642, lr=9.99309e-05, gnorm=2.118, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=111044 2021-06-20 01:29:40 | INFO | train_inner | epoch 004: 697 / 3002 loss=2.583, ppl=5.99, wps=5989.8, ups=0.09, wpb=64916, bsz=128, num_updates=9643, lr=9.99309e-05, gnorm=2.18, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=111054 2021-06-20 01:29:51 | INFO | train_inner | epoch 004: 698 / 3002 loss=2.738, ppl=6.67, wps=5804.8, ups=0.09, wpb=64837, bsz=128, num_updates=9644, lr=9.99308e-05, gnorm=2.205, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=111066 2021-06-20 01:30:02 | INFO | train_inner | epoch 004: 699 / 3002 loss=2.615, ppl=6.13, wps=5961.8, ups=0.09, wpb=64869, bsz=128, num_updates=9645, lr=9.99308e-05, gnorm=2.431, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=111076 2021-06-20 01:30:13 | INFO | train_inner | epoch 004: 700 / 3002 loss=2.696, ppl=6.48, wps=5865.2, ups=0.09, wpb=64741, bsz=128, num_updates=9646, lr=9.99308e-05, gnorm=2.094, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=111088 2021-06-20 01:30:24 | INFO | train_inner | epoch 004: 701 / 3002 loss=2.618, ppl=6.14, wps=5840.5, ups=0.09, wpb=64915, bsz=128, num_updates=9647, lr=9.99308e-05, gnorm=14.524, loss_scale=1, train_wall=11, gb_free=2.8, wall=111099 2021-06-20 01:30:35 | INFO | train_inner | epoch 004: 702 / 3002 loss=2.619, ppl=6.14, wps=5854.6, ups=0.09, wpb=64841, bsz=128, num_updates=9648, lr=9.99308e-05, gnorm=2.288, loss_scale=1, train_wall=11, gb_free=2.8, wall=111110 2021-06-20 01:30:47 | INFO | train_inner | epoch 004: 703 / 3002 loss=2.528, ppl=5.77, wps=5820.3, ups=0.09, wpb=64802, bsz=128, num_updates=9649, lr=9.99308e-05, gnorm=2.152, loss_scale=1, train_wall=11, gb_free=2.8, wall=111121 2021-06-20 01:30:58 | INFO | train_inner | epoch 004: 704 / 3002 loss=2.738, ppl=6.67, wps=5834.6, ups=0.09, wpb=64824, bsz=128, num_updates=9650, lr=9.99308e-05, gnorm=2.139, loss_scale=1, train_wall=11, gb_free=2.8, wall=111132 2021-06-20 01:31:09 | INFO | train_inner | epoch 004: 705 / 3002 loss=2.588, ppl=6.01, wps=5888.5, ups=0.09, wpb=64921, bsz=128, num_updates=9651, lr=9.99308e-05, gnorm=2.096, loss_scale=1, train_wall=11, gb_free=2.8, wall=111143 2021-06-20 01:31:20 | INFO | train_inner | epoch 004: 706 / 3002 loss=2.767, ppl=6.81, wps=5839.2, ups=0.09, wpb=64779, bsz=128, num_updates=9652, lr=9.99308e-05, gnorm=2.164, loss_scale=1, train_wall=11, gb_free=2.8, wall=111154 2021-06-20 01:31:31 | INFO | train_inner | epoch 004: 707 / 3002 loss=2.623, ppl=6.16, wps=5798.9, ups=0.09, wpb=64858, bsz=128, num_updates=9653, lr=9.99308e-05, gnorm=2.125, loss_scale=1, train_wall=11, gb_free=2.8, wall=111165 2021-06-20 01:31:42 | INFO | train_inner | epoch 004: 708 / 3002 loss=2.541, ppl=5.82, wps=5894.8, ups=0.09, wpb=64807, bsz=128, num_updates=9654, lr=9.99308e-05, gnorm=2.378, loss_scale=1, train_wall=11, gb_free=2.8, wall=111176 2021-06-20 01:31:53 | INFO | train_inner | epoch 004: 709 / 3002 loss=2.529, ppl=5.77, wps=5928.3, ups=0.09, wpb=64806, bsz=128, num_updates=9655, lr=9.99308e-05, gnorm=2.14, loss_scale=1, train_wall=10, gb_free=2.8, wall=111187 2021-06-20 01:32:04 | INFO | train_inner | epoch 004: 710 / 3002 loss=2.724, ppl=6.61, wps=5847.5, ups=0.09, wpb=64783, bsz=128, num_updates=9656, lr=9.99307e-05, gnorm=6.462, loss_scale=1, train_wall=11, gb_free=2.8, wall=111198 2021-06-20 01:32:15 | INFO | train_inner | epoch 004: 711 / 3002 loss=2.636, ppl=6.22, wps=5946.4, ups=0.09, wpb=64854, bsz=128, num_updates=9657, lr=9.99307e-05, gnorm=4.893, loss_scale=1, train_wall=10, gb_free=2.8, wall=111209 2021-06-20 01:32:26 | INFO | train_inner | epoch 004: 712 / 3002 loss=2.576, ppl=5.96, wps=5699.7, ups=0.09, wpb=64878, bsz=128, num_updates=9658, lr=9.99307e-05, gnorm=2.571, loss_scale=1, train_wall=11, gb_free=2.8, wall=111221 2021-06-20 01:32:37 | INFO | train_inner | epoch 004: 713 / 3002 loss=2.556, ppl=5.88, wps=5827.8, ups=0.09, wpb=64842, bsz=128, num_updates=9659, lr=9.99307e-05, gnorm=2.153, loss_scale=1, train_wall=11, gb_free=2.8, wall=111232 2021-06-20 01:32:49 | INFO | train_inner | epoch 004: 714 / 3002 loss=2.504, ppl=5.67, wps=5772.5, ups=0.09, wpb=64831, bsz=128, num_updates=9660, lr=9.99307e-05, gnorm=2.536, loss_scale=1, train_wall=11, gb_free=2.8, wall=111243 2021-06-20 01:33:00 | INFO | train_inner | epoch 004: 715 / 3002 loss=2.623, ppl=6.16, wps=5842.7, ups=0.09, wpb=64811, bsz=128, num_updates=9661, lr=9.99307e-05, gnorm=2.274, loss_scale=1, train_wall=11, gb_free=2.8, wall=111254 2021-06-20 01:33:11 | INFO | train_inner | epoch 004: 716 / 3002 loss=2.651, ppl=6.28, wps=5884.1, ups=0.09, wpb=64851, bsz=128, num_updates=9662, lr=9.99307e-05, gnorm=2.208, loss_scale=1, train_wall=11, gb_free=2.8, wall=111265 2021-06-20 01:33:22 | INFO | train_inner | epoch 004: 717 / 3002 loss=2.598, ppl=6.06, wps=5822, ups=0.09, wpb=64833, bsz=128, num_updates=9663, lr=9.99307e-05, gnorm=2.107, loss_scale=1, train_wall=11, gb_free=2.8, wall=111276 2021-06-20 01:33:33 | INFO | train_inner | epoch 004: 718 / 3002 loss=2.597, ppl=6.05, wps=5921.7, ups=0.09, wpb=64754, bsz=128, num_updates=9664, lr=9.99307e-05, gnorm=3.353, loss_scale=1, train_wall=11, gb_free=2.8, wall=111287 2021-06-20 01:33:44 | INFO | train_inner | epoch 004: 719 / 3002 loss=2.669, ppl=6.36, wps=5788.7, ups=0.09, wpb=64717, bsz=128, num_updates=9665, lr=9.99307e-05, gnorm=2.172, loss_scale=1, train_wall=11, gb_free=2.8, wall=111298 2021-06-20 01:33:55 | INFO | train_inner | epoch 004: 720 / 3002 loss=2.633, ppl=6.2, wps=5929.8, ups=0.09, wpb=64864, bsz=128, num_updates=9666, lr=9.99307e-05, gnorm=3.841, loss_scale=1, train_wall=10, gb_free=2.8, wall=111309 2021-06-20 01:34:06 | INFO | train_inner | epoch 004: 721 / 3002 loss=2.565, ppl=5.92, wps=5886.7, ups=0.09, wpb=64821, bsz=128, num_updates=9667, lr=9.99307e-05, gnorm=2.012, loss_scale=1, train_wall=11, gb_free=2.8, wall=111320 2021-06-20 01:34:17 | INFO | train_inner | epoch 004: 722 / 3002 loss=2.544, ppl=5.83, wps=5818.8, ups=0.09, wpb=64881, bsz=128, num_updates=9668, lr=9.99307e-05, gnorm=2.071, loss_scale=1, train_wall=11, gb_free=2.8, wall=111331 2021-06-20 01:34:28 | INFO | train_inner | epoch 004: 723 / 3002 loss=2.633, ppl=6.2, wps=5912.7, ups=0.09, wpb=64800, bsz=128, num_updates=9669, lr=9.99306e-05, gnorm=2.13, loss_scale=1, train_wall=10, gb_free=2.8, wall=111342 2021-06-20 01:34:39 | INFO | train_inner | epoch 004: 724 / 3002 loss=2.567, ppl=5.93, wps=6006.6, ups=0.09, wpb=64847, bsz=128, num_updates=9670, lr=9.99306e-05, gnorm=2.51, loss_scale=1, train_wall=10, gb_free=2.8, wall=111353 2021-06-20 01:34:50 | INFO | train_inner | epoch 004: 725 / 3002 loss=2.635, ppl=6.21, wps=5782.3, ups=0.09, wpb=64844, bsz=128, num_updates=9671, lr=9.99306e-05, gnorm=16.153, loss_scale=1, train_wall=11, gb_free=2.8, wall=111364 2021-06-20 01:35:01 | INFO | train_inner | epoch 004: 726 / 3002 loss=2.497, ppl=5.64, wps=5809.4, ups=0.09, wpb=64837, bsz=128, num_updates=9672, lr=9.99306e-05, gnorm=2.121, loss_scale=1, train_wall=11, gb_free=2.8, wall=111376 2021-06-20 01:35:12 | INFO | train_inner | epoch 004: 727 / 3002 loss=2.584, ppl=6, wps=5858.3, ups=0.09, wpb=64856, bsz=128, num_updates=9673, lr=9.99306e-05, gnorm=2.269, loss_scale=1, train_wall=11, gb_free=2.8, wall=111387 2021-06-20 01:35:24 | INFO | train_inner | epoch 004: 728 / 3002 loss=2.702, ppl=6.51, wps=5726, ups=0.09, wpb=64850, bsz=128, num_updates=9674, lr=9.99306e-05, gnorm=3.369, loss_scale=1, train_wall=11, gb_free=2.8, wall=111398 2021-06-20 01:35:34 | INFO | train_inner | epoch 004: 729 / 3002 loss=2.595, ppl=6.04, wps=5973.2, ups=0.09, wpb=64838, bsz=128, num_updates=9675, lr=9.99306e-05, gnorm=2.153, loss_scale=1, train_wall=10, gb_free=2.8, wall=111409 2021-06-20 01:35:46 | INFO | train_inner | epoch 004: 730 / 3002 loss=2.661, ppl=6.32, wps=5793.6, ups=0.09, wpb=64838, bsz=128, num_updates=9676, lr=9.99306e-05, gnorm=2.493, loss_scale=1, train_wall=11, gb_free=2.8, wall=111420 2021-06-20 01:35:57 | INFO | train_inner | epoch 004: 731 / 3002 loss=2.572, ppl=5.95, wps=5796.5, ups=0.09, wpb=64854, bsz=128, num_updates=9677, lr=9.99306e-05, gnorm=2.087, loss_scale=1, train_wall=11, gb_free=2.8, wall=111431 2021-06-20 01:36:08 | INFO | train_inner | epoch 004: 732 / 3002 loss=2.543, ppl=5.83, wps=5897.6, ups=0.09, wpb=64881, bsz=128, num_updates=9678, lr=9.99306e-05, gnorm=2.127, loss_scale=1, train_wall=11, gb_free=2.8, wall=111442 2021-06-20 01:36:19 | INFO | train_inner | epoch 004: 733 / 3002 loss=2.578, ppl=5.97, wps=5866.5, ups=0.09, wpb=64824, bsz=128, num_updates=9679, lr=9.99306e-05, gnorm=2.249, loss_scale=1, train_wall=11, gb_free=2.8, wall=111453 2021-06-20 01:36:30 | INFO | train_inner | epoch 004: 734 / 3002 loss=2.525, ppl=5.76, wps=5895.3, ups=0.09, wpb=64816, bsz=128, num_updates=9680, lr=9.99306e-05, gnorm=2.1, loss_scale=1, train_wall=11, gb_free=2.8, wall=111464 2021-06-20 01:36:41 | INFO | train_inner | epoch 004: 735 / 3002 loss=2.537, ppl=5.81, wps=5842.6, ups=0.09, wpb=64816, bsz=128, num_updates=9681, lr=9.99305e-05, gnorm=2.249, loss_scale=1, train_wall=11, gb_free=2.8, wall=111475 2021-06-20 01:36:52 | INFO | train_inner | epoch 004: 736 / 3002 loss=2.739, ppl=6.68, wps=5838.5, ups=0.09, wpb=64852, bsz=128, num_updates=9682, lr=9.99305e-05, gnorm=3.457, loss_scale=1, train_wall=11, gb_free=2.8, wall=111486 2021-06-20 01:37:03 | INFO | train_inner | epoch 004: 737 / 3002 loss=2.569, ppl=5.93, wps=5965.7, ups=0.09, wpb=64843, bsz=128, num_updates=9683, lr=9.99305e-05, gnorm=20.225, loss_scale=1, train_wall=10, gb_free=2.8, wall=111497 2021-06-20 01:37:14 | INFO | train_inner | epoch 004: 738 / 3002 loss=2.582, ppl=5.99, wps=5841.8, ups=0.09, wpb=64732, bsz=128, num_updates=9684, lr=9.99305e-05, gnorm=2.125, loss_scale=1, train_wall=11, gb_free=2.8, wall=111508 2021-06-20 01:37:25 | INFO | train_inner | epoch 004: 739 / 3002 loss=2.785, ppl=6.89, wps=5859.7, ups=0.09, wpb=64801, bsz=128, num_updates=9685, lr=9.99305e-05, gnorm=2.577, loss_scale=1, train_wall=11, gb_free=2.8, wall=111519 2021-06-20 01:37:36 | INFO | train_inner | epoch 004: 740 / 3002 loss=2.572, ppl=5.95, wps=5870.3, ups=0.09, wpb=64749, bsz=128, num_updates=9686, lr=9.99305e-05, gnorm=2.239, loss_scale=1, train_wall=11, gb_free=2.8, wall=111530 2021-06-20 01:37:47 | INFO | train_inner | epoch 004: 741 / 3002 loss=2.591, ppl=6.02, wps=5806, ups=0.09, wpb=64793, bsz=128, num_updates=9687, lr=9.99305e-05, gnorm=2.276, loss_scale=1, train_wall=11, gb_free=2.8, wall=111542 2021-06-20 01:37:58 | INFO | train_inner | epoch 004: 742 / 3002 loss=2.674, ppl=6.38, wps=5815.9, ups=0.09, wpb=64872, bsz=128, num_updates=9688, lr=9.99305e-05, gnorm=2.534, loss_scale=1, train_wall=11, gb_free=2.8, wall=111553 2021-06-20 01:38:10 | INFO | train_inner | epoch 004: 743 / 3002 loss=2.6, ppl=6.06, wps=5802, ups=0.09, wpb=64856, bsz=128, num_updates=9689, lr=9.99305e-05, gnorm=3.52, loss_scale=1, train_wall=11, gb_free=2.8, wall=111564 2021-06-20 01:38:21 | INFO | train_inner | epoch 004: 744 / 3002 loss=2.531, ppl=5.78, wps=5795.2, ups=0.09, wpb=64894, bsz=128, num_updates=9690, lr=9.99305e-05, gnorm=2.38, loss_scale=1, train_wall=11, gb_free=2.8, wall=111575 2021-06-20 01:38:32 | INFO | train_inner | epoch 004: 745 / 3002 loss=2.666, ppl=6.34, wps=5890.2, ups=0.09, wpb=64873, bsz=128, num_updates=9691, lr=9.99305e-05, gnorm=4.02, loss_scale=1, train_wall=11, gb_free=2.8, wall=111586 2021-06-20 01:38:43 | INFO | train_inner | epoch 004: 746 / 3002 loss=2.575, ppl=5.96, wps=5783.5, ups=0.09, wpb=64839, bsz=128, num_updates=9692, lr=9.99305e-05, gnorm=2.902, loss_scale=1, train_wall=11, gb_free=2.8, wall=111597 2021-06-20 01:38:54 | INFO | train_inner | epoch 004: 747 / 3002 loss=2.499, ppl=5.65, wps=5891, ups=0.09, wpb=64908, bsz=128, num_updates=9693, lr=9.99305e-05, gnorm=3.601, loss_scale=1, train_wall=11, gb_free=2.8, wall=111608 2021-06-20 01:39:05 | INFO | train_inner | epoch 004: 748 / 3002 loss=2.569, ppl=5.93, wps=5951.1, ups=0.09, wpb=64895, bsz=128, num_updates=9694, lr=9.99304e-05, gnorm=2.31, loss_scale=1, train_wall=10, gb_free=2.8, wall=111619 2021-06-20 01:39:16 | INFO | train_inner | epoch 004: 749 / 3002 loss=2.641, ppl=6.24, wps=5891.3, ups=0.09, wpb=64894, bsz=128, num_updates=9695, lr=9.99304e-05, gnorm=2.329, loss_scale=1, train_wall=11, gb_free=2.8, wall=111630 2021-06-20 01:39:27 | INFO | train_inner | epoch 004: 750 / 3002 loss=2.512, ppl=5.71, wps=5816, ups=0.09, wpb=64833, bsz=128, num_updates=9696, lr=9.99304e-05, gnorm=2.878, loss_scale=1, train_wall=11, gb_free=2.8, wall=111641 2021-06-20 01:39:38 | INFO | train_inner | epoch 004: 751 / 3002 loss=2.647, ppl=6.26, wps=5732.1, ups=0.09, wpb=64761, bsz=128, num_updates=9697, lr=9.99304e-05, gnorm=2.854, loss_scale=1, train_wall=11, gb_free=2.8, wall=111653 2021-06-20 01:39:49 | INFO | train_inner | epoch 004: 752 / 3002 loss=2.496, ppl=5.64, wps=5848.4, ups=0.09, wpb=64700, bsz=128, num_updates=9698, lr=9.99304e-05, gnorm=8.333, loss_scale=1, train_wall=11, gb_free=2.8, wall=111664 2021-06-20 01:40:00 | INFO | train_inner | epoch 004: 753 / 3002 loss=2.567, ppl=5.93, wps=5961.2, ups=0.09, wpb=64899, bsz=128, num_updates=9699, lr=9.99304e-05, gnorm=2.099, loss_scale=1, train_wall=10, gb_free=2.8, wall=111675 2021-06-20 01:40:11 | INFO | train_inner | epoch 004: 754 / 3002 loss=2.467, ppl=5.53, wps=5853.7, ups=0.09, wpb=64744, bsz=128, num_updates=9700, lr=9.99304e-05, gnorm=2.486, loss_scale=1, train_wall=11, gb_free=2.8, wall=111686 2021-06-20 01:40:22 | INFO | train_inner | epoch 004: 755 / 3002 loss=2.721, ppl=6.59, wps=5908.7, ups=0.09, wpb=64828, bsz=128, num_updates=9701, lr=9.99304e-05, gnorm=2.148, loss_scale=1, train_wall=11, gb_free=2.8, wall=111697 2021-06-20 01:40:33 | INFO | train_inner | epoch 004: 756 / 3002 loss=2.578, ppl=5.97, wps=5915.2, ups=0.09, wpb=64881, bsz=128, num_updates=9702, lr=9.99304e-05, gnorm=2.27, loss_scale=1, train_wall=11, gb_free=2.8, wall=111708 2021-06-20 01:40:45 | INFO | train_inner | epoch 004: 757 / 3002 loss=2.58, ppl=5.98, wps=5758.9, ups=0.09, wpb=64831, bsz=128, num_updates=9703, lr=9.99304e-05, gnorm=2.238, loss_scale=1, train_wall=11, gb_free=2.8, wall=111719 2021-06-20 01:40:56 | INFO | train_inner | epoch 004: 758 / 3002 loss=2.626, ppl=6.17, wps=5817.6, ups=0.09, wpb=64843, bsz=128, num_updates=9704, lr=9.99304e-05, gnorm=2.163, loss_scale=1, train_wall=11, gb_free=2.8, wall=111730 2021-06-20 01:41:07 | INFO | train_inner | epoch 004: 759 / 3002 loss=2.642, ppl=6.24, wps=5863.4, ups=0.09, wpb=64872, bsz=128, num_updates=9705, lr=9.99304e-05, gnorm=2.104, loss_scale=1, train_wall=11, gb_free=2.8, wall=111741 2021-06-20 01:41:18 | INFO | train_inner | epoch 004: 760 / 3002 loss=2.399, ppl=5.27, wps=5779, ups=0.09, wpb=64814, bsz=128, num_updates=9706, lr=9.99303e-05, gnorm=2.113, loss_scale=1, train_wall=11, gb_free=2.8, wall=111752 2021-06-20 01:41:29 | INFO | train_inner | epoch 004: 761 / 3002 loss=2.57, ppl=5.94, wps=5920.3, ups=0.09, wpb=64838, bsz=128, num_updates=9707, lr=9.99303e-05, gnorm=2.39, loss_scale=1, train_wall=11, gb_free=2.8, wall=111763 2021-06-20 01:41:40 | INFO | train_inner | epoch 004: 762 / 3002 loss=2.564, ppl=5.91, wps=5764.8, ups=0.09, wpb=64784, bsz=128, num_updates=9708, lr=9.99303e-05, gnorm=2.641, loss_scale=1, train_wall=11, gb_free=2.8, wall=111775 2021-06-20 01:41:51 | INFO | train_inner | epoch 004: 763 / 3002 loss=2.451, ppl=5.47, wps=5917.4, ups=0.09, wpb=64894, bsz=128, num_updates=9709, lr=9.99303e-05, gnorm=2.158, loss_scale=1, train_wall=10, gb_free=2.8, wall=111786 2021-06-20 01:42:02 | INFO | train_inner | epoch 004: 764 / 3002 loss=2.541, ppl=5.82, wps=5780.7, ups=0.09, wpb=64794, bsz=128, num_updates=9710, lr=9.99303e-05, gnorm=2.842, loss_scale=1, train_wall=11, gb_free=2.8, wall=111797 2021-06-20 01:42:13 | INFO | train_inner | epoch 004: 765 / 3002 loss=2.464, ppl=5.52, wps=5872.7, ups=0.09, wpb=64859, bsz=128, num_updates=9711, lr=9.99303e-05, gnorm=2.344, loss_scale=1, train_wall=11, gb_free=2.8, wall=111808 2021-06-20 01:42:24 | INFO | train_inner | epoch 004: 766 / 3002 loss=2.521, ppl=5.74, wps=5933.1, ups=0.09, wpb=64865, bsz=128, num_updates=9712, lr=9.99303e-05, gnorm=4.592, loss_scale=1, train_wall=10, gb_free=2.8, wall=111819 2021-06-20 01:42:35 | INFO | train_inner | epoch 004: 767 / 3002 loss=2.384, ppl=5.22, wps=5841, ups=0.09, wpb=64804, bsz=128, num_updates=9713, lr=9.99303e-05, gnorm=2.384, loss_scale=1, train_wall=11, gb_free=2.8, wall=111830 2021-06-20 01:42:47 | INFO | train_inner | epoch 004: 768 / 3002 loss=2.586, ppl=6, wps=5795.4, ups=0.09, wpb=64825, bsz=128, num_updates=9714, lr=9.99303e-05, gnorm=2.306, loss_scale=1, train_wall=11, gb_free=2.8, wall=111841 2021-06-20 01:42:58 | INFO | train_inner | epoch 004: 769 / 3002 loss=2.583, ppl=5.99, wps=5928.6, ups=0.09, wpb=64784, bsz=128, num_updates=9715, lr=9.99303e-05, gnorm=2.155, loss_scale=1, train_wall=10, gb_free=2.8, wall=111852 2021-06-20 01:43:09 | INFO | train_inner | epoch 004: 770 / 3002 loss=2.492, ppl=5.63, wps=5829.9, ups=0.09, wpb=64769, bsz=128, num_updates=9716, lr=9.99303e-05, gnorm=2.03, loss_scale=1, train_wall=11, gb_free=2.8, wall=111863 2021-06-20 01:43:20 | INFO | train_inner | epoch 004: 771 / 3002 loss=2.752, ppl=6.74, wps=5821.8, ups=0.09, wpb=64837, bsz=128, num_updates=9717, lr=9.99303e-05, gnorm=2.037, loss_scale=1, train_wall=11, gb_free=2.8, wall=111874 2021-06-20 01:43:31 | INFO | train_inner | epoch 004: 772 / 3002 loss=2.727, ppl=6.62, wps=5752.5, ups=0.09, wpb=64814, bsz=128, num_updates=9718, lr=9.99303e-05, gnorm=2.104, loss_scale=1, train_wall=11, gb_free=2.8, wall=111885 2021-06-20 01:43:42 | INFO | train_inner | epoch 004: 773 / 3002 loss=2.615, ppl=6.13, wps=5813.1, ups=0.09, wpb=64729, bsz=128, num_updates=9719, lr=9.99302e-05, gnorm=2.198, loss_scale=1, train_wall=11, gb_free=2.8, wall=111897 2021-06-20 01:43:53 | INFO | train_inner | epoch 004: 774 / 3002 loss=2.544, ppl=5.83, wps=5759.1, ups=0.09, wpb=64733, bsz=128, num_updates=9720, lr=9.99302e-05, gnorm=2.141, loss_scale=1, train_wall=11, gb_free=2.8, wall=111908 2021-06-20 01:44:04 | INFO | train_inner | epoch 004: 775 / 3002 loss=2.623, ppl=6.16, wps=5907.4, ups=0.09, wpb=64845, bsz=128, num_updates=9721, lr=9.99302e-05, gnorm=2.387, loss_scale=1, train_wall=11, gb_free=2.8, wall=111919 2021-06-20 01:44:15 | INFO | train_inner | epoch 004: 776 / 3002 loss=2.754, ppl=6.75, wps=5866.7, ups=0.09, wpb=64800, bsz=128, num_updates=9722, lr=9.99302e-05, gnorm=2.103, loss_scale=1, train_wall=11, gb_free=2.8, wall=111930 2021-06-20 01:44:27 | INFO | train_inner | epoch 004: 777 / 3002 loss=2.518, ppl=5.73, wps=5756, ups=0.09, wpb=64810, bsz=128, num_updates=9723, lr=9.99302e-05, gnorm=2.209, loss_scale=1, train_wall=11, gb_free=2.8, wall=111941 2021-06-20 01:44:38 | INFO | train_inner | epoch 004: 778 / 3002 loss=2.785, ppl=6.89, wps=5911.7, ups=0.09, wpb=64807, bsz=128, num_updates=9724, lr=9.99302e-05, gnorm=2.135, loss_scale=1, train_wall=11, gb_free=2.8, wall=111952 2021-06-20 01:44:49 | INFO | train_inner | epoch 004: 779 / 3002 loss=2.466, ppl=5.52, wps=5923.9, ups=0.09, wpb=64870, bsz=128, num_updates=9725, lr=9.99302e-05, gnorm=2.168, loss_scale=1, train_wall=11, gb_free=2.8, wall=111963 2021-06-20 01:45:00 | INFO | train_inner | epoch 004: 780 / 3002 loss=2.498, ppl=5.65, wps=5675.4, ups=0.09, wpb=64856, bsz=128, num_updates=9726, lr=9.99302e-05, gnorm=2.139, loss_scale=1, train_wall=11, gb_free=2.8, wall=111974 2021-06-20 01:45:11 | INFO | train_inner | epoch 004: 781 / 3002 loss=2.719, ppl=6.59, wps=5827.4, ups=0.09, wpb=64807, bsz=128, num_updates=9727, lr=9.99302e-05, gnorm=2.246, loss_scale=1, train_wall=11, gb_free=2.8, wall=111986 2021-06-20 01:45:22 | INFO | train_inner | epoch 004: 782 / 3002 loss=2.671, ppl=6.37, wps=5925.8, ups=0.09, wpb=64833, bsz=128, num_updates=9728, lr=9.99302e-05, gnorm=4.814, loss_scale=1, train_wall=10, gb_free=2.8, wall=111996 2021-06-20 01:45:33 | INFO | train_inner | epoch 004: 783 / 3002 loss=2.502, ppl=5.66, wps=5970.7, ups=0.09, wpb=64881, bsz=128, num_updates=9729, lr=9.99302e-05, gnorm=22.29, loss_scale=1, train_wall=10, gb_free=2.8, wall=112007 2021-06-20 01:45:44 | INFO | train_inner | epoch 004: 784 / 3002 loss=2.658, ppl=6.31, wps=5755, ups=0.09, wpb=64840, bsz=128, num_updates=9730, lr=9.99302e-05, gnorm=2.092, loss_scale=1, train_wall=11, gb_free=2.8, wall=112019 2021-06-20 01:45:56 | INFO | train_inner | epoch 004: 785 / 3002 loss=2.573, ppl=5.95, wps=5766.9, ups=0.09, wpb=64838, bsz=128, num_updates=9731, lr=9.99301e-05, gnorm=2.083, loss_scale=1, train_wall=11, gb_free=2.8, wall=112030 2021-06-20 01:46:07 | INFO | train_inner | epoch 004: 786 / 3002 loss=2.509, ppl=5.69, wps=5821.4, ups=0.09, wpb=64812, bsz=128, num_updates=9732, lr=9.99301e-05, gnorm=2.215, loss_scale=1, train_wall=11, gb_free=2.8, wall=112041 2021-06-20 01:46:18 | INFO | train_inner | epoch 004: 787 / 3002 loss=2.634, ppl=6.21, wps=5922, ups=0.09, wpb=64841, bsz=128, num_updates=9733, lr=9.99301e-05, gnorm=2.28, loss_scale=1, train_wall=10, gb_free=2.8, wall=112052 2021-06-20 01:46:29 | INFO | train_inner | epoch 004: 788 / 3002 loss=2.543, ppl=5.83, wps=5793.3, ups=0.09, wpb=64778, bsz=128, num_updates=9734, lr=9.99301e-05, gnorm=2.178, loss_scale=1, train_wall=11, gb_free=2.8, wall=112063 2021-06-20 01:46:40 | INFO | train_inner | epoch 004: 789 / 3002 loss=2.473, ppl=5.55, wps=5929.3, ups=0.09, wpb=64858, bsz=128, num_updates=9735, lr=9.99301e-05, gnorm=2.147, loss_scale=1, train_wall=10, gb_free=2.8, wall=112074 2021-06-20 01:46:51 | INFO | train_inner | epoch 004: 790 / 3002 loss=2.628, ppl=6.18, wps=5728.3, ups=0.09, wpb=64683, bsz=128, num_updates=9736, lr=9.99301e-05, gnorm=2.279, loss_scale=1, train_wall=11, gb_free=2.8, wall=112085 2021-06-20 01:47:02 | INFO | train_inner | epoch 004: 791 / 3002 loss=2.603, ppl=6.08, wps=5924.4, ups=0.09, wpb=64851, bsz=128, num_updates=9737, lr=9.99301e-05, gnorm=2.283, loss_scale=1, train_wall=10, gb_free=2.8, wall=112096 2021-06-20 01:47:13 | INFO | train_inner | epoch 004: 792 / 3002 loss=2.598, ppl=6.05, wps=5990.3, ups=0.09, wpb=64793, bsz=128, num_updates=9738, lr=9.99301e-05, gnorm=2.476, loss_scale=1, train_wall=10, gb_free=2.8, wall=112107 2021-06-20 01:47:24 | INFO | train_inner | epoch 004: 793 / 3002 loss=2.694, ppl=6.47, wps=5862.3, ups=0.09, wpb=64854, bsz=128, num_updates=9739, lr=9.99301e-05, gnorm=3.056, loss_scale=1, train_wall=11, gb_free=2.8, wall=112118 2021-06-20 01:47:35 | INFO | train_inner | epoch 004: 794 / 3002 loss=2.432, ppl=5.4, wps=5872.9, ups=0.09, wpb=64878, bsz=128, num_updates=9740, lr=9.99301e-05, gnorm=4.155, loss_scale=1, train_wall=11, gb_free=2.8, wall=112129 2021-06-20 01:47:46 | INFO | train_inner | epoch 004: 795 / 3002 loss=2.607, ppl=6.09, wps=5916.1, ups=0.09, wpb=64843, bsz=128, num_updates=9741, lr=9.99301e-05, gnorm=2.266, loss_scale=1, train_wall=11, gb_free=2.8, wall=112140 2021-06-20 01:47:57 | INFO | train_inner | epoch 004: 796 / 3002 loss=2.626, ppl=6.17, wps=5875.2, ups=0.09, wpb=64857, bsz=128, num_updates=9742, lr=9.99301e-05, gnorm=2.15, loss_scale=1, train_wall=11, gb_free=2.8, wall=112151 2021-06-20 01:48:08 | INFO | train_inner | epoch 004: 797 / 3002 loss=2.434, ppl=5.4, wps=5780.7, ups=0.09, wpb=64895, bsz=128, num_updates=9743, lr=9.99301e-05, gnorm=3.391, loss_scale=1, train_wall=11, gb_free=2.8, wall=112162 2021-06-20 01:48:19 | INFO | train_inner | epoch 004: 798 / 3002 loss=2.454, ppl=5.48, wps=5890.6, ups=0.09, wpb=64844, bsz=128, num_updates=9744, lr=9.993e-05, gnorm=2.184, loss_scale=1, train_wall=11, gb_free=2.8, wall=112173 2021-06-20 01:48:30 | INFO | train_inner | epoch 004: 799 / 3002 loss=2.565, ppl=5.92, wps=5962.3, ups=0.09, wpb=65001, bsz=128, num_updates=9745, lr=9.993e-05, gnorm=2.102, loss_scale=1, train_wall=10, gb_free=2.8, wall=112184 2021-06-20 01:48:41 | INFO | train_inner | epoch 004: 800 / 3002 loss=2.725, ppl=6.61, wps=5932.5, ups=0.09, wpb=64799, bsz=128, num_updates=9746, lr=9.993e-05, gnorm=2.094, loss_scale=1, train_wall=10, gb_free=2.8, wall=112195 2021-06-20 01:48:52 | INFO | train_inner | epoch 004: 801 / 3002 loss=2.741, ppl=6.68, wps=5918.6, ups=0.09, wpb=64725, bsz=128, num_updates=9747, lr=9.993e-05, gnorm=2.402, loss_scale=1, train_wall=10, gb_free=2.8, wall=112206 2021-06-20 01:49:03 | INFO | train_inner | epoch 004: 802 / 3002 loss=2.62, ppl=6.15, wps=5876.3, ups=0.09, wpb=64839, bsz=128, num_updates=9748, lr=9.993e-05, gnorm=2.135, loss_scale=1, train_wall=11, gb_free=2.8, wall=112217 2021-06-20 01:49:14 | INFO | train_inner | epoch 004: 803 / 3002 loss=2.657, ppl=6.31, wps=5893.3, ups=0.09, wpb=64801, bsz=128, num_updates=9749, lr=9.993e-05, gnorm=1.991, loss_scale=1, train_wall=11, gb_free=2.8, wall=112228 2021-06-20 01:49:25 | INFO | train_inner | epoch 004: 804 / 3002 loss=2.46, ppl=5.5, wps=5858.3, ups=0.09, wpb=64792, bsz=128, num_updates=9750, lr=9.993e-05, gnorm=2.119, loss_scale=1, train_wall=11, gb_free=2.8, wall=112239 2021-06-20 01:49:36 | INFO | train_inner | epoch 004: 805 / 3002 loss=2.656, ppl=6.3, wps=5872.6, ups=0.09, wpb=64844, bsz=128, num_updates=9751, lr=9.993e-05, gnorm=2.15, loss_scale=1, train_wall=11, gb_free=2.8, wall=112250 2021-06-20 01:49:47 | INFO | train_inner | epoch 004: 806 / 3002 loss=2.569, ppl=5.94, wps=5849, ups=0.09, wpb=64817, bsz=128, num_updates=9752, lr=9.993e-05, gnorm=2.08, loss_scale=1, train_wall=11, gb_free=2.8, wall=112261 2021-06-20 01:49:58 | INFO | train_inner | epoch 004: 807 / 3002 loss=2.64, ppl=6.23, wps=5821.6, ups=0.09, wpb=64812, bsz=128, num_updates=9753, lr=9.993e-05, gnorm=2.578, loss_scale=1, train_wall=11, gb_free=2.8, wall=112273 2021-06-20 01:50:09 | INFO | train_inner | epoch 004: 808 / 3002 loss=2.628, ppl=6.18, wps=5842.9, ups=0.09, wpb=64822, bsz=128, num_updates=9754, lr=9.993e-05, gnorm=17.174, loss_scale=1, train_wall=11, gb_free=2.8, wall=112284 2021-06-20 01:50:21 | INFO | train_inner | epoch 004: 809 / 3002 loss=2.499, ppl=5.65, wps=5809.5, ups=0.09, wpb=64825, bsz=128, num_updates=9755, lr=9.993e-05, gnorm=2.176, loss_scale=1, train_wall=11, gb_free=2.8, wall=112295 2021-06-20 01:50:32 | INFO | train_inner | epoch 004: 810 / 3002 loss=2.573, ppl=5.95, wps=5833.2, ups=0.09, wpb=64812, bsz=128, num_updates=9756, lr=9.99299e-05, gnorm=2.876, loss_scale=1, train_wall=11, gb_free=2.8, wall=112306 2021-06-20 01:50:43 | INFO | train_inner | epoch 004: 811 / 3002 loss=2.506, ppl=5.68, wps=5833.8, ups=0.09, wpb=64788, bsz=128, num_updates=9757, lr=9.99299e-05, gnorm=2.443, loss_scale=1, train_wall=11, gb_free=2.8, wall=112317 2021-06-20 01:50:54 | INFO | train_inner | epoch 004: 812 / 3002 loss=2.603, ppl=6.08, wps=5841.7, ups=0.09, wpb=64832, bsz=128, num_updates=9758, lr=9.99299e-05, gnorm=2.061, loss_scale=1, train_wall=11, gb_free=2.8, wall=112328 2021-06-20 01:51:05 | INFO | train_inner | epoch 004: 813 / 3002 loss=2.612, ppl=6.12, wps=5893.4, ups=0.09, wpb=64802, bsz=128, num_updates=9759, lr=9.99299e-05, gnorm=2.147, loss_scale=1, train_wall=11, gb_free=2.8, wall=112339 2021-06-20 01:51:16 | INFO | train_inner | epoch 004: 814 / 3002 loss=2.634, ppl=6.21, wps=5959.8, ups=0.09, wpb=64887, bsz=128, num_updates=9760, lr=9.99299e-05, gnorm=2.099, loss_scale=1, train_wall=10, gb_free=2.8, wall=112350 2021-06-20 01:51:27 | INFO | train_inner | epoch 004: 815 / 3002 loss=2.613, ppl=6.12, wps=5795.9, ups=0.09, wpb=64902, bsz=128, num_updates=9761, lr=9.99299e-05, gnorm=2.304, loss_scale=1, train_wall=11, gb_free=2.8, wall=112361 2021-06-20 01:51:38 | INFO | train_inner | epoch 004: 816 / 3002 loss=2.557, ppl=5.88, wps=5672.5, ups=0.09, wpb=64846, bsz=128, num_updates=9762, lr=9.99299e-05, gnorm=2.402, loss_scale=1, train_wall=11, gb_free=2.8, wall=112373 2021-06-20 01:51:50 | INFO | train_inner | epoch 004: 817 / 3002 loss=2.548, ppl=5.85, wps=5774.9, ups=0.09, wpb=64839, bsz=128, num_updates=9763, lr=9.99299e-05, gnorm=2.02, loss_scale=1, train_wall=11, gb_free=2.8, wall=112384 2021-06-20 01:52:00 | INFO | train_inner | epoch 004: 818 / 3002 loss=2.663, ppl=6.33, wps=5962.5, ups=0.09, wpb=64823, bsz=128, num_updates=9764, lr=9.99299e-05, gnorm=2.132, loss_scale=1, train_wall=10, gb_free=2.8, wall=112395 2021-06-20 01:52:11 | INFO | train_inner | epoch 004: 819 / 3002 loss=2.733, ppl=6.65, wps=5919.3, ups=0.09, wpb=64903, bsz=128, num_updates=9765, lr=9.99299e-05, gnorm=2.162, loss_scale=1, train_wall=10, gb_free=2.8, wall=112406 2021-06-20 01:52:23 | INFO | train_inner | epoch 004: 820 / 3002 loss=2.609, ppl=6.1, wps=5772.8, ups=0.09, wpb=64884, bsz=128, num_updates=9766, lr=9.99299e-05, gnorm=2.175, loss_scale=1, train_wall=11, gb_free=2.8, wall=112417 2021-06-20 01:52:34 | INFO | train_inner | epoch 004: 821 / 3002 loss=2.665, ppl=6.34, wps=5887.7, ups=0.09, wpb=64854, bsz=128, num_updates=9767, lr=9.99299e-05, gnorm=2.137, loss_scale=1, train_wall=11, gb_free=2.8, wall=112428 2021-06-20 01:52:45 | INFO | train_inner | epoch 004: 822 / 3002 loss=2.538, ppl=5.81, wps=5971.4, ups=0.09, wpb=64800, bsz=128, num_updates=9768, lr=9.99299e-05, gnorm=2.017, loss_scale=1, train_wall=10, gb_free=2.8, wall=112439 2021-06-20 01:52:55 | INFO | train_inner | epoch 004: 823 / 3002 loss=2.468, ppl=5.53, wps=5908.3, ups=0.09, wpb=64801, bsz=128, num_updates=9769, lr=9.99298e-05, gnorm=2.246, loss_scale=1, train_wall=11, gb_free=2.8, wall=112450 2021-06-20 01:53:07 | INFO | train_inner | epoch 004: 824 / 3002 loss=2.585, ppl=6, wps=5856.8, ups=0.09, wpb=64791, bsz=128, num_updates=9770, lr=9.99298e-05, gnorm=2.172, loss_scale=1, train_wall=11, gb_free=2.8, wall=112461 2021-06-20 01:53:18 | INFO | train_inner | epoch 004: 825 / 3002 loss=2.738, ppl=6.67, wps=5823.5, ups=0.09, wpb=64793, bsz=128, num_updates=9771, lr=9.99298e-05, gnorm=2.154, loss_scale=1, train_wall=11, gb_free=2.8, wall=112472 2021-06-20 01:53:29 | INFO | train_inner | epoch 004: 826 / 3002 loss=2.509, ppl=5.69, wps=5896.5, ups=0.09, wpb=64813, bsz=128, num_updates=9772, lr=9.99298e-05, gnorm=2.169, loss_scale=1, train_wall=11, gb_free=2.8, wall=112483 2021-06-20 01:53:40 | INFO | train_inner | epoch 004: 827 / 3002 loss=2.559, ppl=5.89, wps=5845.4, ups=0.09, wpb=64723, bsz=128, num_updates=9773, lr=9.99298e-05, gnorm=2.078, loss_scale=1, train_wall=11, gb_free=2.8, wall=112494 2021-06-20 01:53:51 | INFO | train_inner | epoch 004: 828 / 3002 loss=2.43, ppl=5.39, wps=5923.7, ups=0.09, wpb=64801, bsz=128, num_updates=9774, lr=9.99298e-05, gnorm=2.098, loss_scale=1, train_wall=10, gb_free=2.8, wall=112505 2021-06-20 01:54:02 | INFO | train_inner | epoch 004: 829 / 3002 loss=2.553, ppl=5.87, wps=5862.8, ups=0.09, wpb=64853, bsz=128, num_updates=9775, lr=9.99298e-05, gnorm=2.736, loss_scale=2, train_wall=11, gb_free=2.8, wall=112516 2021-06-20 01:54:13 | INFO | train_inner | epoch 004: 830 / 3002 loss=2.631, ppl=6.19, wps=5882.5, ups=0.09, wpb=64844, bsz=128, num_updates=9776, lr=9.99298e-05, gnorm=2.33, loss_scale=2, train_wall=11, gb_free=2.8, wall=112527 2021-06-20 01:54:24 | INFO | train_inner | epoch 004: 831 / 3002 loss=2.708, ppl=6.53, wps=5837.8, ups=0.09, wpb=64901, bsz=128, num_updates=9777, lr=9.99298e-05, gnorm=3.067, loss_scale=2, train_wall=11, gb_free=2.8, wall=112538 2021-06-20 01:54:35 | INFO | train_inner | epoch 004: 832 / 3002 loss=2.579, ppl=5.98, wps=5842.9, ups=0.09, wpb=64832, bsz=128, num_updates=9778, lr=9.99298e-05, gnorm=2.052, loss_scale=2, train_wall=11, gb_free=2.8, wall=112549 2021-06-20 01:54:46 | INFO | train_inner | epoch 004: 833 / 3002 loss=2.491, ppl=5.62, wps=6041.9, ups=0.09, wpb=64908, bsz=128, num_updates=9779, lr=9.99298e-05, gnorm=2.081, loss_scale=2, train_wall=10, gb_free=2.8, wall=112560 2021-06-20 01:54:57 | INFO | train_inner | epoch 004: 834 / 3002 loss=2.465, ppl=5.52, wps=5954.7, ups=0.09, wpb=64860, bsz=128, num_updates=9780, lr=9.99298e-05, gnorm=1.97, loss_scale=2, train_wall=10, gb_free=2.8, wall=112571 2021-06-20 01:55:08 | INFO | train_inner | epoch 004: 835 / 3002 loss=2.56, ppl=5.9, wps=5798.7, ups=0.09, wpb=64853, bsz=128, num_updates=9781, lr=9.99297e-05, gnorm=2.041, loss_scale=2, train_wall=11, gb_free=2.8, wall=112582 2021-06-20 01:55:19 | INFO | train_inner | epoch 004: 836 / 3002 loss=2.736, ppl=6.66, wps=5905.2, ups=0.09, wpb=64791, bsz=128, num_updates=9782, lr=9.99297e-05, gnorm=2.227, loss_scale=2, train_wall=11, gb_free=2.8, wall=112593 2021-06-20 01:55:29 | INFO | train_inner | epoch 004: 837 / 3002 loss=2.417, ppl=5.34, wps=6110.7, ups=0.09, wpb=64886, bsz=128, num_updates=9783, lr=9.99297e-05, gnorm=1.943, loss_scale=2, train_wall=10, gb_free=2.8, wall=112604 2021-06-20 01:55:40 | INFO | train_inner | epoch 004: 838 / 3002 loss=2.555, ppl=5.88, wps=5873.6, ups=0.09, wpb=64751, bsz=128, num_updates=9784, lr=9.99297e-05, gnorm=2.44, loss_scale=2, train_wall=11, gb_free=2.8, wall=112615 2021-06-20 01:55:52 | INFO | train_inner | epoch 004: 839 / 3002 loss=2.648, ppl=6.27, wps=5808.1, ups=0.09, wpb=64790, bsz=128, num_updates=9785, lr=9.99297e-05, gnorm=2.105, loss_scale=2, train_wall=11, gb_free=2.8, wall=112626 2021-06-20 01:56:03 | INFO | train_inner | epoch 004: 840 / 3002 loss=2.426, ppl=5.37, wps=5865.7, ups=0.09, wpb=64826, bsz=128, num_updates=9786, lr=9.99297e-05, gnorm=2.076, loss_scale=2, train_wall=11, gb_free=2.8, wall=112637 2021-06-20 01:56:14 | INFO | train_inner | epoch 004: 841 / 3002 loss=2.71, ppl=6.54, wps=5901.9, ups=0.09, wpb=64819, bsz=128, num_updates=9787, lr=9.99297e-05, gnorm=2.111, loss_scale=2, train_wall=10, gb_free=2.8, wall=112648 2021-06-20 01:56:25 | INFO | train_inner | epoch 004: 842 / 3002 loss=2.794, ppl=6.93, wps=5818.6, ups=0.09, wpb=64885, bsz=128, num_updates=9788, lr=9.99297e-05, gnorm=2.207, loss_scale=2, train_wall=11, gb_free=2.8, wall=112659 2021-06-20 01:56:36 | INFO | train_inner | epoch 004: 843 / 3002 loss=2.459, ppl=5.5, wps=5762.2, ups=0.09, wpb=64838, bsz=128, num_updates=9789, lr=9.99297e-05, gnorm=2.044, loss_scale=2, train_wall=11, gb_free=2.8, wall=112670 2021-06-20 01:56:47 | INFO | train_inner | epoch 004: 844 / 3002 loss=2.581, ppl=5.99, wps=5776.8, ups=0.09, wpb=64723, bsz=128, num_updates=9790, lr=9.99297e-05, gnorm=2.06, loss_scale=2, train_wall=11, gb_free=2.8, wall=112682 2021-06-20 01:56:58 | INFO | train_inner | epoch 004: 845 / 3002 loss=2.47, ppl=5.54, wps=5892.4, ups=0.09, wpb=64843, bsz=128, num_updates=9791, lr=9.99297e-05, gnorm=2.069, loss_scale=2, train_wall=11, gb_free=2.8, wall=112693 2021-06-20 01:57:09 | INFO | train_inner | epoch 004: 846 / 3002 loss=2.62, ppl=6.15, wps=5955.2, ups=0.09, wpb=64798, bsz=128, num_updates=9792, lr=9.99297e-05, gnorm=3.039, loss_scale=2, train_wall=10, gb_free=2.8, wall=112703 2021-06-20 01:57:20 | INFO | train_inner | epoch 004: 847 / 3002 loss=2.802, ppl=6.98, wps=5793.1, ups=0.09, wpb=64796, bsz=128, num_updates=9793, lr=9.99297e-05, gnorm=2.111, loss_scale=2, train_wall=11, gb_free=2.8, wall=112715 2021-06-20 01:57:31 | INFO | train_inner | epoch 004: 848 / 3002 loss=2.69, ppl=6.45, wps=5824.9, ups=0.09, wpb=64889, bsz=128, num_updates=9794, lr=9.99296e-05, gnorm=2.238, loss_scale=2, train_wall=11, gb_free=2.8, wall=112726 2021-06-20 01:57:42 | INFO | train_inner | epoch 004: 849 / 3002 loss=2.601, ppl=6.07, wps=5866.5, ups=0.09, wpb=64833, bsz=128, num_updates=9795, lr=9.99296e-05, gnorm=2.086, loss_scale=2, train_wall=11, gb_free=2.8, wall=112737 2021-06-20 01:57:54 | INFO | train_inner | epoch 004: 850 / 3002 loss=2.692, ppl=6.46, wps=5795.7, ups=0.09, wpb=64766, bsz=128, num_updates=9796, lr=9.99296e-05, gnorm=3.002, loss_scale=2, train_wall=11, gb_free=2.8, wall=112748 2021-06-20 01:58:05 | INFO | train_inner | epoch 004: 851 / 3002 loss=2.686, ppl=6.43, wps=5849.4, ups=0.09, wpb=64652, bsz=128, num_updates=9797, lr=9.99296e-05, gnorm=2.132, loss_scale=2, train_wall=11, gb_free=2.8, wall=112759 2021-06-20 01:58:16 | INFO | train_inner | epoch 004: 852 / 3002 loss=2.46, ppl=5.5, wps=5824.3, ups=0.09, wpb=64865, bsz=128, num_updates=9798, lr=9.99296e-05, gnorm=2.208, loss_scale=2, train_wall=11, gb_free=2.8, wall=112770 2021-06-20 01:58:27 | INFO | train_inner | epoch 004: 853 / 3002 loss=2.559, ppl=5.89, wps=5885.2, ups=0.09, wpb=64869, bsz=128, num_updates=9799, lr=9.99296e-05, gnorm=2.096, loss_scale=2, train_wall=11, gb_free=2.8, wall=112781 2021-06-20 01:58:38 | INFO | train_inner | epoch 004: 854 / 3002 loss=2.853, ppl=7.22, wps=5851, ups=0.09, wpb=64789, bsz=128, num_updates=9800, lr=9.99296e-05, gnorm=2.137, loss_scale=2, train_wall=11, gb_free=2.8, wall=112792 2021-06-20 01:58:49 | INFO | train_inner | epoch 004: 855 / 3002 loss=2.585, ppl=6, wps=5813.2, ups=0.09, wpb=64671, bsz=128, num_updates=9801, lr=9.99296e-05, gnorm=4.555, loss_scale=2, train_wall=11, gb_free=2.8, wall=112803 2021-06-20 01:59:00 | INFO | train_inner | epoch 004: 856 / 3002 loss=2.561, ppl=5.9, wps=5870.7, ups=0.09, wpb=64800, bsz=128, num_updates=9802, lr=9.99296e-05, gnorm=2.124, loss_scale=2, train_wall=11, gb_free=2.8, wall=112814 2021-06-20 01:59:11 | INFO | train_inner | epoch 004: 857 / 3002 loss=2.438, ppl=5.42, wps=5830.9, ups=0.09, wpb=64820, bsz=128, num_updates=9803, lr=9.99296e-05, gnorm=2.142, loss_scale=2, train_wall=11, gb_free=2.8, wall=112826 2021-06-20 01:59:22 | INFO | train_inner | epoch 004: 858 / 3002 loss=2.617, ppl=6.13, wps=6031.9, ups=0.09, wpb=64743, bsz=128, num_updates=9804, lr=9.99296e-05, gnorm=2.069, loss_scale=2, train_wall=10, gb_free=2.8, wall=112836 2021-06-20 01:59:33 | INFO | train_inner | epoch 004: 859 / 3002 loss=2.595, ppl=6.04, wps=5882.2, ups=0.09, wpb=64847, bsz=128, num_updates=9805, lr=9.99296e-05, gnorm=2.231, loss_scale=2, train_wall=11, gb_free=2.8, wall=112847 2021-06-20 01:59:44 | INFO | train_inner | epoch 004: 860 / 3002 loss=2.727, ppl=6.62, wps=5845.7, ups=0.09, wpb=64814, bsz=128, num_updates=9806, lr=9.99295e-05, gnorm=2.21, loss_scale=2, train_wall=11, gb_free=2.8, wall=112858 2021-06-20 01:59:55 | INFO | train_inner | epoch 004: 861 / 3002 loss=2.723, ppl=6.6, wps=5963.9, ups=0.09, wpb=64878, bsz=128, num_updates=9807, lr=9.99295e-05, gnorm=2.069, loss_scale=2, train_wall=10, gb_free=2.8, wall=112869 2021-06-20 02:00:06 | INFO | train_inner | epoch 004: 862 / 3002 loss=2.475, ppl=5.56, wps=5854.1, ups=0.09, wpb=64823, bsz=128, num_updates=9808, lr=9.99295e-05, gnorm=2.007, loss_scale=2, train_wall=11, gb_free=2.8, wall=112880 2021-06-20 02:00:17 | INFO | train_inner | epoch 004: 863 / 3002 loss=2.483, ppl=5.59, wps=5938, ups=0.09, wpb=64928, bsz=128, num_updates=9809, lr=9.99295e-05, gnorm=2.09, loss_scale=2, train_wall=10, gb_free=2.8, wall=112891 2021-06-20 02:00:28 | INFO | train_inner | epoch 004: 864 / 3002 loss=2.649, ppl=6.27, wps=5894.6, ups=0.09, wpb=64861, bsz=128, num_updates=9810, lr=9.99295e-05, gnorm=2.372, loss_scale=2, train_wall=11, gb_free=2.8, wall=112902 2021-06-20 02:00:39 | INFO | train_inner | epoch 004: 865 / 3002 loss=2.581, ppl=5.99, wps=5851.1, ups=0.09, wpb=64868, bsz=128, num_updates=9811, lr=9.99295e-05, gnorm=2.633, loss_scale=2, train_wall=11, gb_free=2.8, wall=112913 2021-06-20 02:00:50 | INFO | train_inner | epoch 004: 866 / 3002 loss=2.668, ppl=6.36, wps=5837, ups=0.09, wpb=64876, bsz=128, num_updates=9812, lr=9.99295e-05, gnorm=2.339, loss_scale=2, train_wall=11, gb_free=2.8, wall=112924 2021-06-20 02:01:01 | INFO | train_inner | epoch 004: 867 / 3002 loss=2.757, ppl=6.76, wps=5908, ups=0.09, wpb=64849, bsz=128, num_updates=9813, lr=9.99295e-05, gnorm=2.073, loss_scale=2, train_wall=10, gb_free=2.8, wall=112935 2021-06-20 02:01:12 | INFO | train_inner | epoch 004: 868 / 3002 loss=2.596, ppl=6.04, wps=5983.6, ups=0.09, wpb=64781, bsz=128, num_updates=9814, lr=9.99295e-05, gnorm=2.018, loss_scale=2, train_wall=10, gb_free=2.8, wall=112946 2021-06-20 02:01:23 | INFO | train_inner | epoch 004: 869 / 3002 loss=2.505, ppl=5.68, wps=5765.3, ups=0.09, wpb=64719, bsz=128, num_updates=9815, lr=9.99295e-05, gnorm=2.092, loss_scale=2, train_wall=11, gb_free=2.8, wall=112958 2021-06-20 02:01:34 | INFO | train_inner | epoch 004: 870 / 3002 loss=2.687, ppl=6.44, wps=5975.4, ups=0.09, wpb=64836, bsz=128, num_updates=9816, lr=9.99295e-05, gnorm=2.039, loss_scale=2, train_wall=10, gb_free=2.8, wall=112968 2021-06-20 02:01:45 | INFO | train_inner | epoch 004: 871 / 3002 loss=2.58, ppl=5.98, wps=5847.1, ups=0.09, wpb=64804, bsz=128, num_updates=9817, lr=9.99295e-05, gnorm=2.437, loss_scale=2, train_wall=11, gb_free=2.8, wall=112979 2021-06-20 02:01:56 | INFO | train_inner | epoch 004: 872 / 3002 loss=2.677, ppl=6.4, wps=5813.8, ups=0.09, wpb=64709, bsz=128, num_updates=9818, lr=9.99295e-05, gnorm=2.093, loss_scale=2, train_wall=11, gb_free=2.8, wall=112991 2021-06-20 02:02:07 | INFO | train_inner | epoch 004: 873 / 3002 loss=2.565, ppl=5.92, wps=5874.8, ups=0.09, wpb=64839, bsz=128, num_updates=9819, lr=9.99294e-05, gnorm=5.522, loss_scale=2, train_wall=11, gb_free=2.8, wall=113002 2021-06-20 02:02:19 | INFO | train_inner | epoch 004: 874 / 3002 loss=2.557, ppl=5.88, wps=5773.3, ups=0.09, wpb=64833, bsz=128, num_updates=9820, lr=9.99294e-05, gnorm=2.075, loss_scale=2, train_wall=11, gb_free=2.8, wall=113013 2021-06-20 02:02:30 | INFO | train_inner | epoch 004: 875 / 3002 loss=2.633, ppl=6.2, wps=5856.8, ups=0.09, wpb=64768, bsz=128, num_updates=9821, lr=9.99294e-05, gnorm=2.24, loss_scale=2, train_wall=11, gb_free=2.8, wall=113024 2021-06-20 02:02:41 | INFO | train_inner | epoch 004: 876 / 3002 loss=2.531, ppl=5.78, wps=5826.1, ups=0.09, wpb=64866, bsz=128, num_updates=9822, lr=9.99294e-05, gnorm=2.814, loss_scale=2, train_wall=11, gb_free=2.8, wall=113035 2021-06-20 02:02:52 | INFO | train_inner | epoch 004: 877 / 3002 loss=2.615, ppl=6.12, wps=5762.8, ups=0.09, wpb=64875, bsz=128, num_updates=9823, lr=9.99294e-05, gnorm=2.058, loss_scale=2, train_wall=11, gb_free=2.8, wall=113046 2021-06-20 02:03:03 | INFO | train_inner | epoch 004: 878 / 3002 loss=2.516, ppl=5.72, wps=5803.8, ups=0.09, wpb=64822, bsz=128, num_updates=9824, lr=9.99294e-05, gnorm=21.087, loss_scale=2, train_wall=11, gb_free=2.8, wall=113057 2021-06-20 02:03:14 | INFO | train_inner | epoch 004: 879 / 3002 loss=2.65, ppl=6.28, wps=5817.4, ups=0.09, wpb=64836, bsz=128, num_updates=9825, lr=9.99294e-05, gnorm=2.192, loss_scale=2, train_wall=11, gb_free=2.8, wall=113069 2021-06-20 02:03:25 | INFO | train_inner | epoch 004: 880 / 3002 loss=2.663, ppl=6.33, wps=5856.2, ups=0.09, wpb=64692, bsz=128, num_updates=9826, lr=9.99294e-05, gnorm=2.111, loss_scale=2, train_wall=11, gb_free=2.8, wall=113080 2021-06-20 02:03:36 | INFO | train_inner | epoch 004: 881 / 3002 loss=2.561, ppl=5.9, wps=5840.3, ups=0.09, wpb=64787, bsz=128, num_updates=9827, lr=9.99294e-05, gnorm=2.09, loss_scale=2, train_wall=11, gb_free=2.8, wall=113091 2021-06-20 02:03:48 | INFO | train_inner | epoch 004: 882 / 3002 loss=2.711, ppl=6.55, wps=5806.4, ups=0.09, wpb=64692, bsz=128, num_updates=9828, lr=9.99294e-05, gnorm=2.178, loss_scale=2, train_wall=11, gb_free=2.8, wall=113102 2021-06-20 02:03:59 | INFO | train_inner | epoch 004: 883 / 3002 loss=2.598, ppl=6.05, wps=5869.9, ups=0.09, wpb=64866, bsz=128, num_updates=9829, lr=9.99294e-05, gnorm=2.112, loss_scale=2, train_wall=11, gb_free=2.8, wall=113113 2021-06-20 02:04:10 | INFO | train_inner | epoch 004: 884 / 3002 loss=2.61, ppl=6.11, wps=5846, ups=0.09, wpb=64826, bsz=128, num_updates=9830, lr=9.99294e-05, gnorm=2.146, loss_scale=2, train_wall=11, gb_free=2.8, wall=113124 2021-06-20 02:04:21 | INFO | train_inner | epoch 004: 885 / 3002 loss=2.528, ppl=5.77, wps=5944.1, ups=0.09, wpb=64730, bsz=128, num_updates=9831, lr=9.99293e-05, gnorm=2.105, loss_scale=2, train_wall=10, gb_free=2.8, wall=113135 2021-06-20 02:04:32 | INFO | train_inner | epoch 004: 886 / 3002 loss=2.594, ppl=6.04, wps=5838, ups=0.09, wpb=64799, bsz=128, num_updates=9832, lr=9.99293e-05, gnorm=3.145, loss_scale=2, train_wall=11, gb_free=2.8, wall=113146 2021-06-20 02:04:43 | INFO | train_inner | epoch 004: 887 / 3002 loss=2.714, ppl=6.56, wps=5893, ups=0.09, wpb=64857, bsz=128, num_updates=9833, lr=9.99293e-05, gnorm=2.184, loss_scale=2, train_wall=11, gb_free=2.8, wall=113157 2021-06-20 02:04:54 | INFO | train_inner | epoch 004: 888 / 3002 loss=2.694, ppl=6.47, wps=5881.5, ups=0.09, wpb=64810, bsz=128, num_updates=9834, lr=9.99293e-05, gnorm=2.237, loss_scale=2, train_wall=11, gb_free=2.8, wall=113168 2021-06-20 02:05:05 | INFO | train_inner | epoch 004: 889 / 3002 loss=2.688, ppl=6.44, wps=5981.6, ups=0.09, wpb=64864, bsz=128, num_updates=9835, lr=9.99293e-05, gnorm=2.222, loss_scale=2, train_wall=10, gb_free=2.8, wall=113179 2021-06-20 02:05:16 | INFO | train_inner | epoch 004: 890 / 3002 loss=2.694, ppl=6.47, wps=5778.4, ups=0.09, wpb=64718, bsz=128, num_updates=9836, lr=9.99293e-05, gnorm=4.845, loss_scale=2, train_wall=11, gb_free=2.8, wall=113190 2021-06-20 02:05:27 | INFO | train_inner | epoch 004: 891 / 3002 loss=2.68, ppl=6.41, wps=5813.5, ups=0.09, wpb=64819, bsz=128, num_updates=9837, lr=9.99293e-05, gnorm=2.463, loss_scale=2, train_wall=11, gb_free=2.8, wall=113201 2021-06-20 02:05:38 | INFO | train_inner | epoch 004: 892 / 3002 loss=2.608, ppl=6.09, wps=5910.7, ups=0.09, wpb=64766, bsz=128, num_updates=9838, lr=9.99293e-05, gnorm=3.282, loss_scale=2, train_wall=11, gb_free=2.8, wall=113212 2021-06-20 02:05:49 | INFO | train_inner | epoch 004: 893 / 3002 loss=2.59, ppl=6.02, wps=5785.8, ups=0.09, wpb=64773, bsz=128, num_updates=9839, lr=9.99293e-05, gnorm=2.122, loss_scale=2, train_wall=11, gb_free=2.8, wall=113223 2021-06-20 02:06:00 | INFO | train_inner | epoch 004: 894 / 3002 loss=2.669, ppl=6.36, wps=5839.4, ups=0.09, wpb=64887, bsz=128, num_updates=9840, lr=9.99293e-05, gnorm=3.477, loss_scale=2, train_wall=11, gb_free=2.8, wall=113235 2021-06-20 02:06:11 | INFO | train_inner | epoch 004: 895 / 3002 loss=2.9, ppl=7.46, wps=5899.2, ups=0.09, wpb=64708, bsz=128, num_updates=9841, lr=9.99293e-05, gnorm=3.274, loss_scale=2, train_wall=10, gb_free=2.8, wall=113245 2021-06-20 02:06:22 | INFO | train_inner | epoch 004: 896 / 3002 loss=2.605, ppl=6.08, wps=5912.2, ups=0.09, wpb=64819, bsz=128, num_updates=9842, lr=9.99293e-05, gnorm=2.225, loss_scale=2, train_wall=11, gb_free=2.8, wall=113256 2021-06-20 02:06:33 | INFO | train_inner | epoch 004: 897 / 3002 loss=2.754, ppl=6.75, wps=5785.6, ups=0.09, wpb=64770, bsz=128, num_updates=9843, lr=9.99293e-05, gnorm=2.186, loss_scale=2, train_wall=11, gb_free=2.8, wall=113268 2021-06-20 02:06:44 | INFO | train_inner | epoch 004: 898 / 3002 loss=2.591, ppl=6.03, wps=5854.4, ups=0.09, wpb=64758, bsz=128, num_updates=9844, lr=9.99292e-05, gnorm=3.872, loss_scale=2, train_wall=11, gb_free=2.8, wall=113279 2021-06-20 02:06:55 | INFO | train_inner | epoch 004: 899 / 3002 loss=2.603, ppl=6.08, wps=5903.9, ups=0.09, wpb=64838, bsz=128, num_updates=9845, lr=9.99292e-05, gnorm=2.232, loss_scale=2, train_wall=11, gb_free=2.8, wall=113290 2021-06-20 02:07:06 | INFO | train_inner | epoch 004: 900 / 3002 loss=2.533, ppl=5.79, wps=5890.7, ups=0.09, wpb=64848, bsz=128, num_updates=9846, lr=9.99292e-05, gnorm=2.856, loss_scale=2, train_wall=11, gb_free=2.8, wall=113301 2021-06-20 02:07:17 | INFO | train_inner | epoch 004: 901 / 3002 loss=2.6, ppl=6.06, wps=5884.4, ups=0.09, wpb=64856, bsz=128, num_updates=9847, lr=9.99292e-05, gnorm=2.156, loss_scale=2, train_wall=11, gb_free=2.8, wall=113312 2021-06-20 02:07:28 | INFO | train_inner | epoch 004: 902 / 3002 loss=2.658, ppl=6.31, wps=5835.7, ups=0.09, wpb=64827, bsz=128, num_updates=9848, lr=9.99292e-05, gnorm=2.215, loss_scale=2, train_wall=11, gb_free=2.8, wall=113323 2021-06-20 02:07:40 | INFO | train_inner | epoch 004: 903 / 3002 loss=2.602, ppl=6.07, wps=5846.3, ups=0.09, wpb=64782, bsz=128, num_updates=9849, lr=9.99292e-05, gnorm=2.194, loss_scale=2, train_wall=11, gb_free=2.8, wall=113334 2021-06-20 02:07:51 | INFO | train_inner | epoch 004: 904 / 3002 loss=2.653, ppl=6.29, wps=5853.4, ups=0.09, wpb=64842, bsz=128, num_updates=9850, lr=9.99292e-05, gnorm=2.151, loss_scale=2, train_wall=11, gb_free=2.8, wall=113345 2021-06-20 02:08:01 | INFO | train_inner | epoch 004: 905 / 3002 loss=2.668, ppl=6.36, wps=6004.8, ups=0.09, wpb=64767, bsz=128, num_updates=9851, lr=9.99292e-05, gnorm=2.122, loss_scale=2, train_wall=10, gb_free=2.8, wall=113356 2021-06-20 02:08:13 | INFO | train_inner | epoch 004: 906 / 3002 loss=2.772, ppl=6.83, wps=5843.7, ups=0.09, wpb=64825, bsz=128, num_updates=9852, lr=9.99292e-05, gnorm=2.074, loss_scale=2, train_wall=11, gb_free=2.8, wall=113367 2021-06-20 02:08:23 | INFO | train_inner | epoch 004: 907 / 3002 loss=2.723, ppl=6.6, wps=5911.1, ups=0.09, wpb=64834, bsz=128, num_updates=9853, lr=9.99292e-05, gnorm=2.093, loss_scale=2, train_wall=11, gb_free=2.8, wall=113378 2021-06-20 02:08:35 | INFO | train_inner | epoch 004: 908 / 3002 loss=2.612, ppl=6.11, wps=5819, ups=0.09, wpb=64805, bsz=128, num_updates=9854, lr=9.99292e-05, gnorm=2.11, loss_scale=2, train_wall=11, gb_free=2.8, wall=113389 2021-06-20 02:08:46 | INFO | train_inner | epoch 004: 909 / 3002 loss=2.822, ppl=7.07, wps=5754.2, ups=0.09, wpb=64800, bsz=128, num_updates=9855, lr=9.99292e-05, gnorm=2.18, loss_scale=2, train_wall=11, gb_free=2.8, wall=113400 2021-06-20 02:08:57 | INFO | train_inner | epoch 004: 910 / 3002 loss=2.709, ppl=6.54, wps=5866.4, ups=0.09, wpb=64802, bsz=128, num_updates=9856, lr=9.99291e-05, gnorm=2.09, loss_scale=2, train_wall=11, gb_free=2.8, wall=113411 2021-06-20 02:09:08 | INFO | train_inner | epoch 004: 911 / 3002 loss=2.621, ppl=6.15, wps=5839.6, ups=0.09, wpb=64802, bsz=128, num_updates=9857, lr=9.99291e-05, gnorm=2.07, loss_scale=2, train_wall=11, gb_free=2.8, wall=113422 2021-06-20 02:09:19 | INFO | train_inner | epoch 004: 912 / 3002 loss=2.586, ppl=6, wps=5766.9, ups=0.09, wpb=64771, bsz=128, num_updates=9858, lr=9.99291e-05, gnorm=3.779, loss_scale=2, train_wall=11, gb_free=2.8, wall=113434 2021-06-20 02:09:30 | INFO | train_inner | epoch 004: 913 / 3002 loss=2.525, ppl=5.75, wps=5873, ups=0.09, wpb=64956, bsz=128, num_updates=9859, lr=9.99291e-05, gnorm=2.005, loss_scale=2, train_wall=11, gb_free=2.8, wall=113445 2021-06-20 02:09:41 | INFO | train_inner | epoch 004: 914 / 3002 loss=2.708, ppl=6.53, wps=5864.1, ups=0.09, wpb=64792, bsz=128, num_updates=9860, lr=9.99291e-05, gnorm=2.105, loss_scale=2, train_wall=11, gb_free=2.8, wall=113456 2021-06-20 02:09:52 | INFO | train_inner | epoch 004: 915 / 3002 loss=2.513, ppl=5.71, wps=5938.6, ups=0.09, wpb=64862, bsz=128, num_updates=9861, lr=9.99291e-05, gnorm=2.07, loss_scale=2, train_wall=10, gb_free=2.8, wall=113467 2021-06-20 02:10:03 | INFO | train_inner | epoch 004: 916 / 3002 loss=2.617, ppl=6.13, wps=5922.2, ups=0.09, wpb=64893, bsz=128, num_updates=9862, lr=9.99291e-05, gnorm=1.998, loss_scale=2, train_wall=10, gb_free=2.8, wall=113478 2021-06-20 02:10:14 | INFO | train_inner | epoch 004: 917 / 3002 loss=2.656, ppl=6.3, wps=5802.7, ups=0.09, wpb=64755, bsz=128, num_updates=9863, lr=9.99291e-05, gnorm=2.149, loss_scale=2, train_wall=11, gb_free=2.8, wall=113489 2021-06-20 02:10:26 | INFO | train_inner | epoch 004: 918 / 3002 loss=2.672, ppl=6.37, wps=5821.5, ups=0.09, wpb=64779, bsz=128, num_updates=9864, lr=9.99291e-05, gnorm=2.421, loss_scale=2, train_wall=11, gb_free=2.8, wall=113500 2021-06-20 02:10:36 | INFO | train_inner | epoch 004: 919 / 3002 loss=2.511, ppl=5.7, wps=5957.7, ups=0.09, wpb=64842, bsz=128, num_updates=9865, lr=9.99291e-05, gnorm=2.154, loss_scale=2, train_wall=10, gb_free=2.8, wall=113511 2021-06-20 02:10:47 | INFO | train_inner | epoch 004: 920 / 3002 loss=2.47, ppl=5.54, wps=5911.1, ups=0.09, wpb=64894, bsz=128, num_updates=9866, lr=9.99291e-05, gnorm=2.1, loss_scale=2, train_wall=11, gb_free=2.8, wall=113522 2021-06-20 02:10:58 | INFO | train_inner | epoch 004: 921 / 3002 loss=2.497, ppl=5.65, wps=5857.8, ups=0.09, wpb=64815, bsz=128, num_updates=9867, lr=9.99291e-05, gnorm=2.732, loss_scale=2, train_wall=11, gb_free=2.8, wall=113533 2021-06-20 02:11:10 | INFO | train_inner | epoch 004: 922 / 3002 loss=2.502, ppl=5.66, wps=5864.1, ups=0.09, wpb=64883, bsz=128, num_updates=9868, lr=9.99291e-05, gnorm=2.139, loss_scale=2, train_wall=11, gb_free=2.8, wall=113544 2021-06-20 02:11:21 | INFO | train_inner | epoch 004: 923 / 3002 loss=2.612, ppl=6.11, wps=5754.8, ups=0.09, wpb=64907, bsz=128, num_updates=9869, lr=9.9929e-05, gnorm=2.339, loss_scale=2, train_wall=11, gb_free=2.8, wall=113555 2021-06-20 02:11:32 | INFO | train_inner | epoch 004: 924 / 3002 loss=2.778, ppl=6.86, wps=5867.1, ups=0.09, wpb=64866, bsz=128, num_updates=9870, lr=9.9929e-05, gnorm=2.554, loss_scale=2, train_wall=11, gb_free=2.8, wall=113566 2021-06-20 02:11:43 | INFO | train_inner | epoch 004: 925 / 3002 loss=2.738, ppl=6.67, wps=5823.3, ups=0.09, wpb=64833, bsz=128, num_updates=9871, lr=9.9929e-05, gnorm=2.414, loss_scale=2, train_wall=11, gb_free=2.8, wall=113577 2021-06-20 02:11:54 | INFO | train_inner | epoch 004: 926 / 3002 loss=2.506, ppl=5.68, wps=5800, ups=0.09, wpb=64861, bsz=128, num_updates=9872, lr=9.9929e-05, gnorm=2.064, loss_scale=2, train_wall=11, gb_free=2.8, wall=113589 2021-06-20 02:12:05 | INFO | train_inner | epoch 004: 927 / 3002 loss=2.535, ppl=5.8, wps=5807.6, ups=0.09, wpb=64893, bsz=128, num_updates=9873, lr=9.9929e-05, gnorm=4.601, loss_scale=2, train_wall=11, gb_free=2.8, wall=113600 2021-06-20 02:12:16 | INFO | train_inner | epoch 004: 928 / 3002 loss=2.659, ppl=6.32, wps=5893.1, ups=0.09, wpb=64785, bsz=128, num_updates=9874, lr=9.9929e-05, gnorm=9.52, loss_scale=2, train_wall=11, gb_free=2.8, wall=113611 2021-06-20 02:12:27 | INFO | train_inner | epoch 004: 929 / 3002 loss=2.625, ppl=6.17, wps=5992.6, ups=0.09, wpb=64934, bsz=128, num_updates=9875, lr=9.9929e-05, gnorm=2.41, loss_scale=2, train_wall=10, gb_free=2.8, wall=113622 2021-06-20 02:12:38 | INFO | train_inner | epoch 004: 930 / 3002 loss=2.683, ppl=6.42, wps=5953.2, ups=0.09, wpb=64780, bsz=128, num_updates=9876, lr=9.9929e-05, gnorm=2.549, loss_scale=2, train_wall=10, gb_free=2.8, wall=113632 2021-06-20 02:12:49 | INFO | train_inner | epoch 004: 931 / 3002 loss=2.8, ppl=6.97, wps=5864.2, ups=0.09, wpb=64765, bsz=128, num_updates=9877, lr=9.9929e-05, gnorm=4.022, loss_scale=2, train_wall=11, gb_free=2.8, wall=113643 2021-06-20 02:13:00 | INFO | train_inner | epoch 004: 932 / 3002 loss=2.609, ppl=6.1, wps=5763, ups=0.09, wpb=64820, bsz=128, num_updates=9878, lr=9.9929e-05, gnorm=2.267, loss_scale=2, train_wall=11, gb_free=2.8, wall=113655 2021-06-20 02:13:12 | INFO | train_inner | epoch 004: 933 / 3002 loss=2.558, ppl=5.89, wps=5791.2, ups=0.09, wpb=64842, bsz=128, num_updates=9879, lr=9.9929e-05, gnorm=2.224, loss_scale=2, train_wall=11, gb_free=2.8, wall=113666 2021-06-20 02:13:23 | INFO | train_inner | epoch 004: 934 / 3002 loss=2.437, ppl=5.42, wps=5869, ups=0.09, wpb=64841, bsz=128, num_updates=9880, lr=9.9929e-05, gnorm=2.708, loss_scale=2, train_wall=11, gb_free=2.8, wall=113677 2021-06-20 02:13:34 | INFO | train_inner | epoch 004: 935 / 3002 loss=2.557, ppl=5.88, wps=5781.2, ups=0.09, wpb=64787, bsz=128, num_updates=9881, lr=9.99289e-05, gnorm=2.219, loss_scale=2, train_wall=11, gb_free=2.8, wall=113688 2021-06-20 02:13:45 | INFO | train_inner | epoch 004: 936 / 3002 loss=2.575, ppl=5.96, wps=5836.1, ups=0.09, wpb=64766, bsz=128, num_updates=9882, lr=9.99289e-05, gnorm=2.46, loss_scale=2, train_wall=11, gb_free=2.8, wall=113699 2021-06-20 02:13:56 | INFO | train_inner | epoch 004: 937 / 3002 loss=2.654, ppl=6.3, wps=5920.8, ups=0.09, wpb=64794, bsz=128, num_updates=9883, lr=9.99289e-05, gnorm=2.262, loss_scale=2, train_wall=10, gb_free=2.8, wall=113710 2021-06-20 02:14:07 | INFO | train_inner | epoch 004: 938 / 3002 loss=2.546, ppl=5.84, wps=5823.7, ups=0.09, wpb=64866, bsz=128, num_updates=9884, lr=9.99289e-05, gnorm=2.364, loss_scale=2, train_wall=11, gb_free=2.8, wall=113721 2021-06-20 02:14:18 | INFO | train_inner | epoch 004: 939 / 3002 loss=2.692, ppl=6.46, wps=5889.2, ups=0.09, wpb=64828, bsz=128, num_updates=9885, lr=9.99289e-05, gnorm=2.28, loss_scale=2, train_wall=11, gb_free=2.8, wall=113732 2021-06-20 02:14:29 | INFO | train_inner | epoch 004: 940 / 3002 loss=2.671, ppl=6.37, wps=5857.2, ups=0.09, wpb=64837, bsz=128, num_updates=9886, lr=9.99289e-05, gnorm=2.192, loss_scale=2, train_wall=11, gb_free=2.8, wall=113743 2021-06-20 02:14:40 | INFO | train_inner | epoch 004: 941 / 3002 loss=2.661, ppl=6.33, wps=5854.7, ups=0.09, wpb=64672, bsz=128, num_updates=9887, lr=9.99289e-05, gnorm=6.406, loss_scale=2, train_wall=11, gb_free=2.8, wall=113754 2021-06-20 02:14:51 | INFO | train_inner | epoch 004: 942 / 3002 loss=2.731, ppl=6.64, wps=5826.7, ups=0.09, wpb=64809, bsz=128, num_updates=9888, lr=9.99289e-05, gnorm=2.311, loss_scale=2, train_wall=11, gb_free=2.8, wall=113766 2021-06-20 02:15:02 | INFO | train_inner | epoch 004: 943 / 3002 loss=2.625, ppl=6.17, wps=5833.1, ups=0.09, wpb=64809, bsz=128, num_updates=9889, lr=9.99289e-05, gnorm=2.033, loss_scale=2, train_wall=11, gb_free=2.8, wall=113777 2021-06-20 02:15:13 | INFO | train_inner | epoch 004: 944 / 3002 loss=2.605, ppl=6.08, wps=5813.9, ups=0.09, wpb=64795, bsz=128, num_updates=9890, lr=9.99289e-05, gnorm=2.074, loss_scale=2, train_wall=11, gb_free=2.8, wall=113788 2021-06-20 02:15:25 | INFO | train_inner | epoch 004: 945 / 3002 loss=2.675, ppl=6.39, wps=5835.9, ups=0.09, wpb=64857, bsz=128, num_updates=9891, lr=9.99289e-05, gnorm=2.19, loss_scale=2, train_wall=11, gb_free=2.8, wall=113799 2021-06-20 02:15:36 | INFO | train_inner | epoch 004: 946 / 3002 loss=2.523, ppl=5.75, wps=5892.1, ups=0.09, wpb=64909, bsz=128, num_updates=9892, lr=9.99289e-05, gnorm=4.753, loss_scale=2, train_wall=11, gb_free=2.8, wall=113810 2021-06-20 02:15:47 | INFO | train_inner | epoch 004: 947 / 3002 loss=2.524, ppl=5.75, wps=5896.4, ups=0.09, wpb=64878, bsz=128, num_updates=9893, lr=9.99289e-05, gnorm=2.561, loss_scale=2, train_wall=11, gb_free=2.8, wall=113821 2021-06-20 02:15:58 | INFO | train_inner | epoch 004: 948 / 3002 loss=2.48, ppl=5.58, wps=5769.5, ups=0.09, wpb=64858, bsz=128, num_updates=9894, lr=9.99288e-05, gnorm=2.203, loss_scale=2, train_wall=11, gb_free=2.8, wall=113832 2021-06-20 02:16:09 | INFO | train_inner | epoch 004: 949 / 3002 loss=2.5, ppl=5.66, wps=5864.3, ups=0.09, wpb=64887, bsz=128, num_updates=9895, lr=9.99288e-05, gnorm=2.304, loss_scale=2, train_wall=11, gb_free=2.8, wall=113843 2021-06-20 02:16:20 | INFO | train_inner | epoch 004: 950 / 3002 loss=2.715, ppl=6.57, wps=5852.9, ups=0.09, wpb=64794, bsz=128, num_updates=9896, lr=9.99288e-05, gnorm=2.952, loss_scale=2, train_wall=11, gb_free=2.8, wall=113854 2021-06-20 02:16:31 | INFO | train_inner | epoch 004: 951 / 3002 loss=2.531, ppl=5.78, wps=5823.8, ups=0.09, wpb=64938, bsz=128, num_updates=9897, lr=9.99288e-05, gnorm=2.315, loss_scale=2, train_wall=11, gb_free=2.8, wall=113865 2021-06-20 02:16:42 | INFO | train_inner | epoch 004: 952 / 3002 loss=2.592, ppl=6.03, wps=5992.1, ups=0.09, wpb=64900, bsz=128, num_updates=9898, lr=9.99288e-05, gnorm=2.061, loss_scale=2, train_wall=10, gb_free=2.8, wall=113876 2021-06-20 02:16:53 | INFO | train_inner | epoch 004: 953 / 3002 loss=2.566, ppl=5.92, wps=5729, ups=0.09, wpb=64840, bsz=128, num_updates=9899, lr=9.99288e-05, gnorm=2.039, loss_scale=2, train_wall=11, gb_free=2.8, wall=113888 2021-06-20 02:17:04 | INFO | train_inner | epoch 004: 954 / 3002 loss=2.629, ppl=6.19, wps=5805.4, ups=0.09, wpb=64755, bsz=128, num_updates=9900, lr=9.99288e-05, gnorm=2.864, loss_scale=2, train_wall=11, gb_free=2.8, wall=113899 2021-06-20 02:17:16 | INFO | train_inner | epoch 004: 955 / 3002 loss=2.635, ppl=6.21, wps=5848.1, ups=0.09, wpb=64887, bsz=128, num_updates=9901, lr=9.99288e-05, gnorm=2.338, loss_scale=2, train_wall=11, gb_free=2.8, wall=113910 2021-06-20 02:17:27 | INFO | train_inner | epoch 004: 956 / 3002 loss=2.42, ppl=5.35, wps=5813.3, ups=0.09, wpb=64911, bsz=128, num_updates=9902, lr=9.99288e-05, gnorm=2.04, loss_scale=2, train_wall=11, gb_free=2.8, wall=113921 2021-06-20 02:17:38 | INFO | train_inner | epoch 004: 957 / 3002 loss=2.639, ppl=6.23, wps=5832.8, ups=0.09, wpb=64733, bsz=128, num_updates=9903, lr=9.99288e-05, gnorm=2.126, loss_scale=4, train_wall=11, gb_free=2.8, wall=113932 2021-06-20 02:17:49 | INFO | train_inner | epoch 004: 958 / 3002 loss=2.729, ppl=6.63, wps=5773.7, ups=0.09, wpb=64827, bsz=128, num_updates=9904, lr=9.99288e-05, gnorm=2.203, loss_scale=4, train_wall=11, gb_free=2.8, wall=113943 2021-06-20 02:18:00 | INFO | train_inner | epoch 004: 959 / 3002 loss=2.48, ppl=5.58, wps=5865.2, ups=0.09, wpb=64832, bsz=128, num_updates=9905, lr=9.99288e-05, gnorm=2.01, loss_scale=4, train_wall=11, gb_free=2.8, wall=113954 2021-06-20 02:18:11 | INFO | train_inner | epoch 004: 960 / 3002 loss=2.631, ppl=6.2, wps=5947.9, ups=0.09, wpb=64826, bsz=128, num_updates=9906, lr=9.99287e-05, gnorm=2.021, loss_scale=4, train_wall=10, gb_free=2.8, wall=113965 2021-06-20 02:18:22 | INFO | train_inner | epoch 004: 961 / 3002 loss=2.494, ppl=5.63, wps=5759, ups=0.09, wpb=64844, bsz=128, num_updates=9907, lr=9.99287e-05, gnorm=2.039, loss_scale=4, train_wall=11, gb_free=2.8, wall=113977 2021-06-20 02:18:33 | INFO | train_inner | epoch 004: 962 / 3002 loss=2.509, ppl=5.69, wps=5817.3, ups=0.09, wpb=64858, bsz=128, num_updates=9908, lr=9.99287e-05, gnorm=2.077, loss_scale=4, train_wall=11, gb_free=2.8, wall=113988 2021-06-20 02:18:44 | INFO | train_inner | epoch 004: 963 / 3002 loss=2.521, ppl=5.74, wps=5856.8, ups=0.09, wpb=64802, bsz=128, num_updates=9909, lr=9.99287e-05, gnorm=2.042, loss_scale=4, train_wall=11, gb_free=2.8, wall=113999 2021-06-20 02:18:56 | INFO | train_inner | epoch 004: 964 / 3002 loss=2.511, ppl=5.7, wps=5798.4, ups=0.09, wpb=64792, bsz=128, num_updates=9910, lr=9.99287e-05, gnorm=2.619, loss_scale=4, train_wall=11, gb_free=2.8, wall=114010 2021-06-20 02:19:07 | INFO | train_inner | epoch 004: 965 / 3002 loss=2.543, ppl=5.83, wps=5810.9, ups=0.09, wpb=64852, bsz=128, num_updates=9911, lr=9.99287e-05, gnorm=2.103, loss_scale=4, train_wall=11, gb_free=2.8, wall=114021 2021-06-20 02:19:18 | INFO | train_inner | epoch 004: 966 / 3002 loss=2.63, ppl=6.19, wps=5885.9, ups=0.09, wpb=64821, bsz=128, num_updates=9912, lr=9.99287e-05, gnorm=2.06, loss_scale=4, train_wall=11, gb_free=2.8, wall=114032 2021-06-20 02:19:29 | INFO | train_inner | epoch 004: 967 / 3002 loss=2.439, ppl=5.42, wps=5660.3, ups=0.09, wpb=64782, bsz=128, num_updates=9913, lr=9.99287e-05, gnorm=2.17, loss_scale=4, train_wall=11, gb_free=2.8, wall=114044 2021-06-20 02:19:40 | INFO | train_inner | epoch 004: 968 / 3002 loss=2.572, ppl=5.95, wps=5810.9, ups=0.09, wpb=64816, bsz=128, num_updates=9914, lr=9.99287e-05, gnorm=2.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=114055 2021-06-20 02:19:51 | INFO | train_inner | epoch 004: 969 / 3002 loss=2.679, ppl=6.4, wps=6000.1, ups=0.09, wpb=64836, bsz=128, num_updates=9915, lr=9.99287e-05, gnorm=2.074, loss_scale=4, train_wall=10, gb_free=2.8, wall=114066 2021-06-20 02:20:02 | INFO | train_inner | epoch 004: 970 / 3002 loss=2.599, ppl=6.06, wps=5769.6, ups=0.09, wpb=64846, bsz=128, num_updates=9916, lr=9.99287e-05, gnorm=2.092, loss_scale=4, train_wall=11, gb_free=2.8, wall=114077 2021-06-20 02:20:14 | INFO | train_inner | epoch 004: 971 / 3002 loss=2.608, ppl=6.09, wps=5764.3, ups=0.09, wpb=64786, bsz=128, num_updates=9917, lr=9.99287e-05, gnorm=4.07, loss_scale=4, train_wall=11, gb_free=2.8, wall=114088 2021-06-20 02:20:25 | INFO | train_inner | epoch 004: 972 / 3002 loss=2.595, ppl=6.04, wps=5842.6, ups=0.09, wpb=64796, bsz=128, num_updates=9918, lr=9.99287e-05, gnorm=2.077, loss_scale=4, train_wall=11, gb_free=2.8, wall=114099 2021-06-20 02:20:36 | INFO | train_inner | epoch 004: 973 / 3002 loss=2.648, ppl=6.27, wps=5894.8, ups=0.09, wpb=64786, bsz=128, num_updates=9919, lr=9.99286e-05, gnorm=1.984, loss_scale=4, train_wall=11, gb_free=2.8, wall=114110 2021-06-20 02:20:47 | INFO | train_inner | epoch 004: 974 / 3002 loss=2.447, ppl=5.45, wps=5819.7, ups=0.09, wpb=64819, bsz=128, num_updates=9920, lr=9.99286e-05, gnorm=2.035, loss_scale=4, train_wall=11, gb_free=2.8, wall=114121 2021-06-20 02:20:58 | INFO | train_inner | epoch 004: 975 / 3002 loss=2.6, ppl=6.06, wps=5910.9, ups=0.09, wpb=64877, bsz=128, num_updates=9921, lr=9.99286e-05, gnorm=2.131, loss_scale=4, train_wall=11, gb_free=2.8, wall=114132 2021-06-20 02:21:09 | INFO | train_inner | epoch 004: 976 / 3002 loss=2.647, ppl=6.26, wps=5855.2, ups=0.09, wpb=64738, bsz=128, num_updates=9922, lr=9.99286e-05, gnorm=2.021, loss_scale=4, train_wall=11, gb_free=2.8, wall=114143 2021-06-20 02:21:20 | INFO | train_inner | epoch 004: 977 / 3002 loss=2.7, ppl=6.5, wps=5863.9, ups=0.09, wpb=64872, bsz=128, num_updates=9923, lr=9.99286e-05, gnorm=2.271, loss_scale=4, train_wall=11, gb_free=2.8, wall=114154 2021-06-20 02:21:31 | INFO | train_inner | epoch 004: 978 / 3002 loss=2.604, ppl=6.08, wps=5785.2, ups=0.09, wpb=64883, bsz=128, num_updates=9924, lr=9.99286e-05, gnorm=2.08, loss_scale=4, train_wall=11, gb_free=2.8, wall=114166 2021-06-20 02:21:43 | INFO | train_inner | epoch 004: 979 / 3002 loss=2.462, ppl=5.51, wps=5671.5, ups=0.09, wpb=64786, bsz=128, num_updates=9925, lr=9.99286e-05, gnorm=4.289, loss_scale=4, train_wall=11, gb_free=2.8, wall=114177 2021-06-20 02:21:53 | INFO | train_inner | epoch 004: 980 / 3002 loss=2.48, ppl=5.58, wps=5990, ups=0.09, wpb=64860, bsz=128, num_updates=9926, lr=9.99286e-05, gnorm=2.163, loss_scale=4, train_wall=10, gb_free=2.8, wall=114188 2021-06-20 02:22:04 | INFO | train_inner | epoch 004: 981 / 3002 loss=2.49, ppl=5.62, wps=5975, ups=0.09, wpb=64861, bsz=128, num_updates=9927, lr=9.99286e-05, gnorm=2.138, loss_scale=4, train_wall=10, gb_free=2.8, wall=114199 2021-06-20 02:22:16 | INFO | train_inner | epoch 004: 982 / 3002 loss=2.416, ppl=5.34, wps=5767.6, ups=0.09, wpb=64837, bsz=128, num_updates=9928, lr=9.99286e-05, gnorm=2.084, loss_scale=4, train_wall=11, gb_free=2.8, wall=114210 2021-06-20 02:22:27 | INFO | train_inner | epoch 004: 983 / 3002 loss=2.467, ppl=5.53, wps=5694.9, ups=0.09, wpb=64813, bsz=128, num_updates=9929, lr=9.99286e-05, gnorm=2.063, loss_scale=4, train_wall=11, gb_free=2.8, wall=114221 2021-06-20 02:22:38 | INFO | train_inner | epoch 004: 984 / 3002 loss=2.592, ppl=6.03, wps=5942.7, ups=0.09, wpb=64823, bsz=128, num_updates=9930, lr=9.99286e-05, gnorm=2.133, loss_scale=4, train_wall=10, gb_free=2.8, wall=114232 2021-06-20 02:22:49 | INFO | train_inner | epoch 004: 985 / 3002 loss=2.635, ppl=6.21, wps=5885.4, ups=0.09, wpb=64875, bsz=128, num_updates=9931, lr=9.99285e-05, gnorm=1.997, loss_scale=4, train_wall=11, gb_free=2.8, wall=114243 2021-06-20 02:23:00 | INFO | train_inner | epoch 004: 986 / 3002 loss=2.659, ppl=6.32, wps=5925.9, ups=0.09, wpb=64789, bsz=128, num_updates=9932, lr=9.99285e-05, gnorm=2.091, loss_scale=4, train_wall=11, gb_free=2.8, wall=114254 2021-06-20 02:23:11 | INFO | train_inner | epoch 004: 987 / 3002 loss=2.569, ppl=5.93, wps=5861.4, ups=0.09, wpb=64843, bsz=128, num_updates=9933, lr=9.99285e-05, gnorm=2.134, loss_scale=4, train_wall=11, gb_free=2.8, wall=114265 2021-06-20 02:23:22 | INFO | train_inner | epoch 004: 988 / 3002 loss=2.873, ppl=7.33, wps=5908.5, ups=0.09, wpb=64806, bsz=128, num_updates=9934, lr=9.99285e-05, gnorm=2.028, loss_scale=4, train_wall=10, gb_free=2.8, wall=114276 2021-06-20 02:23:33 | INFO | train_inner | epoch 004: 989 / 3002 loss=2.562, ppl=5.9, wps=5833.4, ups=0.09, wpb=64827, bsz=128, num_updates=9935, lr=9.99285e-05, gnorm=2.055, loss_scale=4, train_wall=11, gb_free=2.8, wall=114287 2021-06-20 02:23:44 | INFO | train_inner | epoch 004: 990 / 3002 loss=2.564, ppl=5.91, wps=5828.6, ups=0.09, wpb=64773, bsz=128, num_updates=9936, lr=9.99285e-05, gnorm=4.284, loss_scale=4, train_wall=11, gb_free=2.8, wall=114298 2021-06-20 02:23:55 | INFO | train_inner | epoch 004: 991 / 3002 loss=2.591, ppl=6.02, wps=5900.5, ups=0.09, wpb=64728, bsz=128, num_updates=9937, lr=9.99285e-05, gnorm=2.032, loss_scale=4, train_wall=11, gb_free=2.8, wall=114309 2021-06-20 02:24:06 | INFO | train_inner | epoch 004: 992 / 3002 loss=2.513, ppl=5.71, wps=5807.1, ups=0.09, wpb=64841, bsz=128, num_updates=9938, lr=9.99285e-05, gnorm=2.069, loss_scale=4, train_wall=11, gb_free=2.8, wall=114321 2021-06-20 02:24:17 | INFO | train_inner | epoch 004: 993 / 3002 loss=2.483, ppl=5.59, wps=5986.3, ups=0.09, wpb=64846, bsz=128, num_updates=9939, lr=9.99285e-05, gnorm=3.215, loss_scale=4, train_wall=10, gb_free=2.8, wall=114331 2021-06-20 02:24:28 | INFO | train_inner | epoch 004: 994 / 3002 loss=2.531, ppl=5.78, wps=5832.8, ups=0.09, wpb=64822, bsz=128, num_updates=9940, lr=9.99285e-05, gnorm=2.056, loss_scale=4, train_wall=11, gb_free=2.8, wall=114343 2021-06-20 02:24:39 | INFO | train_inner | epoch 004: 995 / 3002 loss=2.593, ppl=6.03, wps=5847.8, ups=0.09, wpb=64853, bsz=128, num_updates=9941, lr=9.99285e-05, gnorm=2.506, loss_scale=4, train_wall=11, gb_free=2.8, wall=114354 2021-06-20 02:24:50 | INFO | train_inner | epoch 004: 996 / 3002 loss=2.567, ppl=5.93, wps=5800, ups=0.09, wpb=64875, bsz=128, num_updates=9942, lr=9.99285e-05, gnorm=2.067, loss_scale=4, train_wall=11, gb_free=2.8, wall=114365 2021-06-20 02:25:02 | INFO | train_inner | epoch 004: 997 / 3002 loss=2.506, ppl=5.68, wps=5804.3, ups=0.09, wpb=64800, bsz=128, num_updates=9943, lr=9.99285e-05, gnorm=2.03, loss_scale=4, train_wall=11, gb_free=2.8, wall=114376 2021-06-20 02:25:13 | INFO | train_inner | epoch 004: 998 / 3002 loss=2.581, ppl=5.98, wps=5799.6, ups=0.09, wpb=64880, bsz=128, num_updates=9944, lr=9.99284e-05, gnorm=2.108, loss_scale=4, train_wall=11, gb_free=2.8, wall=114387 2021-06-20 02:25:24 | INFO | train_inner | epoch 004: 999 / 3002 loss=2.596, ppl=6.05, wps=5864.3, ups=0.09, wpb=64804, bsz=128, num_updates=9945, lr=9.99284e-05, gnorm=2.062, loss_scale=4, train_wall=11, gb_free=2.8, wall=114398 2021-06-20 02:25:35 | INFO | train_inner | epoch 004: 1000 / 3002 loss=2.575, ppl=5.96, wps=5887.9, ups=0.09, wpb=64887, bsz=128, num_updates=9946, lr=9.99284e-05, gnorm=2.191, loss_scale=4, train_wall=11, gb_free=2.8, wall=114409 2021-06-20 02:25:46 | INFO | train_inner | epoch 004: 1001 / 3002 loss=2.479, ppl=5.58, wps=5848.6, ups=0.09, wpb=64864, bsz=128, num_updates=9947, lr=9.99284e-05, gnorm=2.053, loss_scale=4, train_wall=11, gb_free=2.8, wall=114420 2021-06-20 02:25:57 | INFO | train_inner | epoch 004: 1002 / 3002 loss=2.518, ppl=5.73, wps=5831.8, ups=0.09, wpb=64853, bsz=128, num_updates=9948, lr=9.99284e-05, gnorm=2.052, loss_scale=4, train_wall=11, gb_free=2.8, wall=114431 2021-06-20 02:26:08 | INFO | train_inner | epoch 004: 1003 / 3002 loss=2.578, ppl=5.97, wps=5984.6, ups=0.09, wpb=64902, bsz=128, num_updates=9949, lr=9.99284e-05, gnorm=2.095, loss_scale=4, train_wall=10, gb_free=2.8, wall=114442 2021-06-20 02:26:19 | INFO | train_inner | epoch 004: 1004 / 3002 loss=2.638, ppl=6.22, wps=5903.2, ups=0.09, wpb=64810, bsz=128, num_updates=9950, lr=9.99284e-05, gnorm=2.089, loss_scale=4, train_wall=11, gb_free=2.8, wall=114453 2021-06-20 02:26:30 | INFO | train_inner | epoch 004: 1005 / 3002 loss=2.437, ppl=5.42, wps=6001.3, ups=0.09, wpb=64969, bsz=128, num_updates=9951, lr=9.99284e-05, gnorm=2.015, loss_scale=4, train_wall=10, gb_free=2.8, wall=114464 2021-06-20 02:26:41 | INFO | train_inner | epoch 004: 1006 / 3002 loss=2.685, ppl=6.43, wps=5868.6, ups=0.09, wpb=64803, bsz=128, num_updates=9952, lr=9.99284e-05, gnorm=3.035, loss_scale=4, train_wall=11, gb_free=2.8, wall=114475 2021-06-20 02:26:52 | INFO | train_inner | epoch 004: 1007 / 3002 loss=2.481, ppl=5.58, wps=5860.5, ups=0.09, wpb=64857, bsz=128, num_updates=9953, lr=9.99284e-05, gnorm=2.027, loss_scale=4, train_wall=11, gb_free=2.8, wall=114486 2021-06-20 02:27:03 | INFO | train_inner | epoch 004: 1008 / 3002 loss=2.657, ppl=6.31, wps=5813.7, ups=0.09, wpb=64811, bsz=128, num_updates=9954, lr=9.99284e-05, gnorm=2.112, loss_scale=4, train_wall=11, gb_free=2.8, wall=114497 2021-06-20 02:27:14 | INFO | train_inner | epoch 004: 1009 / 3002 loss=2.583, ppl=5.99, wps=5855.2, ups=0.09, wpb=64839, bsz=128, num_updates=9955, lr=9.99284e-05, gnorm=2.055, loss_scale=4, train_wall=11, gb_free=2.8, wall=114508 2021-06-20 02:27:25 | INFO | train_inner | epoch 004: 1010 / 3002 loss=2.587, ppl=6.01, wps=5848.1, ups=0.09, wpb=64886, bsz=128, num_updates=9956, lr=9.99283e-05, gnorm=2.203, loss_scale=4, train_wall=11, gb_free=2.8, wall=114520 2021-06-20 02:27:36 | INFO | train_inner | epoch 004: 1011 / 3002 loss=2.539, ppl=5.81, wps=5850.9, ups=0.09, wpb=64817, bsz=128, num_updates=9957, lr=9.99283e-05, gnorm=2.151, loss_scale=4, train_wall=11, gb_free=2.8, wall=114531 2021-06-20 02:27:47 | INFO | train_inner | epoch 004: 1012 / 3002 loss=2.543, ppl=5.83, wps=5888.5, ups=0.09, wpb=64825, bsz=128, num_updates=9958, lr=9.99283e-05, gnorm=2.02, loss_scale=4, train_wall=11, gb_free=2.8, wall=114542 2021-06-20 02:27:58 | INFO | train_inner | epoch 004: 1013 / 3002 loss=2.558, ppl=5.89, wps=5840, ups=0.09, wpb=64866, bsz=128, num_updates=9959, lr=9.99283e-05, gnorm=2.114, loss_scale=4, train_wall=11, gb_free=2.8, wall=114553 2021-06-20 02:28:09 | INFO | train_inner | epoch 004: 1014 / 3002 loss=2.478, ppl=5.57, wps=5915.2, ups=0.09, wpb=64865, bsz=128, num_updates=9960, lr=9.99283e-05, gnorm=2.086, loss_scale=4, train_wall=11, gb_free=2.8, wall=114564 2021-06-20 02:28:20 | INFO | train_inner | epoch 004: 1015 / 3002 loss=2.477, ppl=5.57, wps=5848.3, ups=0.09, wpb=64801, bsz=128, num_updates=9961, lr=9.99283e-05, gnorm=2.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=114575 2021-06-20 02:28:31 | INFO | train_inner | epoch 004: 1016 / 3002 loss=2.611, ppl=6.11, wps=5996.8, ups=0.09, wpb=64819, bsz=128, num_updates=9962, lr=9.99283e-05, gnorm=2.004, loss_scale=4, train_wall=10, gb_free=2.8, wall=114586 2021-06-20 02:28:42 | INFO | train_inner | epoch 004: 1017 / 3002 loss=2.546, ppl=5.84, wps=5825.5, ups=0.09, wpb=64860, bsz=128, num_updates=9963, lr=9.99283e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=114597 2021-06-20 02:28:53 | INFO | train_inner | epoch 004: 1018 / 3002 loss=2.441, ppl=5.43, wps=5906.8, ups=0.09, wpb=64831, bsz=128, num_updates=9964, lr=9.99283e-05, gnorm=2.005, loss_scale=4, train_wall=11, gb_free=2.8, wall=114608 2021-06-20 02:29:05 | INFO | train_inner | epoch 004: 1019 / 3002 loss=2.657, ppl=6.31, wps=5780.2, ups=0.09, wpb=64825, bsz=128, num_updates=9965, lr=9.99283e-05, gnorm=2.05, loss_scale=4, train_wall=11, gb_free=2.8, wall=114619 2021-06-20 02:29:16 | INFO | train_inner | epoch 004: 1020 / 3002 loss=2.548, ppl=5.85, wps=5808.3, ups=0.09, wpb=64872, bsz=128, num_updates=9966, lr=9.99283e-05, gnorm=1.996, loss_scale=4, train_wall=11, gb_free=2.8, wall=114630 2021-06-20 02:29:27 | INFO | train_inner | epoch 004: 1021 / 3002 loss=2.573, ppl=5.95, wps=5849.7, ups=0.09, wpb=64844, bsz=128, num_updates=9967, lr=9.99283e-05, gnorm=13.004, loss_scale=4, train_wall=11, gb_free=2.8, wall=114641 2021-06-20 02:29:38 | INFO | train_inner | epoch 004: 1022 / 3002 loss=2.593, ppl=6.03, wps=5925.8, ups=0.09, wpb=64835, bsz=128, num_updates=9968, lr=9.99283e-05, gnorm=1.997, loss_scale=4, train_wall=10, gb_free=2.8, wall=114652 2021-06-20 02:29:49 | INFO | train_inner | epoch 004: 1023 / 3002 loss=2.586, ppl=6.01, wps=5956.1, ups=0.09, wpb=64796, bsz=128, num_updates=9969, lr=9.99282e-05, gnorm=3.898, loss_scale=4, train_wall=10, gb_free=2.8, wall=114663 2021-06-20 02:30:00 | INFO | train_inner | epoch 004: 1024 / 3002 loss=2.692, ppl=6.46, wps=5880.8, ups=0.09, wpb=64763, bsz=128, num_updates=9970, lr=9.99282e-05, gnorm=2.28, loss_scale=4, train_wall=11, gb_free=2.8, wall=114674 2021-06-20 02:30:11 | INFO | train_inner | epoch 004: 1025 / 3002 loss=2.594, ppl=6.04, wps=5892.3, ups=0.09, wpb=64863, bsz=128, num_updates=9971, lr=9.99282e-05, gnorm=1.933, loss_scale=4, train_wall=11, gb_free=2.8, wall=114685 2021-06-20 02:30:22 | INFO | train_inner | epoch 004: 1026 / 3002 loss=2.743, ppl=6.69, wps=5872.7, ups=0.09, wpb=64824, bsz=128, num_updates=9972, lr=9.99282e-05, gnorm=2.095, loss_scale=4, train_wall=11, gb_free=2.8, wall=114696 2021-06-20 02:30:33 | INFO | train_inner | epoch 004: 1027 / 3002 loss=2.439, ppl=5.42, wps=5862.1, ups=0.09, wpb=64872, bsz=128, num_updates=9973, lr=9.99282e-05, gnorm=2.266, loss_scale=4, train_wall=11, gb_free=2.8, wall=114707 2021-06-20 02:30:44 | INFO | train_inner | epoch 004: 1028 / 3002 loss=2.625, ppl=6.17, wps=5784.6, ups=0.09, wpb=64796, bsz=128, num_updates=9974, lr=9.99282e-05, gnorm=2.036, loss_scale=4, train_wall=11, gb_free=2.8, wall=114718 2021-06-20 02:30:55 | INFO | train_inner | epoch 004: 1029 / 3002 loss=2.69, ppl=6.45, wps=5780.5, ups=0.09, wpb=64832, bsz=128, num_updates=9975, lr=9.99282e-05, gnorm=2.019, loss_scale=4, train_wall=11, gb_free=2.8, wall=114730 2021-06-20 02:31:06 | INFO | train_inner | epoch 004: 1030 / 3002 loss=2.658, ppl=6.31, wps=5850.5, ups=0.09, wpb=64760, bsz=128, num_updates=9976, lr=9.99282e-05, gnorm=2.165, loss_scale=4, train_wall=11, gb_free=2.8, wall=114741 2021-06-20 02:31:17 | INFO | train_inner | epoch 004: 1031 / 3002 loss=2.61, ppl=6.11, wps=5854.5, ups=0.09, wpb=64812, bsz=128, num_updates=9977, lr=9.99282e-05, gnorm=2.15, loss_scale=4, train_wall=11, gb_free=2.8, wall=114752 2021-06-20 02:31:28 | INFO | train_inner | epoch 004: 1032 / 3002 loss=2.651, ppl=6.28, wps=5848.5, ups=0.09, wpb=64821, bsz=128, num_updates=9978, lr=9.99282e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=114763 2021-06-20 02:31:39 | INFO | train_inner | epoch 004: 1033 / 3002 loss=2.445, ppl=5.45, wps=5933, ups=0.09, wpb=64874, bsz=128, num_updates=9979, lr=9.99282e-05, gnorm=2.213, loss_scale=4, train_wall=10, gb_free=2.8, wall=114774 2021-06-20 02:31:50 | INFO | train_inner | epoch 004: 1034 / 3002 loss=2.545, ppl=5.84, wps=5919.7, ups=0.09, wpb=64787, bsz=128, num_updates=9980, lr=9.99282e-05, gnorm=2.07, loss_scale=4, train_wall=10, gb_free=2.8, wall=114785 2021-06-20 02:32:02 | INFO | train_inner | epoch 004: 1035 / 3002 loss=2.478, ppl=5.57, wps=5773.6, ups=0.09, wpb=64844, bsz=128, num_updates=9981, lr=9.99281e-05, gnorm=2.006, loss_scale=4, train_wall=11, gb_free=2.8, wall=114796 2021-06-20 02:32:13 | INFO | train_inner | epoch 004: 1036 / 3002 loss=2.691, ppl=6.46, wps=5866, ups=0.09, wpb=64862, bsz=128, num_updates=9982, lr=9.99281e-05, gnorm=2.068, loss_scale=4, train_wall=11, gb_free=2.8, wall=114807 2021-06-20 02:32:24 | INFO | train_inner | epoch 004: 1037 / 3002 loss=2.579, ppl=5.98, wps=5825.6, ups=0.09, wpb=64864, bsz=128, num_updates=9983, lr=9.99281e-05, gnorm=2.139, loss_scale=4, train_wall=11, gb_free=2.8, wall=114818 2021-06-20 02:32:35 | INFO | train_inner | epoch 004: 1038 / 3002 loss=2.393, ppl=5.25, wps=5888.2, ups=0.09, wpb=64869, bsz=128, num_updates=9984, lr=9.99281e-05, gnorm=2.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=114829 2021-06-20 02:32:46 | INFO | train_inner | epoch 004: 1039 / 3002 loss=2.569, ppl=5.93, wps=5879.2, ups=0.09, wpb=64763, bsz=128, num_updates=9985, lr=9.99281e-05, gnorm=2.024, loss_scale=4, train_wall=11, gb_free=2.8, wall=114840 2021-06-20 02:32:57 | INFO | train_inner | epoch 004: 1040 / 3002 loss=2.499, ppl=5.65, wps=5842.7, ups=0.09, wpb=64792, bsz=128, num_updates=9986, lr=9.99281e-05, gnorm=2.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=114851 2021-06-20 02:33:08 | INFO | train_inner | epoch 004: 1041 / 3002 loss=2.872, ppl=7.32, wps=5889.8, ups=0.09, wpb=64870, bsz=128, num_updates=9987, lr=9.99281e-05, gnorm=2.37, loss_scale=4, train_wall=11, gb_free=2.8, wall=114862 2021-06-20 02:33:19 | INFO | train_inner | epoch 004: 1042 / 3002 loss=2.606, ppl=6.09, wps=5868.7, ups=0.09, wpb=64917, bsz=128, num_updates=9988, lr=9.99281e-05, gnorm=2.218, loss_scale=4, train_wall=11, gb_free=2.8, wall=114873 2021-06-20 02:33:30 | INFO | train_inner | epoch 004: 1043 / 3002 loss=2.692, ppl=6.46, wps=5800.5, ups=0.09, wpb=64762, bsz=128, num_updates=9989, lr=9.99281e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=114884 2021-06-20 02:33:41 | INFO | train_inner | epoch 004: 1044 / 3002 loss=2.51, ppl=5.7, wps=5938.7, ups=0.09, wpb=64858, bsz=128, num_updates=9990, lr=9.99281e-05, gnorm=2.063, loss_scale=4, train_wall=10, gb_free=2.8, wall=114895 2021-06-20 02:33:52 | INFO | train_inner | epoch 004: 1045 / 3002 loss=2.477, ppl=5.57, wps=5846, ups=0.09, wpb=64832, bsz=128, num_updates=9991, lr=9.99281e-05, gnorm=2.032, loss_scale=4, train_wall=11, gb_free=2.8, wall=114906 2021-06-20 02:34:03 | INFO | train_inner | epoch 004: 1046 / 3002 loss=2.544, ppl=5.83, wps=5963.8, ups=0.09, wpb=64923, bsz=128, num_updates=9992, lr=9.99281e-05, gnorm=2.142, loss_scale=4, train_wall=10, gb_free=2.8, wall=114917 2021-06-20 02:34:14 | INFO | train_inner | epoch 004: 1047 / 3002 loss=2.604, ppl=6.08, wps=5801.9, ups=0.09, wpb=64897, bsz=128, num_updates=9993, lr=9.99281e-05, gnorm=1.976, loss_scale=4, train_wall=11, gb_free=2.8, wall=114928 2021-06-20 02:34:25 | INFO | train_inner | epoch 004: 1048 / 3002 loss=2.475, ppl=5.56, wps=5962.5, ups=0.09, wpb=64876, bsz=128, num_updates=9994, lr=9.9928e-05, gnorm=2.097, loss_scale=4, train_wall=10, gb_free=2.8, wall=114939 2021-06-20 02:34:36 | INFO | train_inner | epoch 004: 1049 / 3002 loss=2.561, ppl=5.9, wps=5800.9, ups=0.09, wpb=64864, bsz=128, num_updates=9995, lr=9.9928e-05, gnorm=2.114, loss_scale=4, train_wall=11, gb_free=2.8, wall=114951 2021-06-20 02:34:47 | INFO | train_inner | epoch 004: 1050 / 3002 loss=2.512, ppl=5.7, wps=5778.3, ups=0.09, wpb=64802, bsz=128, num_updates=9996, lr=9.9928e-05, gnorm=2.324, loss_scale=4, train_wall=11, gb_free=2.8, wall=114962 2021-06-20 02:34:58 | INFO | train_inner | epoch 004: 1051 / 3002 loss=2.568, ppl=5.93, wps=5924.4, ups=0.09, wpb=64843, bsz=128, num_updates=9997, lr=9.9928e-05, gnorm=2.087, loss_scale=4, train_wall=10, gb_free=2.8, wall=114973 2021-06-20 02:35:10 | INFO | train_inner | epoch 004: 1052 / 3002 loss=2.641, ppl=6.24, wps=5794.5, ups=0.09, wpb=64778, bsz=128, num_updates=9998, lr=9.9928e-05, gnorm=2.068, loss_scale=4, train_wall=11, gb_free=2.8, wall=114984 2021-06-20 02:35:21 | INFO | train_inner | epoch 004: 1053 / 3002 loss=2.453, ppl=5.48, wps=5891.8, ups=0.09, wpb=64885, bsz=128, num_updates=9999, lr=9.9928e-05, gnorm=2.044, loss_scale=4, train_wall=11, gb_free=2.8, wall=114995 2021-06-20 02:35:32 | INFO | train_inner | epoch 004: 1054 / 3002 loss=2.654, ppl=6.29, wps=5838.3, ups=0.09, wpb=64829, bsz=128, num_updates=10000, lr=9.9928e-05, gnorm=2.485, loss_scale=4, train_wall=11, gb_free=2.8, wall=115006 2021-06-20 02:35:43 | INFO | train_inner | epoch 004: 1055 / 3002 loss=2.812, ppl=7.02, wps=5823.8, ups=0.09, wpb=64897, bsz=128, num_updates=10001, lr=9.9928e-05, gnorm=2.009, loss_scale=4, train_wall=11, gb_free=2.8, wall=115017 2021-06-20 02:35:54 | INFO | train_inner | epoch 004: 1056 / 3002 loss=2.603, ppl=6.07, wps=5796.2, ups=0.09, wpb=64772, bsz=128, num_updates=10002, lr=9.9928e-05, gnorm=2.055, loss_scale=4, train_wall=11, gb_free=2.8, wall=115028 2021-06-20 02:36:05 | INFO | train_inner | epoch 004: 1057 / 3002 loss=2.586, ppl=6.01, wps=5925, ups=0.09, wpb=64864, bsz=128, num_updates=10003, lr=9.9928e-05, gnorm=2.004, loss_scale=4, train_wall=11, gb_free=2.8, wall=115039 2021-06-20 02:36:16 | INFO | train_inner | epoch 004: 1058 / 3002 loss=2.617, ppl=6.13, wps=5864.6, ups=0.09, wpb=64893, bsz=128, num_updates=10004, lr=9.9928e-05, gnorm=2.395, loss_scale=4, train_wall=11, gb_free=2.8, wall=115050 2021-06-20 02:36:27 | INFO | train_inner | epoch 004: 1059 / 3002 loss=2.483, ppl=5.59, wps=5917.7, ups=0.09, wpb=64864, bsz=128, num_updates=10005, lr=9.9928e-05, gnorm=2.052, loss_scale=4, train_wall=11, gb_free=2.8, wall=115061 2021-06-20 02:36:38 | INFO | train_inner | epoch 004: 1060 / 3002 loss=2.531, ppl=5.78, wps=5841.6, ups=0.09, wpb=64900, bsz=128, num_updates=10006, lr=9.99279e-05, gnorm=2.06, loss_scale=4, train_wall=11, gb_free=2.8, wall=115072 2021-06-20 02:36:49 | INFO | train_inner | epoch 004: 1061 / 3002 loss=2.444, ppl=5.44, wps=5946.9, ups=0.09, wpb=64845, bsz=128, num_updates=10007, lr=9.99279e-05, gnorm=2.094, loss_scale=4, train_wall=10, gb_free=2.8, wall=115083 2021-06-20 02:37:00 | INFO | train_inner | epoch 004: 1062 / 3002 loss=2.654, ppl=6.29, wps=5814.2, ups=0.09, wpb=64764, bsz=128, num_updates=10008, lr=9.99279e-05, gnorm=2.116, loss_scale=4, train_wall=11, gb_free=2.8, wall=115094 2021-06-20 02:37:11 | INFO | train_inner | epoch 004: 1063 / 3002 loss=2.604, ppl=6.08, wps=5907.9, ups=0.09, wpb=64859, bsz=128, num_updates=10009, lr=9.99279e-05, gnorm=2.055, loss_scale=4, train_wall=10, gb_free=2.8, wall=115105 2021-06-20 02:37:22 | INFO | train_inner | epoch 004: 1064 / 3002 loss=2.606, ppl=6.09, wps=5680.5, ups=0.09, wpb=64767, bsz=128, num_updates=10010, lr=9.99279e-05, gnorm=2.024, loss_scale=4, train_wall=11, gb_free=2.8, wall=115117 2021-06-20 02:37:33 | INFO | train_inner | epoch 004: 1065 / 3002 loss=2.595, ppl=6.04, wps=5915, ups=0.09, wpb=64900, bsz=128, num_updates=10011, lr=9.99279e-05, gnorm=2.063, loss_scale=4, train_wall=11, gb_free=2.8, wall=115128 2021-06-20 02:37:45 | INFO | train_inner | epoch 004: 1066 / 3002 loss=2.614, ppl=6.12, wps=5800.5, ups=0.09, wpb=64799, bsz=128, num_updates=10012, lr=9.99279e-05, gnorm=2.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=115139 2021-06-20 02:37:56 | INFO | train_inner | epoch 004: 1067 / 3002 loss=2.485, ppl=5.6, wps=5816.5, ups=0.09, wpb=64842, bsz=128, num_updates=10013, lr=9.99279e-05, gnorm=2.943, loss_scale=4, train_wall=11, gb_free=2.8, wall=115150 2021-06-20 02:38:07 | INFO | train_inner | epoch 004: 1068 / 3002 loss=2.633, ppl=6.2, wps=5859.6, ups=0.09, wpb=64830, bsz=128, num_updates=10014, lr=9.99279e-05, gnorm=2.058, loss_scale=4, train_wall=11, gb_free=2.8, wall=115161 2021-06-20 02:38:18 | INFO | train_inner | epoch 004: 1069 / 3002 loss=2.601, ppl=6.07, wps=5769.9, ups=0.09, wpb=64808, bsz=128, num_updates=10015, lr=9.99279e-05, gnorm=2.088, loss_scale=4, train_wall=11, gb_free=2.8, wall=115172 2021-06-20 02:38:29 | INFO | train_inner | epoch 004: 1070 / 3002 loss=2.581, ppl=5.99, wps=5936.9, ups=0.09, wpb=64859, bsz=128, num_updates=10016, lr=9.99279e-05, gnorm=2.137, loss_scale=4, train_wall=10, gb_free=2.8, wall=115183 2021-06-20 02:38:40 | INFO | train_inner | epoch 004: 1071 / 3002 loss=2.552, ppl=5.87, wps=5820.1, ups=0.09, wpb=64824, bsz=128, num_updates=10017, lr=9.99279e-05, gnorm=2.082, loss_scale=4, train_wall=11, gb_free=2.8, wall=115194 2021-06-20 02:38:51 | INFO | train_inner | epoch 004: 1072 / 3002 loss=2.663, ppl=6.34, wps=5901.8, ups=0.09, wpb=64803, bsz=128, num_updates=10018, lr=9.99279e-05, gnorm=2.74, loss_scale=4, train_wall=10, gb_free=2.8, wall=115205 2021-06-20 02:39:02 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-20 02:39:13 | INFO | train_inner | epoch 004: 1074 / 3002 loss=2.532, ppl=5.79, wps=2932.8, ups=0.05, wpb=64799, bsz=128, num_updates=10019, lr=9.99278e-05, gnorm=2.16, loss_scale=2, train_wall=21, gb_free=2.8, wall=115228 2021-06-20 02:39:24 | INFO | train_inner | epoch 004: 1075 / 3002 loss=2.557, ppl=5.89, wps=5758.7, ups=0.09, wpb=64856, bsz=128, num_updates=10020, lr=9.99278e-05, gnorm=2.285, loss_scale=2, train_wall=11, gb_free=2.8, wall=115239 2021-06-20 02:39:35 | INFO | train_inner | epoch 004: 1076 / 3002 loss=2.525, ppl=5.76, wps=5908.2, ups=0.09, wpb=64838, bsz=128, num_updates=10021, lr=9.99278e-05, gnorm=2.593, loss_scale=2, train_wall=11, gb_free=2.8, wall=115250 2021-06-20 02:39:47 | INFO | train_inner | epoch 004: 1077 / 3002 loss=2.729, ppl=6.63, wps=5841, ups=0.09, wpb=64800, bsz=128, num_updates=10022, lr=9.99278e-05, gnorm=2.043, loss_scale=2, train_wall=11, gb_free=2.8, wall=115261 2021-06-20 02:39:57 | INFO | train_inner | epoch 004: 1078 / 3002 loss=2.572, ppl=5.95, wps=5933.2, ups=0.09, wpb=64835, bsz=128, num_updates=10023, lr=9.99278e-05, gnorm=2.238, loss_scale=2, train_wall=10, gb_free=2.8, wall=115272 2021-06-20 02:40:09 | INFO | train_inner | epoch 004: 1079 / 3002 loss=2.595, ppl=6.04, wps=5754.8, ups=0.09, wpb=64838, bsz=128, num_updates=10024, lr=9.99278e-05, gnorm=6.347, loss_scale=2, train_wall=11, gb_free=2.8, wall=115283 2021-06-20 02:40:20 | INFO | train_inner | epoch 004: 1080 / 3002 loss=2.636, ppl=6.22, wps=5860.7, ups=0.09, wpb=64842, bsz=128, num_updates=10025, lr=9.99278e-05, gnorm=2.233, loss_scale=2, train_wall=11, gb_free=2.8, wall=115294 2021-06-20 02:40:31 | INFO | train_inner | epoch 004: 1081 / 3002 loss=2.647, ppl=6.26, wps=5919.3, ups=0.09, wpb=64749, bsz=128, num_updates=10026, lr=9.99278e-05, gnorm=2.224, loss_scale=2, train_wall=10, gb_free=2.8, wall=115305 2021-06-20 02:40:42 | INFO | train_inner | epoch 004: 1082 / 3002 loss=2.594, ppl=6.04, wps=5789, ups=0.09, wpb=64741, bsz=128, num_updates=10027, lr=9.99278e-05, gnorm=2.074, loss_scale=2, train_wall=11, gb_free=2.8, wall=115316 2021-06-20 02:40:53 | INFO | train_inner | epoch 004: 1083 / 3002 loss=2.557, ppl=5.88, wps=5930.5, ups=0.09, wpb=64886, bsz=128, num_updates=10028, lr=9.99278e-05, gnorm=3.197, loss_scale=2, train_wall=10, gb_free=2.8, wall=115327 2021-06-20 02:41:04 | INFO | train_inner | epoch 004: 1084 / 3002 loss=2.688, ppl=6.44, wps=5766.3, ups=0.09, wpb=64793, bsz=128, num_updates=10029, lr=9.99278e-05, gnorm=2.116, loss_scale=2, train_wall=11, gb_free=2.8, wall=115338 2021-06-20 02:41:15 | INFO | train_inner | epoch 004: 1085 / 3002 loss=2.484, ppl=5.59, wps=5739.9, ups=0.09, wpb=64746, bsz=128, num_updates=10030, lr=9.99278e-05, gnorm=2.092, loss_scale=2, train_wall=11, gb_free=2.8, wall=115350 2021-06-20 02:41:27 | INFO | train_inner | epoch 004: 1086 / 3002 loss=2.632, ppl=6.2, wps=5765.7, ups=0.09, wpb=64766, bsz=128, num_updates=10031, lr=9.99277e-05, gnorm=2.158, loss_scale=2, train_wall=11, gb_free=2.8, wall=115361 2021-06-20 02:41:38 | INFO | train_inner | epoch 004: 1087 / 3002 loss=2.516, ppl=5.72, wps=5823.9, ups=0.09, wpb=64805, bsz=128, num_updates=10032, lr=9.99277e-05, gnorm=2.09, loss_scale=2, train_wall=11, gb_free=2.8, wall=115372 2021-06-20 02:41:49 | INFO | train_inner | epoch 004: 1088 / 3002 loss=2.753, ppl=6.74, wps=5960.4, ups=0.09, wpb=64828, bsz=128, num_updates=10033, lr=9.99277e-05, gnorm=2.187, loss_scale=2, train_wall=10, gb_free=2.8, wall=115383 2021-06-20 02:42:00 | INFO | train_inner | epoch 004: 1089 / 3002 loss=2.582, ppl=5.99, wps=5938.3, ups=0.09, wpb=64852, bsz=128, num_updates=10034, lr=9.99277e-05, gnorm=2.23, loss_scale=2, train_wall=10, gb_free=2.8, wall=115394 2021-06-20 02:42:11 | INFO | train_inner | epoch 004: 1090 / 3002 loss=2.545, ppl=5.83, wps=5807.4, ups=0.09, wpb=64767, bsz=128, num_updates=10035, lr=9.99277e-05, gnorm=2.256, loss_scale=2, train_wall=11, gb_free=2.8, wall=115405 2021-06-20 02:42:22 | INFO | train_inner | epoch 004: 1091 / 3002 loss=2.568, ppl=5.93, wps=5935.7, ups=0.09, wpb=64781, bsz=128, num_updates=10036, lr=9.99277e-05, gnorm=2.08, loss_scale=2, train_wall=10, gb_free=2.8, wall=115416 2021-06-20 02:42:33 | INFO | train_inner | epoch 004: 1092 / 3002 loss=2.617, ppl=6.13, wps=5853.1, ups=0.09, wpb=64782, bsz=128, num_updates=10037, lr=9.99277e-05, gnorm=5.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=115427 2021-06-20 02:42:44 | INFO | train_inner | epoch 004: 1093 / 3002 loss=2.669, ppl=6.36, wps=5881.3, ups=0.09, wpb=64819, bsz=128, num_updates=10038, lr=9.99277e-05, gnorm=2.146, loss_scale=2, train_wall=11, gb_free=2.8, wall=115438 2021-06-20 02:42:55 | INFO | train_inner | epoch 004: 1094 / 3002 loss=2.533, ppl=5.79, wps=5899.5, ups=0.09, wpb=64853, bsz=128, num_updates=10039, lr=9.99277e-05, gnorm=2.038, loss_scale=2, train_wall=11, gb_free=2.8, wall=115449 2021-06-20 02:43:06 | INFO | train_inner | epoch 004: 1095 / 3002 loss=2.708, ppl=6.53, wps=5722.5, ups=0.09, wpb=64785, bsz=128, num_updates=10040, lr=9.99277e-05, gnorm=2.039, loss_scale=2, train_wall=11, gb_free=2.8, wall=115460 2021-06-20 02:43:17 | INFO | train_inner | epoch 004: 1096 / 3002 loss=2.584, ppl=6, wps=5865.8, ups=0.09, wpb=64784, bsz=128, num_updates=10041, lr=9.99277e-05, gnorm=8.753, loss_scale=2, train_wall=11, gb_free=2.8, wall=115471 2021-06-20 02:43:28 | INFO | train_inner | epoch 004: 1097 / 3002 loss=2.734, ppl=6.65, wps=5919.2, ups=0.09, wpb=64801, bsz=128, num_updates=10042, lr=9.99277e-05, gnorm=2.132, loss_scale=2, train_wall=10, gb_free=2.8, wall=115482 2021-06-20 02:43:39 | INFO | train_inner | epoch 004: 1098 / 3002 loss=2.541, ppl=5.82, wps=5804.4, ups=0.09, wpb=64835, bsz=128, num_updates=10043, lr=9.99277e-05, gnorm=2.083, loss_scale=2, train_wall=11, gb_free=2.8, wall=115494 2021-06-20 02:43:50 | INFO | train_inner | epoch 004: 1099 / 3002 loss=2.623, ppl=6.16, wps=5778.6, ups=0.09, wpb=64818, bsz=128, num_updates=10044, lr=9.99276e-05, gnorm=2.26, loss_scale=2, train_wall=11, gb_free=2.8, wall=115505 2021-06-20 02:44:01 | INFO | train_inner | epoch 004: 1100 / 3002 loss=2.553, ppl=5.87, wps=5842.8, ups=0.09, wpb=64813, bsz=128, num_updates=10045, lr=9.99276e-05, gnorm=2.113, loss_scale=2, train_wall=11, gb_free=2.8, wall=115516 2021-06-20 02:44:13 | INFO | train_inner | epoch 004: 1101 / 3002 loss=2.628, ppl=6.18, wps=5783.3, ups=0.09, wpb=64772, bsz=128, num_updates=10046, lr=9.99276e-05, gnorm=2.23, loss_scale=2, train_wall=11, gb_free=2.8, wall=115527 2021-06-20 02:44:24 | INFO | train_inner | epoch 004: 1102 / 3002 loss=2.808, ppl=7, wps=5769.1, ups=0.09, wpb=64840, bsz=128, num_updates=10047, lr=9.99276e-05, gnorm=2.198, loss_scale=2, train_wall=11, gb_free=2.8, wall=115538 2021-06-20 02:44:35 | INFO | train_inner | epoch 004: 1103 / 3002 loss=2.532, ppl=5.78, wps=6001.5, ups=0.09, wpb=64896, bsz=128, num_updates=10048, lr=9.99276e-05, gnorm=2.111, loss_scale=2, train_wall=10, gb_free=2.8, wall=115549 2021-06-20 02:44:46 | INFO | train_inner | epoch 004: 1104 / 3002 loss=2.729, ppl=6.63, wps=5951.7, ups=0.09, wpb=64791, bsz=128, num_updates=10049, lr=9.99276e-05, gnorm=2.176, loss_scale=2, train_wall=10, gb_free=2.8, wall=115560 2021-06-20 02:44:57 | INFO | train_inner | epoch 004: 1105 / 3002 loss=2.491, ppl=5.62, wps=5953.9, ups=0.09, wpb=64900, bsz=128, num_updates=10050, lr=9.99276e-05, gnorm=2.001, loss_scale=2, train_wall=10, gb_free=2.8, wall=115571 2021-06-20 02:45:08 | INFO | train_inner | epoch 004: 1106 / 3002 loss=2.695, ppl=6.48, wps=5738.5, ups=0.09, wpb=64772, bsz=128, num_updates=10051, lr=9.99276e-05, gnorm=2.13, loss_scale=2, train_wall=11, gb_free=2.8, wall=115582 2021-06-20 02:45:19 | INFO | train_inner | epoch 004: 1107 / 3002 loss=2.422, ppl=5.36, wps=5960.6, ups=0.09, wpb=64868, bsz=128, num_updates=10052, lr=9.99276e-05, gnorm=2.318, loss_scale=2, train_wall=10, gb_free=2.8, wall=115593 2021-06-20 02:45:30 | INFO | train_inner | epoch 004: 1108 / 3002 loss=2.68, ppl=6.41, wps=5798.9, ups=0.09, wpb=64782, bsz=128, num_updates=10053, lr=9.99276e-05, gnorm=1.993, loss_scale=2, train_wall=11, gb_free=2.8, wall=115604 2021-06-20 02:45:41 | INFO | train_inner | epoch 004: 1109 / 3002 loss=2.717, ppl=6.58, wps=5783.3, ups=0.09, wpb=64868, bsz=128, num_updates=10054, lr=9.99276e-05, gnorm=2.052, loss_scale=2, train_wall=11, gb_free=2.8, wall=115615 2021-06-20 02:45:52 | INFO | train_inner | epoch 004: 1110 / 3002 loss=2.484, ppl=5.6, wps=5900.9, ups=0.09, wpb=64824, bsz=128, num_updates=10055, lr=9.99276e-05, gnorm=2.342, loss_scale=2, train_wall=11, gb_free=2.8, wall=115626 2021-06-20 02:46:03 | INFO | train_inner | epoch 004: 1111 / 3002 loss=2.657, ppl=6.31, wps=5808.9, ups=0.09, wpb=64820, bsz=128, num_updates=10056, lr=9.99275e-05, gnorm=2.248, loss_scale=2, train_wall=11, gb_free=2.8, wall=115638 2021-06-20 02:46:14 | INFO | train_inner | epoch 004: 1112 / 3002 loss=2.824, ppl=7.08, wps=5786.9, ups=0.09, wpb=64781, bsz=128, num_updates=10057, lr=9.99275e-05, gnorm=2.161, loss_scale=2, train_wall=11, gb_free=2.8, wall=115649 2021-06-20 02:46:26 | INFO | train_inner | epoch 004: 1113 / 3002 loss=2.684, ppl=6.43, wps=5808.1, ups=0.09, wpb=64820, bsz=128, num_updates=10058, lr=9.99275e-05, gnorm=2.175, loss_scale=2, train_wall=11, gb_free=2.8, wall=115660 2021-06-20 02:46:36 | INFO | train_inner | epoch 004: 1114 / 3002 loss=2.731, ppl=6.64, wps=5938.7, ups=0.09, wpb=64760, bsz=128, num_updates=10059, lr=9.99275e-05, gnorm=2.107, loss_scale=2, train_wall=10, gb_free=2.8, wall=115671 2021-06-20 02:46:48 | INFO | train_inner | epoch 004: 1115 / 3002 loss=2.651, ppl=6.28, wps=5881.1, ups=0.09, wpb=64846, bsz=128, num_updates=10060, lr=9.99275e-05, gnorm=2.048, loss_scale=2, train_wall=11, gb_free=2.8, wall=115682 2021-06-20 02:46:59 | INFO | train_inner | epoch 004: 1116 / 3002 loss=2.701, ppl=6.5, wps=5899.1, ups=0.09, wpb=64838, bsz=128, num_updates=10061, lr=9.99275e-05, gnorm=2.209, loss_scale=2, train_wall=11, gb_free=2.8, wall=115693 2021-06-20 02:47:10 | INFO | train_inner | epoch 004: 1117 / 3002 loss=2.729, ppl=6.63, wps=5850.1, ups=0.09, wpb=64900, bsz=128, num_updates=10062, lr=9.99275e-05, gnorm=2.561, loss_scale=2, train_wall=11, gb_free=2.8, wall=115704 2021-06-20 02:47:21 | INFO | train_inner | epoch 004: 1118 / 3002 loss=2.633, ppl=6.2, wps=5900.2, ups=0.09, wpb=64887, bsz=128, num_updates=10063, lr=9.99275e-05, gnorm=2.271, loss_scale=2, train_wall=11, gb_free=2.8, wall=115715 2021-06-20 02:47:32 | INFO | train_inner | epoch 004: 1119 / 3002 loss=2.545, ppl=5.83, wps=5909.1, ups=0.09, wpb=64816, bsz=128, num_updates=10064, lr=9.99275e-05, gnorm=12.7, loss_scale=2, train_wall=11, gb_free=2.8, wall=115726 2021-06-20 02:47:43 | INFO | train_inner | epoch 004: 1120 / 3002 loss=2.613, ppl=6.12, wps=5856.3, ups=0.09, wpb=64904, bsz=128, num_updates=10065, lr=9.99275e-05, gnorm=2.237, loss_scale=2, train_wall=11, gb_free=2.8, wall=115737 2021-06-20 02:47:54 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 2021-06-20 02:48:05 | INFO | train_inner | epoch 004: 1122 / 3002 loss=2.665, ppl=6.34, wps=2956.7, ups=0.05, wpb=64790, bsz=128, num_updates=10066, lr=9.99275e-05, gnorm=3.286, loss_scale=1, train_wall=21, gb_free=2.8, wall=115759 2021-06-20 02:48:16 | INFO | train_inner | epoch 004: 1123 / 3002 loss=2.976, ppl=7.87, wps=5855.4, ups=0.09, wpb=64768, bsz=128, num_updates=10067, lr=9.99275e-05, gnorm=3.639, loss_scale=1, train_wall=11, gb_free=2.8, wall=115770 2021-06-20 02:48:27 | INFO | train_inner | epoch 004: 1124 / 3002 loss=2.926, ppl=7.6, wps=5724.4, ups=0.09, wpb=64805, bsz=128, num_updates=10068, lr=9.99275e-05, gnorm=39.922, loss_scale=1, train_wall=11, gb_free=2.8, wall=115781 2021-06-20 02:48:38 | INFO | train_inner | epoch 004: 1125 / 3002 loss=2.948, ppl=7.71, wps=5821.7, ups=0.09, wpb=64870, bsz=128, num_updates=10069, lr=9.99274e-05, gnorm=4.003, loss_scale=1, train_wall=11, gb_free=2.8, wall=115792 2021-06-20 02:48:49 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 2021-06-20 02:49:00 | INFO | train_inner | epoch 004: 1127 / 3002 loss=2.744, ppl=6.7, wps=2962.7, ups=0.05, wpb=64847, bsz=128, num_updates=10070, lr=9.99274e-05, gnorm=4.255, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=115814 2021-06-20 02:49:11 | INFO | train_inner | epoch 004: 1128 / 3002 loss=2.748, ppl=6.72, wps=5848.2, ups=0.09, wpb=64836, bsz=128, num_updates=10071, lr=9.99274e-05, gnorm=2.342, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115825 2021-06-20 02:49:22 | INFO | train_inner | epoch 004: 1129 / 3002 loss=2.634, ppl=6.21, wps=5790.7, ups=0.09, wpb=64771, bsz=128, num_updates=10072, lr=9.99274e-05, gnorm=2.259, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115837 2021-06-20 02:49:33 | INFO | train_inner | epoch 004: 1130 / 3002 loss=2.606, ppl=6.09, wps=5904.6, ups=0.09, wpb=64774, bsz=128, num_updates=10073, lr=9.99274e-05, gnorm=2.371, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115848 2021-06-20 02:49:44 | INFO | train_inner | epoch 004: 1131 / 3002 loss=2.524, ppl=5.75, wps=5981.2, ups=0.09, wpb=64886, bsz=128, num_updates=10074, lr=9.99274e-05, gnorm=2.162, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=115858 2021-06-20 02:49:55 | INFO | train_inner | epoch 004: 1132 / 3002 loss=2.567, ppl=5.93, wps=5954.7, ups=0.09, wpb=64910, bsz=128, num_updates=10075, lr=9.99274e-05, gnorm=2.256, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=115869 2021-06-20 02:50:06 | INFO | train_inner | epoch 004: 1133 / 3002 loss=2.549, ppl=5.85, wps=5859.1, ups=0.09, wpb=64844, bsz=128, num_updates=10076, lr=9.99274e-05, gnorm=2.217, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115880 2021-06-20 02:50:17 | INFO | train_inner | epoch 004: 1134 / 3002 loss=2.711, ppl=6.55, wps=5790.6, ups=0.09, wpb=64780, bsz=128, num_updates=10077, lr=9.99274e-05, gnorm=2.668, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115892 2021-06-20 02:50:28 | INFO | train_inner | epoch 004: 1135 / 3002 loss=2.541, ppl=5.82, wps=5927.4, ups=0.09, wpb=64856, bsz=128, num_updates=10078, lr=9.99274e-05, gnorm=2.153, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=115903 2021-06-20 02:50:39 | INFO | train_inner | epoch 004: 1136 / 3002 loss=2.549, ppl=5.85, wps=5906.2, ups=0.09, wpb=64878, bsz=128, num_updates=10079, lr=9.99274e-05, gnorm=2.254, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115913 2021-06-20 02:50:51 | INFO | train_inner | epoch 004: 1137 / 3002 loss=2.636, ppl=6.22, wps=5708.1, ups=0.09, wpb=64863, bsz=128, num_updates=10080, lr=9.99274e-05, gnorm=2.261, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115925 2021-06-20 02:51:02 | INFO | train_inner | epoch 004: 1138 / 3002 loss=2.718, ppl=6.58, wps=5853.2, ups=0.09, wpb=64796, bsz=128, num_updates=10081, lr=9.99273e-05, gnorm=2.208, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115936 2021-06-20 02:51:13 | INFO | train_inner | epoch 004: 1139 / 3002 loss=2.608, ppl=6.09, wps=5850.5, ups=0.09, wpb=64827, bsz=128, num_updates=10082, lr=9.99273e-05, gnorm=2.129, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115947 2021-06-20 02:51:23 | INFO | train_inner | epoch 004: 1140 / 3002 loss=2.49, ppl=5.62, wps=6022.3, ups=0.09, wpb=64793, bsz=128, num_updates=10083, lr=9.99273e-05, gnorm=2.089, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=115958 2021-06-20 02:51:35 | INFO | train_inner | epoch 004: 1141 / 3002 loss=2.637, ppl=6.22, wps=5822.2, ups=0.09, wpb=64745, bsz=128, num_updates=10084, lr=9.99273e-05, gnorm=2.208, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115969 2021-06-20 02:51:46 | INFO | train_inner | epoch 004: 1142 / 3002 loss=2.66, ppl=6.32, wps=5917.4, ups=0.09, wpb=64901, bsz=128, num_updates=10085, lr=9.99273e-05, gnorm=2.104, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115980 2021-06-20 02:51:56 | INFO | train_inner | epoch 004: 1143 / 3002 loss=2.44, ppl=5.43, wps=5946.2, ups=0.09, wpb=64826, bsz=128, num_updates=10086, lr=9.99273e-05, gnorm=2.088, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=115991 2021-06-20 02:52:07 | INFO | train_inner | epoch 004: 1144 / 3002 loss=2.613, ppl=6.12, wps=5865.3, ups=0.09, wpb=64805, bsz=128, num_updates=10087, lr=9.99273e-05, gnorm=2.07, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116002 2021-06-20 02:52:18 | INFO | train_inner | epoch 004: 1145 / 3002 loss=2.622, ppl=6.16, wps=5888.9, ups=0.09, wpb=64726, bsz=128, num_updates=10088, lr=9.99273e-05, gnorm=2.033, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116013 2021-06-20 02:52:29 | INFO | train_inner | epoch 004: 1146 / 3002 loss=2.581, ppl=5.99, wps=5887, ups=0.09, wpb=64787, bsz=128, num_updates=10089, lr=9.99273e-05, gnorm=2.13, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116024 2021-06-20 02:52:41 | INFO | train_inner | epoch 004: 1147 / 3002 loss=2.529, ppl=5.77, wps=5817.6, ups=0.09, wpb=64828, bsz=128, num_updates=10090, lr=9.99273e-05, gnorm=2.222, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116035 2021-06-20 02:52:52 | INFO | train_inner | epoch 004: 1148 / 3002 loss=2.596, ppl=6.04, wps=5876.3, ups=0.09, wpb=64844, bsz=128, num_updates=10091, lr=9.99273e-05, gnorm=2.116, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116046 2021-06-20 02:53:03 | INFO | train_inner | epoch 004: 1149 / 3002 loss=2.534, ppl=5.79, wps=5912.3, ups=0.09, wpb=64832, bsz=128, num_updates=10092, lr=9.99273e-05, gnorm=2.068, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116057 2021-06-20 02:53:14 | INFO | train_inner | epoch 004: 1150 / 3002 loss=2.436, ppl=5.41, wps=5797.6, ups=0.09, wpb=64799, bsz=128, num_updates=10093, lr=9.99273e-05, gnorm=1.998, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116068 2021-06-20 02:53:25 | INFO | train_inner | epoch 004: 1151 / 3002 loss=2.603, ppl=6.07, wps=5810.5, ups=0.09, wpb=64811, bsz=128, num_updates=10094, lr=9.99272e-05, gnorm=2.075, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116079 2021-06-20 02:53:36 | INFO | train_inner | epoch 004: 1152 / 3002 loss=2.435, ppl=5.41, wps=5756.2, ups=0.09, wpb=64809, bsz=128, num_updates=10095, lr=9.99272e-05, gnorm=1.991, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116091 2021-06-20 02:53:47 | INFO | train_inner | epoch 004: 1153 / 3002 loss=2.555, ppl=5.87, wps=5828.5, ups=0.09, wpb=64825, bsz=128, num_updates=10096, lr=9.99272e-05, gnorm=1.953, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116102 2021-06-20 02:53:58 | INFO | train_inner | epoch 004: 1154 / 3002 loss=2.724, ppl=6.6, wps=5920.4, ups=0.09, wpb=64778, bsz=128, num_updates=10097, lr=9.99272e-05, gnorm=2.021, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116113 2021-06-20 02:54:10 | INFO | train_inner | epoch 004: 1155 / 3002 loss=2.668, ppl=6.36, wps=5724.5, ups=0.09, wpb=64740, bsz=128, num_updates=10098, lr=9.99272e-05, gnorm=2.038, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116124 2021-06-20 02:54:21 | INFO | train_inner | epoch 004: 1156 / 3002 loss=2.562, ppl=5.91, wps=5913, ups=0.09, wpb=64905, bsz=128, num_updates=10099, lr=9.99272e-05, gnorm=2.145, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116135 2021-06-20 02:54:32 | INFO | train_inner | epoch 004: 1157 / 3002 loss=2.638, ppl=6.22, wps=5874.7, ups=0.09, wpb=64780, bsz=128, num_updates=10100, lr=9.99272e-05, gnorm=2.12, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116146 2021-06-20 02:54:43 | INFO | train_inner | epoch 004: 1158 / 3002 loss=2.619, ppl=6.15, wps=5769.4, ups=0.09, wpb=64803, bsz=128, num_updates=10101, lr=9.99272e-05, gnorm=2.071, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116157 2021-06-20 02:54:54 | INFO | train_inner | epoch 004: 1159 / 3002 loss=2.569, ppl=5.93, wps=5735.5, ups=0.09, wpb=64805, bsz=128, num_updates=10102, lr=9.99272e-05, gnorm=2.129, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116168 2021-06-20 02:55:05 | INFO | train_inner | epoch 004: 1160 / 3002 loss=2.554, ppl=5.87, wps=5895.9, ups=0.09, wpb=64767, bsz=128, num_updates=10103, lr=9.99272e-05, gnorm=2.068, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116179 2021-06-20 02:55:16 | INFO | train_inner | epoch 004: 1161 / 3002 loss=2.712, ppl=6.55, wps=5901.6, ups=0.09, wpb=64863, bsz=128, num_updates=10104, lr=9.99272e-05, gnorm=2.155, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116190 2021-06-20 02:55:27 | INFO | train_inner | epoch 004: 1162 / 3002 loss=2.542, ppl=5.83, wps=5827.4, ups=0.09, wpb=64855, bsz=128, num_updates=10105, lr=9.99272e-05, gnorm=2.038, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116202 2021-06-20 02:55:38 | INFO | train_inner | epoch 004: 1163 / 3002 loss=2.511, ppl=5.7, wps=5847.4, ups=0.09, wpb=64879, bsz=128, num_updates=10106, lr=9.99271e-05, gnorm=1.946, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116213 2021-06-20 02:55:49 | INFO | train_inner | epoch 004: 1164 / 3002 loss=2.518, ppl=5.73, wps=5844.2, ups=0.09, wpb=64871, bsz=128, num_updates=10107, lr=9.99271e-05, gnorm=9.294, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116224 2021-06-20 02:56:00 | INFO | train_inner | epoch 004: 1165 / 3002 loss=2.576, ppl=5.96, wps=5936.8, ups=0.09, wpb=64825, bsz=128, num_updates=10108, lr=9.99271e-05, gnorm=2.028, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116235 2021-06-20 02:56:12 | INFO | train_inner | epoch 004: 1166 / 3002 loss=2.524, ppl=5.75, wps=5735.8, ups=0.09, wpb=64796, bsz=128, num_updates=10109, lr=9.99271e-05, gnorm=2.107, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116246 2021-06-20 02:56:23 | INFO | train_inner | epoch 004: 1167 / 3002 loss=2.585, ppl=6, wps=5894.1, ups=0.09, wpb=64866, bsz=128, num_updates=10110, lr=9.99271e-05, gnorm=2.07, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116257 2021-06-20 02:56:34 | INFO | train_inner | epoch 004: 1168 / 3002 loss=2.759, ppl=6.77, wps=5893.7, ups=0.09, wpb=64825, bsz=128, num_updates=10111, lr=9.99271e-05, gnorm=2.174, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116268 2021-06-20 02:56:45 | INFO | train_inner | epoch 004: 1169 / 3002 loss=2.5, ppl=5.66, wps=5894.5, ups=0.09, wpb=64957, bsz=128, num_updates=10112, lr=9.99271e-05, gnorm=2.098, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116279 2021-06-20 02:56:56 | INFO | train_inner | epoch 004: 1170 / 3002 loss=2.695, ppl=6.48, wps=5753, ups=0.09, wpb=64778, bsz=128, num_updates=10113, lr=9.99271e-05, gnorm=2.121, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116290 2021-06-20 02:57:07 | INFO | train_inner | epoch 004: 1171 / 3002 loss=2.66, ppl=6.32, wps=5792.8, ups=0.09, wpb=64841, bsz=128, num_updates=10114, lr=9.99271e-05, gnorm=2.056, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116301 2021-06-20 02:57:18 | INFO | train_inner | epoch 004: 1172 / 3002 loss=2.549, ppl=5.85, wps=6022.4, ups=0.09, wpb=64865, bsz=128, num_updates=10115, lr=9.99271e-05, gnorm=1.971, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116312 2021-06-20 02:57:29 | INFO | train_inner | epoch 004: 1173 / 3002 loss=2.692, ppl=6.46, wps=5788.8, ups=0.09, wpb=64655, bsz=128, num_updates=10116, lr=9.99271e-05, gnorm=2.133, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116323 2021-06-20 02:57:40 | INFO | train_inner | epoch 004: 1174 / 3002 loss=2.528, ppl=5.77, wps=5794.9, ups=0.09, wpb=64858, bsz=128, num_updates=10117, lr=9.99271e-05, gnorm=2.094, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116335 2021-06-20 02:57:51 | INFO | train_inner | epoch 004: 1175 / 3002 loss=2.566, ppl=5.92, wps=5881.6, ups=0.09, wpb=64886, bsz=128, num_updates=10118, lr=9.99271e-05, gnorm=2.034, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116346 2021-06-20 02:58:02 | INFO | train_inner | epoch 004: 1176 / 3002 loss=2.754, ppl=6.74, wps=5837.3, ups=0.09, wpb=64807, bsz=128, num_updates=10119, lr=9.9927e-05, gnorm=2.423, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116357 2021-06-20 02:58:13 | INFO | train_inner | epoch 004: 1177 / 3002 loss=2.588, ppl=6.01, wps=5899.9, ups=0.09, wpb=64959, bsz=128, num_updates=10120, lr=9.9927e-05, gnorm=2.083, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116368 2021-06-20 02:58:25 | INFO | train_inner | epoch 004: 1178 / 3002 loss=2.595, ppl=6.04, wps=5729.2, ups=0.09, wpb=64881, bsz=128, num_updates=10121, lr=9.9927e-05, gnorm=2.23, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116379 2021-06-20 02:58:36 | INFO | train_inner | epoch 004: 1179 / 3002 loss=2.55, ppl=5.86, wps=5843.3, ups=0.09, wpb=64885, bsz=128, num_updates=10122, lr=9.9927e-05, gnorm=2.101, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116390 2021-06-20 02:58:47 | INFO | train_inner | epoch 004: 1180 / 3002 loss=2.675, ppl=6.38, wps=5799.4, ups=0.09, wpb=64763, bsz=128, num_updates=10123, lr=9.9927e-05, gnorm=2.103, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116401 2021-06-20 02:58:58 | INFO | train_inner | epoch 004: 1181 / 3002 loss=2.555, ppl=5.88, wps=5830.2, ups=0.09, wpb=64864, bsz=128, num_updates=10124, lr=9.9927e-05, gnorm=2.255, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116412 2021-06-20 02:59:09 | INFO | train_inner | epoch 004: 1182 / 3002 loss=2.564, ppl=5.91, wps=5944, ups=0.09, wpb=64937, bsz=128, num_updates=10125, lr=9.9927e-05, gnorm=2.019, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116423 2021-06-20 02:59:20 | INFO | train_inner | epoch 004: 1183 / 3002 loss=2.581, ppl=5.98, wps=5727.8, ups=0.09, wpb=64871, bsz=128, num_updates=10126, lr=9.9927e-05, gnorm=2.087, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116435 2021-06-20 02:59:31 | INFO | train_inner | epoch 004: 1184 / 3002 loss=2.644, ppl=6.25, wps=5834.3, ups=0.09, wpb=64853, bsz=128, num_updates=10127, lr=9.9927e-05, gnorm=2.305, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116446 2021-06-20 02:59:43 | INFO | train_inner | epoch 004: 1185 / 3002 loss=2.566, ppl=5.92, wps=5797.5, ups=0.09, wpb=64797, bsz=128, num_updates=10128, lr=9.9927e-05, gnorm=3.303, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116457 2021-06-20 02:59:54 | INFO | train_inner | epoch 004: 1186 / 3002 loss=2.681, ppl=6.41, wps=5905.2, ups=0.09, wpb=64803, bsz=128, num_updates=10129, lr=9.9927e-05, gnorm=2.133, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116468 2021-06-20 03:00:05 | INFO | train_inner | epoch 004: 1187 / 3002 loss=2.669, ppl=6.36, wps=5795.1, ups=0.09, wpb=64825, bsz=128, num_updates=10130, lr=9.9927e-05, gnorm=2.09, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116479 2021-06-20 03:00:16 | INFO | train_inner | epoch 004: 1188 / 3002 loss=2.598, ppl=6.05, wps=5823.8, ups=0.09, wpb=64781, bsz=128, num_updates=10131, lr=9.99269e-05, gnorm=2.005, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116490 2021-06-20 03:00:27 | INFO | train_inner | epoch 004: 1189 / 3002 loss=2.547, ppl=5.84, wps=5788.6, ups=0.09, wpb=64790, bsz=128, num_updates=10132, lr=9.99269e-05, gnorm=2.062, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116501 2021-06-20 03:00:38 | INFO | train_inner | epoch 004: 1190 / 3002 loss=2.559, ppl=5.89, wps=5934, ups=0.09, wpb=64865, bsz=128, num_updates=10133, lr=9.99269e-05, gnorm=2.179, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116512 2021-06-20 03:00:49 | INFO | train_inner | epoch 004: 1191 / 3002 loss=2.515, ppl=5.72, wps=5703.1, ups=0.09, wpb=64816, bsz=128, num_updates=10134, lr=9.99269e-05, gnorm=2.116, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116524 2021-06-20 03:01:01 | INFO | train_inner | epoch 004: 1192 / 3002 loss=2.495, ppl=5.64, wps=5796.3, ups=0.09, wpb=64805, bsz=128, num_updates=10135, lr=9.99269e-05, gnorm=2.201, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116535 2021-06-20 03:01:12 | INFO | train_inner | epoch 004: 1193 / 3002 loss=2.526, ppl=5.76, wps=5813.5, ups=0.09, wpb=64846, bsz=128, num_updates=10136, lr=9.99269e-05, gnorm=2.167, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116546 2021-06-20 03:01:23 | INFO | train_inner | epoch 004: 1194 / 3002 loss=2.576, ppl=5.96, wps=5822, ups=0.09, wpb=64797, bsz=128, num_updates=10137, lr=9.99269e-05, gnorm=2.003, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116557 2021-06-20 03:01:34 | INFO | train_inner | epoch 004: 1195 / 3002 loss=2.602, ppl=6.07, wps=5839.8, ups=0.09, wpb=64844, bsz=128, num_updates=10138, lr=9.99269e-05, gnorm=2.049, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116568 2021-06-20 03:01:45 | INFO | train_inner | epoch 004: 1196 / 3002 loss=2.696, ppl=6.48, wps=5738.7, ups=0.09, wpb=64775, bsz=128, num_updates=10139, lr=9.99269e-05, gnorm=2.044, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116580 2021-06-20 03:01:56 | INFO | train_inner | epoch 004: 1197 / 3002 loss=2.558, ppl=5.89, wps=5788.8, ups=0.09, wpb=64751, bsz=128, num_updates=10140, lr=9.99269e-05, gnorm=2.093, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116591 2021-06-20 03:02:07 | INFO | train_inner | epoch 004: 1198 / 3002 loss=2.675, ppl=6.39, wps=5913.4, ups=0.09, wpb=64895, bsz=128, num_updates=10141, lr=9.99269e-05, gnorm=2.28, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116602 2021-06-20 03:02:19 | INFO | train_inner | epoch 004: 1199 / 3002 loss=2.576, ppl=5.96, wps=5824.9, ups=0.09, wpb=64802, bsz=128, num_updates=10142, lr=9.99269e-05, gnorm=2.093, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116613 2021-06-20 03:02:30 | INFO | train_inner | epoch 004: 1200 / 3002 loss=2.621, ppl=6.15, wps=5813.9, ups=0.09, wpb=64894, bsz=128, num_updates=10143, lr=9.99269e-05, gnorm=2.116, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116624 2021-06-20 03:02:41 | INFO | train_inner | epoch 004: 1201 / 3002 loss=2.704, ppl=6.51, wps=5847, ups=0.09, wpb=64868, bsz=128, num_updates=10144, lr=9.99268e-05, gnorm=2.4, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116635 2021-06-20 03:02:52 | INFO | train_inner | epoch 004: 1202 / 3002 loss=2.547, ppl=5.84, wps=5921.3, ups=0.09, wpb=64845, bsz=128, num_updates=10145, lr=9.99268e-05, gnorm=2.124, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116646 2021-06-20 03:03:03 | INFO | train_inner | epoch 004: 1203 / 3002 loss=2.556, ppl=5.88, wps=5775.1, ups=0.09, wpb=64841, bsz=128, num_updates=10146, lr=9.99268e-05, gnorm=2.182, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116657 2021-06-20 03:03:14 | INFO | train_inner | epoch 004: 1204 / 3002 loss=2.566, ppl=5.92, wps=5885.5, ups=0.09, wpb=64834, bsz=128, num_updates=10147, lr=9.99268e-05, gnorm=2.089, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116668 2021-06-20 03:03:25 | INFO | train_inner | epoch 004: 1205 / 3002 loss=2.457, ppl=5.49, wps=5801.3, ups=0.09, wpb=64794, bsz=128, num_updates=10148, lr=9.99268e-05, gnorm=2.037, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116680 2021-06-20 03:03:36 | INFO | train_inner | epoch 004: 1206 / 3002 loss=2.512, ppl=5.7, wps=5886.1, ups=0.09, wpb=64933, bsz=128, num_updates=10149, lr=9.99268e-05, gnorm=2.125, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116691 2021-06-20 03:03:47 | INFO | train_inner | epoch 004: 1207 / 3002 loss=2.512, ppl=5.7, wps=5797.2, ups=0.09, wpb=64790, bsz=128, num_updates=10150, lr=9.99268e-05, gnorm=1.983, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116702 2021-06-20 03:03:59 | INFO | train_inner | epoch 004: 1208 / 3002 loss=2.551, ppl=5.86, wps=5757, ups=0.09, wpb=64854, bsz=128, num_updates=10151, lr=9.99268e-05, gnorm=2.052, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116713 2021-06-20 03:04:10 | INFO | train_inner | epoch 004: 1209 / 3002 loss=2.589, ppl=6.02, wps=5881.6, ups=0.09, wpb=64799, bsz=128, num_updates=10152, lr=9.99268e-05, gnorm=2.106, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116724 2021-06-20 03:04:21 | INFO | train_inner | epoch 004: 1210 / 3002 loss=2.621, ppl=6.15, wps=5838.1, ups=0.09, wpb=64913, bsz=128, num_updates=10153, lr=9.99268e-05, gnorm=2.193, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116735 2021-06-20 03:04:32 | INFO | train_inner | epoch 004: 1211 / 3002 loss=2.504, ppl=5.67, wps=5828.3, ups=0.09, wpb=64787, bsz=128, num_updates=10154, lr=9.99268e-05, gnorm=1.986, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116746 2021-06-20 03:04:43 | INFO | train_inner | epoch 004: 1212 / 3002 loss=2.595, ppl=6.04, wps=5939, ups=0.09, wpb=64811, bsz=128, num_updates=10155, lr=9.99268e-05, gnorm=2.113, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116757 2021-06-20 03:04:54 | INFO | train_inner | epoch 004: 1213 / 3002 loss=2.476, ppl=5.56, wps=5755.9, ups=0.09, wpb=64859, bsz=128, num_updates=10156, lr=9.99267e-05, gnorm=4.463, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116768 2021-06-20 03:05:05 | INFO | train_inner | epoch 004: 1214 / 3002 loss=2.628, ppl=6.18, wps=5848.9, ups=0.09, wpb=64802, bsz=128, num_updates=10157, lr=9.99267e-05, gnorm=2.085, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116780 2021-06-20 03:05:16 | INFO | train_inner | epoch 004: 1215 / 3002 loss=2.444, ppl=5.44, wps=5796, ups=0.09, wpb=64899, bsz=128, num_updates=10158, lr=9.99267e-05, gnorm=1.972, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116791 2021-06-20 03:05:27 | INFO | train_inner | epoch 004: 1216 / 3002 loss=2.645, ppl=6.26, wps=5923.9, ups=0.09, wpb=64925, bsz=128, num_updates=10159, lr=9.99267e-05, gnorm=2.077, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116802 2021-06-20 03:05:38 | INFO | train_inner | epoch 004: 1217 / 3002 loss=2.443, ppl=5.44, wps=5863.8, ups=0.09, wpb=64865, bsz=128, num_updates=10160, lr=9.99267e-05, gnorm=2.047, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116813 2021-06-20 03:05:50 | INFO | train_inner | epoch 004: 1218 / 3002 loss=2.506, ppl=5.68, wps=5755, ups=0.09, wpb=64783, bsz=128, num_updates=10161, lr=9.99267e-05, gnorm=2.309, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116824 2021-06-20 03:06:01 | INFO | train_inner | epoch 004: 1219 / 3002 loss=2.528, ppl=5.77, wps=5781.9, ups=0.09, wpb=64812, bsz=128, num_updates=10162, lr=9.99267e-05, gnorm=9.636, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116835 2021-06-20 03:06:12 | INFO | train_inner | epoch 004: 1220 / 3002 loss=2.519, ppl=5.73, wps=6002.3, ups=0.09, wpb=64901, bsz=128, num_updates=10163, lr=9.99267e-05, gnorm=17.12, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116846 2021-06-20 03:06:23 | INFO | train_inner | epoch 004: 1221 / 3002 loss=2.556, ppl=5.88, wps=5865.8, ups=0.09, wpb=64865, bsz=128, num_updates=10164, lr=9.99267e-05, gnorm=1.957, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116857 2021-06-20 03:06:34 | INFO | train_inner | epoch 004: 1222 / 3002 loss=2.661, ppl=6.32, wps=5888.1, ups=0.09, wpb=64841, bsz=128, num_updates=10165, lr=9.99267e-05, gnorm=2.188, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116868 2021-06-20 03:06:45 | INFO | train_inner | epoch 004: 1223 / 3002 loss=2.653, ppl=6.29, wps=5927.6, ups=0.09, wpb=64884, bsz=128, num_updates=10166, lr=9.99267e-05, gnorm=2.139, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116879 2021-06-20 03:06:56 | INFO | train_inner | epoch 004: 1224 / 3002 loss=2.635, ppl=6.21, wps=5979.2, ups=0.09, wpb=64842, bsz=128, num_updates=10167, lr=9.99267e-05, gnorm=2.037, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116890 2021-06-20 03:07:07 | INFO | train_inner | epoch 004: 1225 / 3002 loss=2.564, ppl=5.91, wps=5865.5, ups=0.09, wpb=64889, bsz=128, num_updates=10168, lr=9.99267e-05, gnorm=2.04, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116901 2021-06-20 03:07:17 | INFO | train_inner | epoch 004: 1226 / 3002 loss=2.603, ppl=6.08, wps=5968.1, ups=0.09, wpb=64838, bsz=128, num_updates=10169, lr=9.99266e-05, gnorm=2.099, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116912 2021-06-20 03:07:29 | INFO | train_inner | epoch 004: 1227 / 3002 loss=2.611, ppl=6.11, wps=5815.9, ups=0.09, wpb=64709, bsz=128, num_updates=10170, lr=9.99266e-05, gnorm=2.071, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116923 2021-06-20 03:07:40 | INFO | train_inner | epoch 004: 1228 / 3002 loss=2.667, ppl=6.35, wps=5803.1, ups=0.09, wpb=64916, bsz=128, num_updates=10171, lr=9.99266e-05, gnorm=2.102, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116934 2021-06-20 03:07:51 | INFO | train_inner | epoch 004: 1229 / 3002 loss=2.643, ppl=6.25, wps=5838.2, ups=0.09, wpb=64859, bsz=128, num_updates=10172, lr=9.99266e-05, gnorm=2.115, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116945 2021-06-20 03:08:02 | INFO | train_inner | epoch 004: 1230 / 3002 loss=2.626, ppl=6.17, wps=5885.4, ups=0.09, wpb=64860, bsz=128, num_updates=10173, lr=9.99266e-05, gnorm=2.043, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116956 2021-06-20 03:08:13 | INFO | train_inner | epoch 004: 1231 / 3002 loss=2.535, ppl=5.79, wps=5878.5, ups=0.09, wpb=64839, bsz=128, num_updates=10174, lr=9.99266e-05, gnorm=2.105, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116967 2021-06-20 03:08:24 | INFO | train_inner | epoch 004: 1232 / 3002 loss=2.601, ppl=6.07, wps=5903.8, ups=0.09, wpb=64824, bsz=128, num_updates=10175, lr=9.99266e-05, gnorm=2.168, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116978 2021-06-20 03:08:35 | INFO | train_inner | epoch 004: 1233 / 3002 loss=2.54, ppl=5.82, wps=5877.3, ups=0.09, wpb=64796, bsz=128, num_updates=10176, lr=9.99266e-05, gnorm=2.017, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116989 2021-06-20 03:08:46 | INFO | train_inner | epoch 004: 1234 / 3002 loss=2.556, ppl=5.88, wps=5926.1, ups=0.09, wpb=64768, bsz=128, num_updates=10177, lr=9.99266e-05, gnorm=2.076, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=117000 2021-06-20 03:08:57 | INFO | train_inner | epoch 004: 1235 / 3002 loss=2.517, ppl=5.73, wps=5793.4, ups=0.09, wpb=64805, bsz=128, num_updates=10178, lr=9.99266e-05, gnorm=2.019, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117011 2021-06-20 03:09:08 | INFO | train_inner | epoch 004: 1236 / 3002 loss=2.563, ppl=5.91, wps=5875, ups=0.09, wpb=64851, bsz=128, num_updates=10179, lr=9.99266e-05, gnorm=2.13, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117022 2021-06-20 03:09:19 | INFO | train_inner | epoch 004: 1237 / 3002 loss=2.748, ppl=6.72, wps=5927.2, ups=0.09, wpb=64811, bsz=128, num_updates=10180, lr=9.99266e-05, gnorm=2.141, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=117033 2021-06-20 03:09:30 | INFO | train_inner | epoch 004: 1238 / 3002 loss=2.77, ppl=6.82, wps=5853, ups=0.09, wpb=64767, bsz=128, num_updates=10181, lr=9.99265e-05, gnorm=2.138, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117044 2021-06-20 03:09:41 | INFO | train_inner | epoch 004: 1239 / 3002 loss=2.601, ppl=6.07, wps=5828.9, ups=0.09, wpb=64854, bsz=128, num_updates=10182, lr=9.99265e-05, gnorm=2.051, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117056 2021-06-20 03:09:52 | INFO | train_inner | epoch 004: 1240 / 3002 loss=2.86, ppl=7.26, wps=5877.2, ups=0.09, wpb=64893, bsz=128, num_updates=10183, lr=9.99265e-05, gnorm=2.228, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117067 2021-06-20 03:10:03 | INFO | train_inner | epoch 004: 1241 / 3002 loss=2.641, ppl=6.24, wps=5801.3, ups=0.09, wpb=64816, bsz=128, num_updates=10184, lr=9.99265e-05, gnorm=2.036, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117078 2021-06-20 03:10:14 | INFO | train_inner | epoch 004: 1242 / 3002 loss=2.531, ppl=5.78, wps=5869.2, ups=0.09, wpb=64825, bsz=128, num_updates=10185, lr=9.99265e-05, gnorm=2.741, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117089 2021-06-20 03:10:26 | INFO | train_inner | epoch 004: 1243 / 3002 loss=2.733, ppl=6.65, wps=5869.5, ups=0.09, wpb=64777, bsz=128, num_updates=10186, lr=9.99265e-05, gnorm=2.263, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117100 2021-06-20 03:10:37 | INFO | train_inner | epoch 004: 1244 / 3002 loss=2.55, ppl=5.86, wps=5693.7, ups=0.09, wpb=64839, bsz=128, num_updates=10187, lr=9.99265e-05, gnorm=2.064, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117111 2021-06-20 03:10:48 | INFO | train_inner | epoch 004: 1245 / 3002 loss=2.459, ppl=5.5, wps=5876.5, ups=0.09, wpb=64869, bsz=128, num_updates=10188, lr=9.99265e-05, gnorm=2.084, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117122 2021-06-20 03:10:59 | INFO | train_inner | epoch 004: 1246 / 3002 loss=2.485, ppl=5.6, wps=5821, ups=0.09, wpb=64780, bsz=128, num_updates=10189, lr=9.99265e-05, gnorm=2.155, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117133 2021-06-20 03:11:10 | INFO | train_inner | epoch 004: 1247 / 3002 loss=2.638, ppl=6.23, wps=5794, ups=0.09, wpb=64731, bsz=128, num_updates=10190, lr=9.99265e-05, gnorm=1.988, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117145 2021-06-20 03:11:22 | INFO | train_inner | epoch 004: 1248 / 3002 loss=2.611, ppl=6.11, wps=5659, ups=0.09, wpb=64838, bsz=128, num_updates=10191, lr=9.99265e-05, gnorm=1.98, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117156 2021-06-20 03:11:33 | INFO | train_inner | epoch 004: 1249 / 3002 loss=2.623, ppl=6.16, wps=5837.1, ups=0.09, wpb=64820, bsz=128, num_updates=10192, lr=9.99265e-05, gnorm=2.155, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117167 2021-06-20 03:11:44 | INFO | train_inner | epoch 004: 1250 / 3002 loss=2.635, ppl=6.21, wps=5871.8, ups=0.09, wpb=64893, bsz=128, num_updates=10193, lr=9.99265e-05, gnorm=2.062, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117178 2021-06-20 03:11:55 | INFO | train_inner | epoch 004: 1251 / 3002 loss=2.623, ppl=6.16, wps=5917.6, ups=0.09, wpb=64819, bsz=128, num_updates=10194, lr=9.99264e-05, gnorm=2.193, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117189 2021-06-20 03:12:06 | INFO | train_inner | epoch 004: 1252 / 3002 loss=2.728, ppl=6.63, wps=5929, ups=0.09, wpb=64866, bsz=128, num_updates=10195, lr=9.99264e-05, gnorm=2.121, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=117200 2021-06-20 03:12:17 | INFO | train_inner | epoch 004: 1253 / 3002 loss=2.566, ppl=5.92, wps=5782.9, ups=0.09, wpb=64810, bsz=128, num_updates=10196, lr=9.99264e-05, gnorm=2.114, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117211 2021-06-20 03:12:28 | INFO | train_inner | epoch 004: 1254 / 3002 loss=2.287, ppl=4.88, wps=5885, ups=0.09, wpb=64890, bsz=128, num_updates=10197, lr=9.99264e-05, gnorm=2.216, loss_scale=1, train_wall=11, gb_free=2.8, wall=117222 2021-06-20 03:12:39 | INFO | train_inner | epoch 004: 1255 / 3002 loss=2.53, ppl=5.77, wps=5951.4, ups=0.09, wpb=64823, bsz=128, num_updates=10198, lr=9.99264e-05, gnorm=2.172, loss_scale=1, train_wall=10, gb_free=2.8, wall=117233 2021-06-20 03:12:50 | INFO | train_inner | epoch 004: 1256 / 3002 loss=2.529, ppl=5.77, wps=5890.4, ups=0.09, wpb=64884, bsz=128, num_updates=10199, lr=9.99264e-05, gnorm=2.134, loss_scale=1, train_wall=11, gb_free=2.8, wall=117244 2021-06-20 03:13:01 | INFO | train_inner | epoch 004: 1257 / 3002 loss=2.698, ppl=6.49, wps=5853.1, ups=0.09, wpb=64776, bsz=128, num_updates=10200, lr=9.99264e-05, gnorm=2.58, loss_scale=1, train_wall=11, gb_free=2.8, wall=117255 2021-06-20 03:13:12 | INFO | train_inner | epoch 004: 1258 / 3002 loss=2.542, ppl=5.82, wps=5833.1, ups=0.09, wpb=64911, bsz=128, num_updates=10201, lr=9.99264e-05, gnorm=2.227, loss_scale=1, train_wall=11, gb_free=2.8, wall=117266 2021-06-20 03:13:23 | INFO | train_inner | epoch 004: 1259 / 3002 loss=2.427, ppl=5.38, wps=5883.3, ups=0.09, wpb=64797, bsz=128, num_updates=10202, lr=9.99264e-05, gnorm=2.018, loss_scale=1, train_wall=11, gb_free=2.8, wall=117277 2021-06-20 03:13:34 | INFO | train_inner | epoch 004: 1260 / 3002 loss=2.556, ppl=5.88, wps=5995.4, ups=0.09, wpb=64812, bsz=128, num_updates=10203, lr=9.99264e-05, gnorm=1.971, loss_scale=1, train_wall=10, gb_free=2.8, wall=117288 2021-06-20 03:13:45 | INFO | train_inner | epoch 004: 1261 / 3002 loss=2.625, ppl=6.17, wps=5955.2, ups=0.09, wpb=64914, bsz=128, num_updates=10204, lr=9.99264e-05, gnorm=2.059, loss_scale=1, train_wall=10, gb_free=2.8, wall=117299 2021-06-20 03:13:56 | INFO | train_inner | epoch 004: 1262 / 3002 loss=2.653, ppl=6.29, wps=5908.7, ups=0.09, wpb=64850, bsz=128, num_updates=10205, lr=9.99264e-05, gnorm=2.149, loss_scale=1, train_wall=11, gb_free=2.8, wall=117310 2021-06-20 03:14:07 | INFO | train_inner | epoch 004: 1263 / 3002 loss=2.505, ppl=5.68, wps=5961, ups=0.09, wpb=64843, bsz=128, num_updates=10206, lr=9.99263e-05, gnorm=2.076, loss_scale=1, train_wall=10, gb_free=2.8, wall=117321 2021-06-20 03:14:18 | INFO | train_inner | epoch 004: 1264 / 3002 loss=2.596, ppl=6.05, wps=5933.4, ups=0.09, wpb=64892, bsz=128, num_updates=10207, lr=9.99263e-05, gnorm=2.143, loss_scale=1, train_wall=10, gb_free=2.8, wall=117332 2021-06-20 03:14:29 | INFO | train_inner | epoch 004: 1265 / 3002 loss=2.493, ppl=5.63, wps=5807.8, ups=0.09, wpb=64826, bsz=128, num_updates=10208, lr=9.99263e-05, gnorm=2.08, loss_scale=1, train_wall=11, gb_free=2.8, wall=117343 2021-06-20 03:14:40 | INFO | train_inner | epoch 004: 1266 / 3002 loss=2.575, ppl=5.96, wps=5913.1, ups=0.09, wpb=64821, bsz=128, num_updates=10209, lr=9.99263e-05, gnorm=2.072, loss_scale=1, train_wall=11, gb_free=2.8, wall=117354 2021-06-20 03:14:51 | INFO | train_inner | epoch 004: 1267 / 3002 loss=2.637, ppl=6.22, wps=5957.2, ups=0.09, wpb=64799, bsz=128, num_updates=10210, lr=9.99263e-05, gnorm=1.996, loss_scale=1, train_wall=10, gb_free=2.8, wall=117365 2021-06-20 03:15:02 | INFO | train_inner | epoch 004: 1268 / 3002 loss=2.492, ppl=5.63, wps=5914.7, ups=0.09, wpb=64826, bsz=128, num_updates=10211, lr=9.99263e-05, gnorm=2.011, loss_scale=1, train_wall=11, gb_free=2.8, wall=117376 2021-06-20 03:15:13 | INFO | train_inner | epoch 004: 1269 / 3002 loss=2.561, ppl=5.9, wps=5705.2, ups=0.09, wpb=64790, bsz=128, num_updates=10212, lr=9.99263e-05, gnorm=2.072, loss_scale=1, train_wall=11, gb_free=2.8, wall=117387 2021-06-20 03:15:24 | INFO | train_inner | epoch 004: 1270 / 3002 loss=2.579, ppl=5.98, wps=5856.1, ups=0.09, wpb=64855, bsz=128, num_updates=10213, lr=9.99263e-05, gnorm=2.102, loss_scale=1, train_wall=11, gb_free=2.8, wall=117398 2021-06-20 03:15:35 | INFO | train_inner | epoch 004: 1271 / 3002 loss=2.635, ppl=6.21, wps=5727.3, ups=0.09, wpb=64840, bsz=128, num_updates=10214, lr=9.99263e-05, gnorm=2.042, loss_scale=1, train_wall=11, gb_free=2.8, wall=117410 2021-06-20 03:15:46 | INFO | train_inner | epoch 004: 1272 / 3002 loss=2.665, ppl=6.34, wps=5862.4, ups=0.09, wpb=64868, bsz=128, num_updates=10215, lr=9.99263e-05, gnorm=2.145, loss_scale=1, train_wall=11, gb_free=2.8, wall=117421 2021-06-20 03:15:58 | INFO | train_inner | epoch 004: 1273 / 3002 loss=2.637, ppl=6.22, wps=5847.5, ups=0.09, wpb=64882, bsz=128, num_updates=10216, lr=9.99263e-05, gnorm=2.011, loss_scale=1, train_wall=11, gb_free=2.8, wall=117432 2021-06-20 03:16:09 | INFO | train_inner | epoch 004: 1274 / 3002 loss=2.685, ppl=6.43, wps=5894.2, ups=0.09, wpb=64869, bsz=128, num_updates=10217, lr=9.99263e-05, gnorm=2.159, loss_scale=1, train_wall=11, gb_free=2.8, wall=117443 2021-06-20 03:16:20 | INFO | train_inner | epoch 004: 1275 / 3002 loss=2.574, ppl=5.96, wps=5846.8, ups=0.09, wpb=64775, bsz=128, num_updates=10218, lr=9.99263e-05, gnorm=2.07, loss_scale=1, train_wall=11, gb_free=2.8, wall=117454 2021-06-20 03:16:31 | INFO | train_inner | epoch 004: 1276 / 3002 loss=2.461, ppl=5.51, wps=5896.7, ups=0.09, wpb=64808, bsz=128, num_updates=10219, lr=9.99262e-05, gnorm=2.097, loss_scale=1, train_wall=11, gb_free=2.8, wall=117465 2021-06-20 03:16:42 | INFO | train_inner | epoch 004: 1277 / 3002 loss=2.627, ppl=6.18, wps=5926.2, ups=0.09, wpb=64848, bsz=128, num_updates=10220, lr=9.99262e-05, gnorm=2.083, loss_scale=1, train_wall=10, gb_free=2.8, wall=117476 2021-06-20 03:16:52 | INFO | train_inner | epoch 004: 1278 / 3002 loss=2.659, ppl=6.31, wps=5996.4, ups=0.09, wpb=64902, bsz=128, num_updates=10221, lr=9.99262e-05, gnorm=2.072, loss_scale=1, train_wall=10, gb_free=2.8, wall=117487 2021-06-20 03:17:04 | INFO | train_inner | epoch 004: 1279 / 3002 loss=2.609, ppl=6.1, wps=5756, ups=0.09, wpb=64749, bsz=128, num_updates=10222, lr=9.99262e-05, gnorm=2.173, loss_scale=1, train_wall=11, gb_free=2.8, wall=117498 2021-06-20 03:17:15 | INFO | train_inner | epoch 004: 1280 / 3002 loss=2.714, ppl=6.56, wps=5821.7, ups=0.09, wpb=64915, bsz=128, num_updates=10223, lr=9.99262e-05, gnorm=8.619, loss_scale=1, train_wall=11, gb_free=2.8, wall=117509 2021-06-20 03:17:26 | INFO | train_inner | epoch 004: 1281 / 3002 loss=2.565, ppl=5.92, wps=5893.8, ups=0.09, wpb=64764, bsz=128, num_updates=10224, lr=9.99262e-05, gnorm=2.072, loss_scale=1, train_wall=11, gb_free=2.8, wall=117520 2021-06-20 03:17:37 | INFO | train_inner | epoch 004: 1282 / 3002 loss=2.575, ppl=5.96, wps=5947.4, ups=0.09, wpb=64821, bsz=128, num_updates=10225, lr=9.99262e-05, gnorm=2.182, loss_scale=1, train_wall=10, gb_free=2.8, wall=117531 2021-06-20 03:17:47 | INFO | train_inner | epoch 004: 1283 / 3002 loss=2.527, ppl=5.76, wps=5995.3, ups=0.09, wpb=64819, bsz=128, num_updates=10226, lr=9.99262e-05, gnorm=4.21, loss_scale=1, train_wall=10, gb_free=2.8, wall=117542 2021-06-20 03:17:58 | INFO | train_inner | epoch 004: 1284 / 3002 loss=2.625, ppl=6.17, wps=5865.5, ups=0.09, wpb=64811, bsz=128, num_updates=10227, lr=9.99262e-05, gnorm=2.029, loss_scale=1, train_wall=11, gb_free=2.8, wall=117553 2021-06-20 03:18:10 | INFO | train_inner | epoch 004: 1285 / 3002 loss=2.627, ppl=6.18, wps=5818.6, ups=0.09, wpb=64862, bsz=128, num_updates=10228, lr=9.99262e-05, gnorm=2.19, loss_scale=1, train_wall=11, gb_free=2.8, wall=117564 2021-06-20 03:18:21 | INFO | train_inner | epoch 004: 1286 / 3002 loss=2.561, ppl=5.9, wps=5876.6, ups=0.09, wpb=64818, bsz=128, num_updates=10229, lr=9.99262e-05, gnorm=2.125, loss_scale=1, train_wall=11, gb_free=2.8, wall=117575 2021-06-20 03:18:32 | INFO | train_inner | epoch 004: 1287 / 3002 loss=2.525, ppl=5.76, wps=5855.4, ups=0.09, wpb=64812, bsz=128, num_updates=10230, lr=9.99262e-05, gnorm=2.102, loss_scale=1, train_wall=11, gb_free=2.8, wall=117586 2021-06-20 03:18:43 | INFO | train_inner | epoch 004: 1288 / 3002 loss=2.639, ppl=6.23, wps=5842.7, ups=0.09, wpb=64861, bsz=128, num_updates=10231, lr=9.99261e-05, gnorm=2.225, loss_scale=1, train_wall=11, gb_free=2.8, wall=117597 2021-06-20 03:18:54 | INFO | train_inner | epoch 004: 1289 / 3002 loss=2.566, ppl=5.92, wps=5898.7, ups=0.09, wpb=64705, bsz=128, num_updates=10232, lr=9.99261e-05, gnorm=3.387, loss_scale=1, train_wall=10, gb_free=2.8, wall=117608 2021-06-20 03:19:05 | INFO | train_inner | epoch 004: 1290 / 3002 loss=2.663, ppl=6.33, wps=6005, ups=0.09, wpb=64855, bsz=128, num_updates=10233, lr=9.99261e-05, gnorm=2.649, loss_scale=1, train_wall=10, gb_free=2.8, wall=117619 2021-06-20 03:19:16 | INFO | train_inner | epoch 004: 1291 / 3002 loss=2.807, ppl=7, wps=5939.6, ups=0.09, wpb=64865, bsz=128, num_updates=10234, lr=9.99261e-05, gnorm=2.178, loss_scale=1, train_wall=10, gb_free=2.8, wall=117630 2021-06-20 03:19:27 | INFO | train_inner | epoch 004: 1292 / 3002 loss=2.61, ppl=6.11, wps=5862.1, ups=0.09, wpb=64816, bsz=128, num_updates=10235, lr=9.99261e-05, gnorm=2.101, loss_scale=1, train_wall=11, gb_free=2.8, wall=117641 2021-06-20 03:19:38 | INFO | train_inner | epoch 004: 1293 / 3002 loss=2.739, ppl=6.67, wps=5933.9, ups=0.09, wpb=64826, bsz=128, num_updates=10236, lr=9.99261e-05, gnorm=8.649, loss_scale=1, train_wall=10, gb_free=2.8, wall=117652 2021-06-20 03:19:49 | INFO | train_inner | epoch 004: 1294 / 3002 loss=2.806, ppl=6.99, wps=5786.2, ups=0.09, wpb=64763, bsz=128, num_updates=10237, lr=9.99261e-05, gnorm=24.351, loss_scale=1, train_wall=11, gb_free=2.8, wall=117663 2021-06-20 03:20:00 | INFO | train_inner | epoch 004: 1295 / 3002 loss=2.549, ppl=5.85, wps=5841.3, ups=0.09, wpb=64762, bsz=128, num_updates=10238, lr=9.99261e-05, gnorm=3.349, loss_scale=1, train_wall=11, gb_free=2.8, wall=117674 2021-06-20 03:20:11 | INFO | train_inner | epoch 004: 1296 / 3002 loss=2.701, ppl=6.5, wps=5929.3, ups=0.09, wpb=64811, bsz=128, num_updates=10239, lr=9.99261e-05, gnorm=2.085, loss_scale=1, train_wall=10, gb_free=2.8, wall=117685 2021-06-20 03:20:22 | INFO | train_inner | epoch 004: 1297 / 3002 loss=2.646, ppl=6.26, wps=5793.4, ups=0.09, wpb=64917, bsz=128, num_updates=10240, lr=9.99261e-05, gnorm=2.096, loss_scale=1, train_wall=11, gb_free=2.8, wall=117696 2021-06-20 03:20:33 | INFO | train_inner | epoch 004: 1298 / 3002 loss=2.564, ppl=5.91, wps=5761.5, ups=0.09, wpb=64765, bsz=128, num_updates=10241, lr=9.99261e-05, gnorm=2.806, loss_scale=1, train_wall=11, gb_free=2.8, wall=117708 2021-06-20 03:20:44 | INFO | train_inner | epoch 004: 1299 / 3002 loss=2.559, ppl=5.89, wps=5921.9, ups=0.09, wpb=64877, bsz=128, num_updates=10242, lr=9.99261e-05, gnorm=2.198, loss_scale=1, train_wall=10, gb_free=2.8, wall=117718 2021-06-20 03:20:55 | INFO | train_inner | epoch 004: 1300 / 3002 loss=2.673, ppl=6.38, wps=5828.2, ups=0.09, wpb=64844, bsz=128, num_updates=10243, lr=9.99261e-05, gnorm=9.962, loss_scale=1, train_wall=11, gb_free=2.8, wall=117730 2021-06-20 03:21:06 | INFO | train_inner | epoch 004: 1301 / 3002 loss=2.613, ppl=6.12, wps=5817.8, ups=0.09, wpb=64710, bsz=128, num_updates=10244, lr=9.9926e-05, gnorm=2.347, loss_scale=1, train_wall=11, gb_free=2.8, wall=117741 2021-06-20 03:21:17 | INFO | train_inner | epoch 004: 1302 / 3002 loss=2.687, ppl=6.44, wps=5920.6, ups=0.09, wpb=64861, bsz=128, num_updates=10245, lr=9.9926e-05, gnorm=2.149, loss_scale=1, train_wall=11, gb_free=2.8, wall=117752 2021-06-20 03:21:29 | INFO | train_inner | epoch 004: 1303 / 3002 loss=2.621, ppl=6.15, wps=5783, ups=0.09, wpb=64847, bsz=128, num_updates=10246, lr=9.9926e-05, gnorm=2.501, loss_scale=1, train_wall=11, gb_free=2.8, wall=117763 2021-06-20 03:21:40 | INFO | train_inner | epoch 004: 1304 / 3002 loss=2.658, ppl=6.31, wps=5875.2, ups=0.09, wpb=64763, bsz=128, num_updates=10247, lr=9.9926e-05, gnorm=2.19, loss_scale=1, train_wall=11, gb_free=2.8, wall=117774 2021-06-20 03:21:51 | INFO | train_inner | epoch 004: 1305 / 3002 loss=2.475, ppl=5.56, wps=5780.1, ups=0.09, wpb=64841, bsz=128, num_updates=10248, lr=9.9926e-05, gnorm=2.414, loss_scale=1, train_wall=11, gb_free=2.8, wall=117785 2021-06-20 03:22:02 | INFO | train_inner | epoch 004: 1306 / 3002 loss=2.662, ppl=6.33, wps=6034.4, ups=0.09, wpb=64817, bsz=128, num_updates=10249, lr=9.9926e-05, gnorm=2.098, loss_scale=1, train_wall=10, gb_free=2.8, wall=117796 2021-06-20 03:22:13 | INFO | train_inner | epoch 004: 1307 / 3002 loss=2.602, ppl=6.07, wps=5811.3, ups=0.09, wpb=64870, bsz=128, num_updates=10250, lr=9.9926e-05, gnorm=2.276, loss_scale=1, train_wall=11, gb_free=2.8, wall=117807 2021-06-20 03:22:24 | INFO | train_inner | epoch 004: 1308 / 3002 loss=2.558, ppl=5.89, wps=5878, ups=0.09, wpb=64853, bsz=128, num_updates=10251, lr=9.9926e-05, gnorm=2.167, loss_scale=1, train_wall=11, gb_free=2.8, wall=117818 2021-06-20 03:22:35 | INFO | train_inner | epoch 004: 1309 / 3002 loss=2.889, ppl=7.41, wps=5838.8, ups=0.09, wpb=64827, bsz=128, num_updates=10252, lr=9.9926e-05, gnorm=2.414, loss_scale=1, train_wall=11, gb_free=2.8, wall=117829 2021-06-20 03:22:46 | INFO | train_inner | epoch 004: 1310 / 3002 loss=2.541, ppl=5.82, wps=5940.2, ups=0.09, wpb=64863, bsz=128, num_updates=10253, lr=9.9926e-05, gnorm=2.211, loss_scale=1, train_wall=10, gb_free=2.8, wall=117840 2021-06-20 03:22:57 | INFO | train_inner | epoch 004: 1311 / 3002 loss=2.565, ppl=5.92, wps=5870.2, ups=0.09, wpb=64815, bsz=128, num_updates=10254, lr=9.9926e-05, gnorm=2.189, loss_scale=1, train_wall=11, gb_free=2.8, wall=117851 2021-06-20 03:23:08 | INFO | train_inner | epoch 004: 1312 / 3002 loss=2.579, ppl=5.98, wps=5772.9, ups=0.09, wpb=64784, bsz=128, num_updates=10255, lr=9.9926e-05, gnorm=2.318, loss_scale=1, train_wall=11, gb_free=2.8, wall=117862 2021-06-20 03:23:19 | INFO | train_inner | epoch 004: 1313 / 3002 loss=2.654, ppl=6.29, wps=5928.5, ups=0.09, wpb=64786, bsz=128, num_updates=10256, lr=9.99259e-05, gnorm=2.182, loss_scale=1, train_wall=10, gb_free=2.8, wall=117873 2021-06-20 03:23:30 | INFO | train_inner | epoch 004: 1314 / 3002 loss=2.556, ppl=5.88, wps=5993.3, ups=0.09, wpb=64961, bsz=128, num_updates=10257, lr=9.99259e-05, gnorm=2.121, loss_scale=1, train_wall=10, gb_free=2.8, wall=117884 2021-06-20 03:23:41 | INFO | train_inner | epoch 004: 1315 / 3002 loss=2.623, ppl=6.16, wps=5981.1, ups=0.09, wpb=64873, bsz=128, num_updates=10258, lr=9.99259e-05, gnorm=2.149, loss_scale=1, train_wall=10, gb_free=2.8, wall=117895 2021-06-20 03:23:52 | INFO | train_inner | epoch 004: 1316 / 3002 loss=2.559, ppl=5.89, wps=5776.9, ups=0.09, wpb=64836, bsz=128, num_updates=10259, lr=9.99259e-05, gnorm=2.175, loss_scale=1, train_wall=11, gb_free=2.8, wall=117906 2021-06-20 03:24:03 | INFO | train_inner | epoch 004: 1317 / 3002 loss=2.586, ppl=6.01, wps=5778.6, ups=0.09, wpb=64812, bsz=128, num_updates=10260, lr=9.99259e-05, gnorm=2.124, loss_scale=1, train_wall=11, gb_free=2.8, wall=117917 2021-06-20 03:24:14 | INFO | train_inner | epoch 004: 1318 / 3002 loss=2.538, ppl=5.81, wps=5849.7, ups=0.09, wpb=64817, bsz=128, num_updates=10261, lr=9.99259e-05, gnorm=2.529, loss_scale=1, train_wall=11, gb_free=2.8, wall=117928 2021-06-20 03:24:25 | INFO | train_inner | epoch 004: 1319 / 3002 loss=2.485, ppl=5.6, wps=5921.3, ups=0.09, wpb=64985, bsz=128, num_updates=10262, lr=9.99259e-05, gnorm=2.137, loss_scale=1, train_wall=11, gb_free=2.8, wall=117939 2021-06-20 03:24:36 | INFO | train_inner | epoch 004: 1320 / 3002 loss=2.687, ppl=6.44, wps=5891.7, ups=0.09, wpb=64827, bsz=128, num_updates=10263, lr=9.99259e-05, gnorm=2.135, loss_scale=1, train_wall=11, gb_free=2.8, wall=117950 2021-06-20 03:24:47 | INFO | train_inner | epoch 004: 1321 / 3002 loss=2.533, ppl=5.79, wps=5844.8, ups=0.09, wpb=64800, bsz=128, num_updates=10264, lr=9.99259e-05, gnorm=2.089, loss_scale=1, train_wall=11, gb_free=2.8, wall=117962 2021-06-20 03:24:58 | INFO | train_inner | epoch 004: 1322 / 3002 loss=2.476, ppl=5.56, wps=5894.9, ups=0.09, wpb=64849, bsz=128, num_updates=10265, lr=9.99259e-05, gnorm=2.062, loss_scale=1, train_wall=11, gb_free=2.8, wall=117973 2021-06-20 03:25:09 | INFO | train_inner | epoch 004: 1323 / 3002 loss=2.547, ppl=5.84, wps=5919.4, ups=0.09, wpb=64832, bsz=128, num_updates=10266, lr=9.99259e-05, gnorm=2.073, loss_scale=1, train_wall=11, gb_free=2.8, wall=117984 2021-06-20 03:25:20 | INFO | train_inner | epoch 004: 1324 / 3002 loss=2.593, ppl=6.03, wps=5928, ups=0.09, wpb=64879, bsz=128, num_updates=10267, lr=9.99259e-05, gnorm=2.111, loss_scale=1, train_wall=10, gb_free=2.8, wall=117994 2021-06-20 03:25:31 | INFO | train_inner | epoch 004: 1325 / 3002 loss=2.543, ppl=5.83, wps=5706.2, ups=0.09, wpb=64810, bsz=128, num_updates=10268, lr=9.99259e-05, gnorm=2.105, loss_scale=1, train_wall=11, gb_free=2.8, wall=118006 2021-06-20 03:25:43 | INFO | train_inner | epoch 004: 1326 / 3002 loss=2.478, ppl=5.57, wps=5831.8, ups=0.09, wpb=64835, bsz=128, num_updates=10269, lr=9.99258e-05, gnorm=2.905, loss_scale=1, train_wall=11, gb_free=2.8, wall=118017 2021-06-20 03:25:54 | INFO | train_inner | epoch 004: 1327 / 3002 loss=2.692, ppl=6.46, wps=5789.3, ups=0.09, wpb=64811, bsz=128, num_updates=10270, lr=9.99258e-05, gnorm=2.232, loss_scale=1, train_wall=11, gb_free=2.8, wall=118028 2021-06-20 03:26:05 | INFO | train_inner | epoch 004: 1328 / 3002 loss=2.601, ppl=6.07, wps=5787.2, ups=0.09, wpb=64817, bsz=128, num_updates=10271, lr=9.99258e-05, gnorm=2.016, loss_scale=1, train_wall=11, gb_free=2.8, wall=118039 2021-06-20 03:26:16 | INFO | train_inner | epoch 004: 1329 / 3002 loss=2.729, ppl=6.63, wps=5735, ups=0.09, wpb=64836, bsz=128, num_updates=10272, lr=9.99258e-05, gnorm=21.78, loss_scale=1, train_wall=11, gb_free=2.8, wall=118051 2021-06-20 03:26:27 | INFO | train_inner | epoch 004: 1330 / 3002 loss=2.626, ppl=6.17, wps=5932.1, ups=0.09, wpb=64890, bsz=128, num_updates=10273, lr=9.99258e-05, gnorm=2.207, loss_scale=1, train_wall=10, gb_free=2.8, wall=118062 2021-06-20 03:26:39 | INFO | train_inner | epoch 004: 1331 / 3002 loss=2.531, ppl=5.78, wps=5745.6, ups=0.09, wpb=64809, bsz=128, num_updates=10274, lr=9.99258e-05, gnorm=2.308, loss_scale=1, train_wall=11, gb_free=2.8, wall=118073 2021-06-20 03:26:50 | INFO | train_inner | epoch 004: 1332 / 3002 loss=2.651, ppl=6.28, wps=5765.7, ups=0.09, wpb=64827, bsz=128, num_updates=10275, lr=9.99258e-05, gnorm=2.522, loss_scale=1, train_wall=11, gb_free=2.8, wall=118084 2021-06-20 03:27:01 | INFO | train_inner | epoch 004: 1333 / 3002 loss=2.718, ppl=6.58, wps=5754.3, ups=0.09, wpb=64820, bsz=128, num_updates=10276, lr=9.99258e-05, gnorm=2.27, loss_scale=1, train_wall=11, gb_free=2.8, wall=118095 2021-06-20 03:27:12 | INFO | train_inner | epoch 004: 1334 / 3002 loss=2.659, ppl=6.32, wps=5974.7, ups=0.09, wpb=64985, bsz=128, num_updates=10277, lr=9.99258e-05, gnorm=2.019, loss_scale=1, train_wall=10, gb_free=2.8, wall=118106 2021-06-20 03:27:23 | INFO | train_inner | epoch 004: 1335 / 3002 loss=2.406, ppl=5.3, wps=5866.8, ups=0.09, wpb=64785, bsz=128, num_updates=10278, lr=9.99258e-05, gnorm=2.11, loss_scale=1, train_wall=11, gb_free=2.8, wall=118117 2021-06-20 03:27:34 | INFO | train_inner | epoch 004: 1336 / 3002 loss=2.649, ppl=6.27, wps=5751.3, ups=0.09, wpb=64903, bsz=128, num_updates=10279, lr=9.99258e-05, gnorm=3.145, loss_scale=1, train_wall=11, gb_free=2.8, wall=118129 2021-06-20 03:27:45 | INFO | train_inner | epoch 004: 1337 / 3002 loss=2.512, ppl=5.71, wps=5823.5, ups=0.09, wpb=64782, bsz=128, num_updates=10280, lr=9.99258e-05, gnorm=2.138, loss_scale=1, train_wall=11, gb_free=2.8, wall=118140 2021-06-20 03:27:56 | INFO | train_inner | epoch 004: 1338 / 3002 loss=2.608, ppl=6.1, wps=5889, ups=0.09, wpb=64844, bsz=128, num_updates=10281, lr=9.99257e-05, gnorm=2.168, loss_scale=1, train_wall=11, gb_free=2.8, wall=118151 2021-06-20 03:28:08 | INFO | train_inner | epoch 004: 1339 / 3002 loss=2.588, ppl=6.01, wps=5741.4, ups=0.09, wpb=64772, bsz=128, num_updates=10282, lr=9.99257e-05, gnorm=2.26, loss_scale=1, train_wall=11, gb_free=2.8, wall=118162 2021-06-20 03:28:19 | INFO | train_inner | epoch 004: 1340 / 3002 loss=2.542, ppl=5.83, wps=5877.2, ups=0.09, wpb=64869, bsz=128, num_updates=10283, lr=9.99257e-05, gnorm=2.228, loss_scale=1, train_wall=11, gb_free=2.8, wall=118173 2021-06-20 03:28:30 | INFO | train_inner | epoch 004: 1341 / 3002 loss=2.551, ppl=5.86, wps=5862.5, ups=0.09, wpb=64888, bsz=128, num_updates=10284, lr=9.99257e-05, gnorm=2.153, loss_scale=1, train_wall=11, gb_free=2.8, wall=118184 2021-06-20 03:28:41 | INFO | train_inner | epoch 004: 1342 / 3002 loss=2.615, ppl=6.13, wps=5953.8, ups=0.09, wpb=64898, bsz=128, num_updates=10285, lr=9.99257e-05, gnorm=2.094, loss_scale=1, train_wall=10, gb_free=2.8, wall=118195 2021-06-20 03:28:52 | INFO | train_inner | epoch 004: 1343 / 3002 loss=2.605, ppl=6.09, wps=5871.7, ups=0.09, wpb=64840, bsz=128, num_updates=10286, lr=9.99257e-05, gnorm=6.703, loss_scale=1, train_wall=11, gb_free=2.8, wall=118206 2021-06-20 03:29:03 | INFO | train_inner | epoch 004: 1344 / 3002 loss=2.462, ppl=5.51, wps=5854.1, ups=0.09, wpb=64854, bsz=128, num_updates=10287, lr=9.99257e-05, gnorm=2.151, loss_scale=1, train_wall=11, gb_free=2.8, wall=118217 2021-06-20 03:29:14 | INFO | train_inner | epoch 004: 1345 / 3002 loss=2.649, ppl=6.27, wps=5787.5, ups=0.09, wpb=64828, bsz=128, num_updates=10288, lr=9.99257e-05, gnorm=2.437, loss_scale=1, train_wall=11, gb_free=2.8, wall=118228 2021-06-20 03:29:25 | INFO | train_inner | epoch 004: 1346 / 3002 loss=2.543, ppl=5.83, wps=5914.4, ups=0.09, wpb=64836, bsz=128, num_updates=10289, lr=9.99257e-05, gnorm=2.162, loss_scale=1, train_wall=11, gb_free=2.8, wall=118239 2021-06-20 03:29:36 | INFO | train_inner | epoch 004: 1347 / 3002 loss=2.551, ppl=5.86, wps=5824.3, ups=0.09, wpb=64820, bsz=128, num_updates=10290, lr=9.99257e-05, gnorm=10.612, loss_scale=1, train_wall=11, gb_free=2.8, wall=118250 2021-06-20 03:29:47 | INFO | train_inner | epoch 004: 1348 / 3002 loss=2.55, ppl=5.86, wps=5926.5, ups=0.09, wpb=64803, bsz=128, num_updates=10291, lr=9.99257e-05, gnorm=2.116, loss_scale=1, train_wall=10, gb_free=2.8, wall=118261 2021-06-20 03:29:58 | INFO | train_inner | epoch 004: 1349 / 3002 loss=2.516, ppl=5.72, wps=5824.2, ups=0.09, wpb=64742, bsz=128, num_updates=10292, lr=9.99257e-05, gnorm=2.102, loss_scale=1, train_wall=11, gb_free=2.8, wall=118272 2021-06-20 03:30:09 | INFO | train_inner | epoch 004: 1350 / 3002 loss=2.395, ppl=5.26, wps=5873.6, ups=0.09, wpb=64847, bsz=128, num_updates=10293, lr=9.99257e-05, gnorm=2.225, loss_scale=1, train_wall=11, gb_free=2.8, wall=118283 2021-06-20 03:30:20 | INFO | train_inner | epoch 004: 1351 / 3002 loss=2.672, ppl=6.37, wps=5770.2, ups=0.09, wpb=64862, bsz=128, num_updates=10294, lr=9.99256e-05, gnorm=2.128, loss_scale=1, train_wall=11, gb_free=2.8, wall=118295 2021-06-20 03:30:31 | INFO | train_inner | epoch 004: 1352 / 3002 loss=2.572, ppl=5.94, wps=5845.6, ups=0.09, wpb=64804, bsz=128, num_updates=10295, lr=9.99256e-05, gnorm=4.899, loss_scale=1, train_wall=11, gb_free=2.8, wall=118306 2021-06-20 03:30:42 | INFO | train_inner | epoch 004: 1353 / 3002 loss=2.658, ppl=6.31, wps=5921, ups=0.09, wpb=64817, bsz=128, num_updates=10296, lr=9.99256e-05, gnorm=2.088, loss_scale=1, train_wall=10, gb_free=2.8, wall=118317 2021-06-20 03:30:54 | INFO | train_inner | epoch 004: 1354 / 3002 loss=2.653, ppl=6.29, wps=5827.5, ups=0.09, wpb=64824, bsz=128, num_updates=10297, lr=9.99256e-05, gnorm=2.084, loss_scale=1, train_wall=11, gb_free=2.8, wall=118328 2021-06-20 03:31:05 | INFO | train_inner | epoch 004: 1355 / 3002 loss=2.517, ppl=5.72, wps=5812.4, ups=0.09, wpb=64816, bsz=128, num_updates=10298, lr=9.99256e-05, gnorm=2.039, loss_scale=1, train_wall=11, gb_free=2.8, wall=118339 2021-06-20 03:31:16 | INFO | train_inner | epoch 004: 1356 / 3002 loss=2.543, ppl=5.83, wps=5778.8, ups=0.09, wpb=64811, bsz=128, num_updates=10299, lr=9.99256e-05, gnorm=2.031, loss_scale=1, train_wall=11, gb_free=2.8, wall=118350 2021-06-20 03:31:27 | INFO | train_inner | epoch 004: 1357 / 3002 loss=2.475, ppl=5.56, wps=5901.3, ups=0.09, wpb=64895, bsz=128, num_updates=10300, lr=9.99256e-05, gnorm=2.004, loss_scale=1, train_wall=11, gb_free=2.8, wall=118361 2021-06-20 03:31:38 | INFO | train_inner | epoch 004: 1358 / 3002 loss=2.716, ppl=6.57, wps=5897.3, ups=0.09, wpb=64763, bsz=128, num_updates=10301, lr=9.99256e-05, gnorm=2.128, loss_scale=1, train_wall=11, gb_free=2.8, wall=118372 2021-06-20 03:31:49 | INFO | train_inner | epoch 004: 1359 / 3002 loss=2.544, ppl=5.83, wps=5852.3, ups=0.09, wpb=64924, bsz=128, num_updates=10302, lr=9.99256e-05, gnorm=2.044, loss_scale=1, train_wall=11, gb_free=2.8, wall=118383 2021-06-20 03:32:00 | INFO | train_inner | epoch 004: 1360 / 3002 loss=2.663, ppl=6.33, wps=5907.8, ups=0.09, wpb=64807, bsz=128, num_updates=10303, lr=9.99256e-05, gnorm=2.124, loss_scale=1, train_wall=11, gb_free=2.8, wall=118394 2021-06-20 03:32:11 | INFO | train_inner | epoch 004: 1361 / 3002 loss=2.661, ppl=6.32, wps=5857.9, ups=0.09, wpb=64795, bsz=128, num_updates=10304, lr=9.99256e-05, gnorm=2.169, loss_scale=1, train_wall=11, gb_free=2.8, wall=118405 2021-06-20 03:32:22 | INFO | train_inner | epoch 004: 1362 / 3002 loss=2.649, ppl=6.27, wps=5787.4, ups=0.09, wpb=64762, bsz=128, num_updates=10305, lr=9.99256e-05, gnorm=2.012, loss_scale=1, train_wall=11, gb_free=2.8, wall=118417 2021-06-20 03:32:33 | INFO | train_inner | epoch 004: 1363 / 3002 loss=2.64, ppl=6.23, wps=5762.7, ups=0.09, wpb=64850, bsz=128, num_updates=10306, lr=9.99255e-05, gnorm=2.003, loss_scale=1, train_wall=11, gb_free=2.8, wall=118428 2021-06-20 03:32:45 | INFO | train_inner | epoch 004: 1364 / 3002 loss=2.791, ppl=6.92, wps=5748.9, ups=0.09, wpb=64859, bsz=128, num_updates=10307, lr=9.99255e-05, gnorm=2.124, loss_scale=1, train_wall=11, gb_free=2.8, wall=118439 2021-06-20 03:32:56 | INFO | train_inner | epoch 004: 1365 / 3002 loss=2.724, ppl=6.61, wps=5857.2, ups=0.09, wpb=64808, bsz=128, num_updates=10308, lr=9.99255e-05, gnorm=2.229, loss_scale=1, train_wall=11, gb_free=2.8, wall=118450 2021-06-20 03:33:07 | INFO | train_inner | epoch 004: 1366 / 3002 loss=2.725, ppl=6.61, wps=5979.4, ups=0.09, wpb=64899, bsz=128, num_updates=10309, lr=9.99255e-05, gnorm=2.175, loss_scale=1, train_wall=10, gb_free=2.8, wall=118461 2021-06-20 03:33:18 | INFO | train_inner | epoch 004: 1367 / 3002 loss=2.429, ppl=5.38, wps=5875.9, ups=0.09, wpb=64907, bsz=128, num_updates=10310, lr=9.99255e-05, gnorm=2.191, loss_scale=1, train_wall=11, gb_free=2.8, wall=118472 2021-06-20 03:33:29 | INFO | train_inner | epoch 004: 1368 / 3002 loss=2.482, ppl=5.59, wps=5848.3, ups=0.09, wpb=64879, bsz=128, num_updates=10311, lr=9.99255e-05, gnorm=3.098, loss_scale=1, train_wall=11, gb_free=2.8, wall=118483 2021-06-20 03:33:40 | INFO | train_inner | epoch 004: 1369 / 3002 loss=2.61, ppl=6.1, wps=5814, ups=0.09, wpb=64867, bsz=128, num_updates=10312, lr=9.99255e-05, gnorm=12.511, loss_scale=1, train_wall=11, gb_free=2.8, wall=118494 2021-06-20 03:33:51 | INFO | train_inner | epoch 004: 1370 / 3002 loss=2.506, ppl=5.68, wps=5841.6, ups=0.09, wpb=64849, bsz=128, num_updates=10313, lr=9.99255e-05, gnorm=2.142, loss_scale=1, train_wall=11, gb_free=2.8, wall=118505 2021-06-20 03:34:02 | INFO | train_inner | epoch 004: 1371 / 3002 loss=2.496, ppl=5.64, wps=5818, ups=0.09, wpb=64774, bsz=128, num_updates=10314, lr=9.99255e-05, gnorm=2.035, loss_scale=1, train_wall=11, gb_free=2.8, wall=118517 2021-06-20 03:34:13 | INFO | train_inner | epoch 004: 1372 / 3002 loss=2.653, ppl=6.29, wps=5951, ups=0.09, wpb=64851, bsz=128, num_updates=10315, lr=9.99255e-05, gnorm=2.174, loss_scale=1, train_wall=10, gb_free=2.8, wall=118527 2021-06-20 03:34:24 | INFO | train_inner | epoch 004: 1373 / 3002 loss=2.676, ppl=6.39, wps=5827.1, ups=0.09, wpb=64846, bsz=128, num_updates=10316, lr=9.99255e-05, gnorm=2.157, loss_scale=1, train_wall=11, gb_free=2.8, wall=118539 2021-06-20 03:34:35 | INFO | train_inner | epoch 004: 1374 / 3002 loss=2.711, ppl=6.55, wps=5840.8, ups=0.09, wpb=64867, bsz=128, num_updates=10317, lr=9.99255e-05, gnorm=2.157, loss_scale=1, train_wall=11, gb_free=2.8, wall=118550 2021-06-20 03:34:46 | INFO | train_inner | epoch 004: 1375 / 3002 loss=2.335, ppl=5.05, wps=5839.3, ups=0.09, wpb=64928, bsz=128, num_updates=10318, lr=9.99255e-05, gnorm=2.077, loss_scale=1, train_wall=11, gb_free=2.8, wall=118561 2021-06-20 03:34:58 | INFO | train_inner | epoch 004: 1376 / 3002 loss=2.556, ppl=5.88, wps=5839.8, ups=0.09, wpb=64870, bsz=128, num_updates=10319, lr=9.99254e-05, gnorm=2.08, loss_scale=1, train_wall=11, gb_free=2.8, wall=118572 2021-06-20 03:35:09 | INFO | train_inner | epoch 004: 1377 / 3002 loss=2.556, ppl=5.88, wps=5834.3, ups=0.09, wpb=64812, bsz=128, num_updates=10320, lr=9.99254e-05, gnorm=2.056, loss_scale=1, train_wall=11, gb_free=2.8, wall=118583 2021-06-20 03:35:20 | INFO | train_inner | epoch 004: 1378 / 3002 loss=2.519, ppl=5.73, wps=5960.1, ups=0.09, wpb=64882, bsz=128, num_updates=10321, lr=9.99254e-05, gnorm=2.026, loss_scale=1, train_wall=10, gb_free=2.8, wall=118594 2021-06-20 03:35:31 | INFO | train_inner | epoch 004: 1379 / 3002 loss=2.552, ppl=5.86, wps=5857.4, ups=0.09, wpb=64842, bsz=128, num_updates=10322, lr=9.99254e-05, gnorm=2.108, loss_scale=1, train_wall=11, gb_free=2.8, wall=118605 2021-06-20 03:35:42 | INFO | train_inner | epoch 004: 1380 / 3002 loss=2.567, ppl=5.93, wps=5887, ups=0.09, wpb=64817, bsz=128, num_updates=10323, lr=9.99254e-05, gnorm=2.064, loss_scale=1, train_wall=11, gb_free=2.8, wall=118616 2021-06-20 03:35:53 | INFO | train_inner | epoch 004: 1381 / 3002 loss=2.799, ppl=6.96, wps=5838.3, ups=0.09, wpb=64853, bsz=128, num_updates=10324, lr=9.99254e-05, gnorm=2.326, loss_scale=1, train_wall=11, gb_free=2.8, wall=118627 2021-06-20 03:36:04 | INFO | train_inner | epoch 004: 1382 / 3002 loss=2.472, ppl=5.55, wps=5726.2, ups=0.09, wpb=64756, bsz=128, num_updates=10325, lr=9.99254e-05, gnorm=2.011, loss_scale=2, train_wall=11, gb_free=2.8, wall=118638 2021-06-20 03:36:15 | INFO | train_inner | epoch 004: 1383 / 3002 loss=2.555, ppl=5.88, wps=5867.4, ups=0.09, wpb=64879, bsz=128, num_updates=10326, lr=9.99254e-05, gnorm=2.155, loss_scale=2, train_wall=11, gb_free=2.8, wall=118649 2021-06-20 03:36:26 | INFO | train_inner | epoch 004: 1384 / 3002 loss=2.548, ppl=5.85, wps=5887.4, ups=0.09, wpb=64741, bsz=128, num_updates=10327, lr=9.99254e-05, gnorm=2.17, loss_scale=2, train_wall=11, gb_free=2.8, wall=118660 2021-06-20 03:36:37 | INFO | train_inner | epoch 004: 1385 / 3002 loss=2.898, ppl=7.45, wps=5799.2, ups=0.09, wpb=64831, bsz=128, num_updates=10328, lr=9.99254e-05, gnorm=2.052, loss_scale=2, train_wall=11, gb_free=2.8, wall=118672 2021-06-20 03:36:48 | INFO | train_inner | epoch 004: 1386 / 3002 loss=2.457, ppl=5.49, wps=5886.5, ups=0.09, wpb=64859, bsz=128, num_updates=10329, lr=9.99254e-05, gnorm=1.967, loss_scale=2, train_wall=11, gb_free=2.8, wall=118683 2021-06-20 03:36:59 | INFO | train_inner | epoch 004: 1387 / 3002 loss=2.648, ppl=6.27, wps=5861.3, ups=0.09, wpb=64814, bsz=128, num_updates=10330, lr=9.99254e-05, gnorm=2.164, loss_scale=2, train_wall=11, gb_free=2.8, wall=118694 2021-06-20 03:37:11 | INFO | train_inner | epoch 004: 1388 / 3002 loss=2.718, ppl=6.58, wps=5827.4, ups=0.09, wpb=64849, bsz=128, num_updates=10331, lr=9.99253e-05, gnorm=2.272, loss_scale=2, train_wall=11, gb_free=2.8, wall=118705 2021-06-20 03:37:22 | INFO | train_inner | epoch 004: 1389 / 3002 loss=2.551, ppl=5.86, wps=5745.7, ups=0.09, wpb=64919, bsz=128, num_updates=10332, lr=9.99253e-05, gnorm=2.08, loss_scale=2, train_wall=11, gb_free=2.8, wall=118716 2021-06-20 03:37:33 | INFO | train_inner | epoch 004: 1390 / 3002 loss=2.713, ppl=6.56, wps=5866.6, ups=0.09, wpb=64784, bsz=128, num_updates=10333, lr=9.99253e-05, gnorm=2.044, loss_scale=2, train_wall=11, gb_free=2.8, wall=118727 2021-06-20 03:37:44 | INFO | train_inner | epoch 004: 1391 / 3002 loss=2.622, ppl=6.16, wps=5837.3, ups=0.09, wpb=64793, bsz=128, num_updates=10334, lr=9.99253e-05, gnorm=2.028, loss_scale=2, train_wall=11, gb_free=2.8, wall=118738 2021-06-20 03:37:55 | INFO | train_inner | epoch 004: 1392 / 3002 loss=2.68, ppl=6.41, wps=5923, ups=0.09, wpb=64822, bsz=128, num_updates=10335, lr=9.99253e-05, gnorm=2.204, loss_scale=2, train_wall=11, gb_free=2.8, wall=118749 2021-06-20 03:38:06 | INFO | train_inner | epoch 004: 1393 / 3002 loss=2.712, ppl=6.55, wps=5812.6, ups=0.09, wpb=64830, bsz=128, num_updates=10336, lr=9.99253e-05, gnorm=5.41, loss_scale=2, train_wall=11, gb_free=2.8, wall=118760 2021-06-20 03:38:17 | INFO | train_inner | epoch 004: 1394 / 3002 loss=2.601, ppl=6.07, wps=5778.3, ups=0.09, wpb=64730, bsz=128, num_updates=10337, lr=9.99253e-05, gnorm=2.084, loss_scale=2, train_wall=11, gb_free=2.8, wall=118772 2021-06-20 03:38:28 | INFO | train_inner | epoch 004: 1395 / 3002 loss=2.683, ppl=6.42, wps=5907.5, ups=0.09, wpb=64853, bsz=128, num_updates=10338, lr=9.99253e-05, gnorm=4.071, loss_scale=2, train_wall=11, gb_free=2.8, wall=118783 2021-06-20 03:38:39 | INFO | train_inner | epoch 004: 1396 / 3002 loss=2.589, ppl=6.02, wps=5926.6, ups=0.09, wpb=64807, bsz=128, num_updates=10339, lr=9.99253e-05, gnorm=2.029, loss_scale=2, train_wall=10, gb_free=2.8, wall=118793 2021-06-20 03:38:50 | INFO | train_inner | epoch 004: 1397 / 3002 loss=2.781, ppl=6.87, wps=5804.7, ups=0.09, wpb=64793, bsz=128, num_updates=10340, lr=9.99253e-05, gnorm=2.256, loss_scale=2, train_wall=11, gb_free=2.8, wall=118805 2021-06-20 03:39:01 | INFO | train_inner | epoch 004: 1398 / 3002 loss=2.701, ppl=6.5, wps=5828.1, ups=0.09, wpb=64798, bsz=128, num_updates=10341, lr=9.99253e-05, gnorm=2.103, loss_scale=2, train_wall=11, gb_free=2.8, wall=118816 2021-06-20 03:39:13 | INFO | train_inner | epoch 004: 1399 / 3002 loss=2.578, ppl=5.97, wps=5857.6, ups=0.09, wpb=64827, bsz=128, num_updates=10342, lr=9.99253e-05, gnorm=2.021, loss_scale=2, train_wall=11, gb_free=2.8, wall=118827 2021-06-20 03:39:24 | INFO | train_inner | epoch 004: 1400 / 3002 loss=2.451, ppl=5.47, wps=5841.6, ups=0.09, wpb=64864, bsz=128, num_updates=10343, lr=9.99253e-05, gnorm=2.007, loss_scale=2, train_wall=11, gb_free=2.8, wall=118838 2021-06-20 03:39:35 | INFO | train_inner | epoch 004: 1401 / 3002 loss=2.59, ppl=6.02, wps=5756.8, ups=0.09, wpb=64809, bsz=128, num_updates=10344, lr=9.99252e-05, gnorm=2.069, loss_scale=2, train_wall=11, gb_free=2.8, wall=118849 2021-06-20 03:39:46 | INFO | train_inner | epoch 004: 1402 / 3002 loss=2.655, ppl=6.3, wps=5916.7, ups=0.09, wpb=64824, bsz=128, num_updates=10345, lr=9.99252e-05, gnorm=2.059, loss_scale=2, train_wall=11, gb_free=2.8, wall=118860 2021-06-20 03:39:57 | INFO | train_inner | epoch 004: 1403 / 3002 loss=2.677, ppl=6.39, wps=5854.1, ups=0.09, wpb=64836, bsz=128, num_updates=10346, lr=9.99252e-05, gnorm=4.031, loss_scale=2, train_wall=11, gb_free=2.8, wall=118871 2021-06-20 03:40:08 | INFO | train_inner | epoch 004: 1404 / 3002 loss=2.504, ppl=5.67, wps=5825.4, ups=0.09, wpb=64841, bsz=128, num_updates=10347, lr=9.99252e-05, gnorm=2.093, loss_scale=2, train_wall=11, gb_free=2.8, wall=118882 2021-06-20 03:40:19 | INFO | train_inner | epoch 004: 1405 / 3002 loss=2.502, ppl=5.66, wps=5909.6, ups=0.09, wpb=64890, bsz=128, num_updates=10348, lr=9.99252e-05, gnorm=7.559, loss_scale=2, train_wall=11, gb_free=2.8, wall=118893 2021-06-20 03:40:30 | INFO | train_inner | epoch 004: 1406 / 3002 loss=2.484, ppl=5.59, wps=5801, ups=0.09, wpb=64865, bsz=128, num_updates=10349, lr=9.99252e-05, gnorm=2.075, loss_scale=2, train_wall=11, gb_free=2.8, wall=118905 2021-06-20 03:40:41 | INFO | train_inner | epoch 004: 1407 / 3002 loss=2.485, ppl=5.6, wps=5852.4, ups=0.09, wpb=64868, bsz=128, num_updates=10350, lr=9.99252e-05, gnorm=2.019, loss_scale=2, train_wall=11, gb_free=2.8, wall=118916 2021-06-20 03:40:52 | INFO | train_inner | epoch 004: 1408 / 3002 loss=2.662, ppl=6.33, wps=5822.1, ups=0.09, wpb=64850, bsz=128, num_updates=10351, lr=9.99252e-05, gnorm=1.969, loss_scale=2, train_wall=11, gb_free=2.8, wall=118927 2021-06-20 03:41:04 | INFO | train_inner | epoch 004: 1409 / 3002 loss=2.342, ppl=5.07, wps=5742.4, ups=0.09, wpb=64849, bsz=128, num_updates=10352, lr=9.99252e-05, gnorm=1.976, loss_scale=2, train_wall=11, gb_free=2.8, wall=118938 2021-06-20 03:41:15 | INFO | train_inner | epoch 004: 1410 / 3002 loss=2.521, ppl=5.74, wps=5951.4, ups=0.09, wpb=64873, bsz=128, num_updates=10353, lr=9.99252e-05, gnorm=2.109, loss_scale=2, train_wall=10, gb_free=2.8, wall=118949 2021-06-20 03:41:26 | INFO | train_inner | epoch 004: 1411 / 3002 loss=2.585, ppl=6, wps=5814.8, ups=0.09, wpb=64809, bsz=128, num_updates=10354, lr=9.99252e-05, gnorm=3.862, loss_scale=2, train_wall=11, gb_free=2.8, wall=118960 2021-06-20 03:41:37 | INFO | train_inner | epoch 004: 1412 / 3002 loss=2.621, ppl=6.15, wps=5793.3, ups=0.09, wpb=64856, bsz=128, num_updates=10355, lr=9.99252e-05, gnorm=2.369, loss_scale=2, train_wall=11, gb_free=2.8, wall=118971 2021-06-20 03:41:48 | INFO | train_inner | epoch 004: 1413 / 3002 loss=2.557, ppl=5.88, wps=5926, ups=0.09, wpb=64836, bsz=128, num_updates=10356, lr=9.99251e-05, gnorm=2.029, loss_scale=2, train_wall=11, gb_free=2.8, wall=118982 2021-06-20 03:41:59 | INFO | train_inner | epoch 004: 1414 / 3002 loss=2.607, ppl=6.09, wps=5665.3, ups=0.09, wpb=64811, bsz=128, num_updates=10357, lr=9.99251e-05, gnorm=2.105, loss_scale=2, train_wall=11, gb_free=2.8, wall=118994 2021-06-20 03:42:10 | INFO | train_inner | epoch 004: 1415 / 3002 loss=2.589, ppl=6.02, wps=5896.8, ups=0.09, wpb=64887, bsz=128, num_updates=10358, lr=9.99251e-05, gnorm=2.081, loss_scale=2, train_wall=11, gb_free=2.8, wall=119005 2021-06-20 03:42:21 | INFO | train_inner | epoch 004: 1416 / 3002 loss=2.439, ppl=5.42, wps=5810.7, ups=0.09, wpb=64785, bsz=128, num_updates=10359, lr=9.99251e-05, gnorm=2.005, loss_scale=2, train_wall=11, gb_free=2.8, wall=119016 2021-06-20 03:42:33 | INFO | train_inner | epoch 004: 1417 / 3002 loss=2.61, ppl=6.1, wps=5784.3, ups=0.09, wpb=64804, bsz=128, num_updates=10360, lr=9.99251e-05, gnorm=2.126, loss_scale=2, train_wall=11, gb_free=2.8, wall=119027 2021-06-20 03:42:44 | INFO | train_inner | epoch 004: 1418 / 3002 loss=2.659, ppl=6.32, wps=5803.6, ups=0.09, wpb=64764, bsz=128, num_updates=10361, lr=9.99251e-05, gnorm=2.247, loss_scale=2, train_wall=11, gb_free=2.8, wall=119038 2021-06-20 03:42:55 | INFO | train_inner | epoch 004: 1419 / 3002 loss=2.464, ppl=5.52, wps=5926.4, ups=0.09, wpb=64891, bsz=128, num_updates=10362, lr=9.99251e-05, gnorm=1.976, loss_scale=2, train_wall=10, gb_free=2.8, wall=119049 2021-06-20 03:43:06 | INFO | train_inner | epoch 004: 1420 / 3002 loss=2.702, ppl=6.51, wps=5799.6, ups=0.09, wpb=64870, bsz=128, num_updates=10363, lr=9.99251e-05, gnorm=1.996, loss_scale=2, train_wall=11, gb_free=2.8, wall=119060 2021-06-20 03:43:17 | INFO | train_inner | epoch 004: 1421 / 3002 loss=2.615, ppl=6.13, wps=5895.1, ups=0.09, wpb=64744, bsz=128, num_updates=10364, lr=9.99251e-05, gnorm=2.033, loss_scale=2, train_wall=11, gb_free=2.8, wall=119071 2021-06-20 03:43:28 | INFO | train_inner | epoch 004: 1422 / 3002 loss=2.612, ppl=6.11, wps=5747.9, ups=0.09, wpb=64872, bsz=128, num_updates=10365, lr=9.99251e-05, gnorm=2.151, loss_scale=2, train_wall=11, gb_free=2.8, wall=119083 2021-06-20 03:43:39 | INFO | train_inner | epoch 004: 1423 / 3002 loss=2.538, ppl=5.81, wps=5956.3, ups=0.09, wpb=64922, bsz=128, num_updates=10366, lr=9.99251e-05, gnorm=2.043, loss_scale=2, train_wall=10, gb_free=2.8, wall=119093 2021-06-20 03:43:50 | INFO | train_inner | epoch 004: 1424 / 3002 loss=2.649, ppl=6.27, wps=5819.4, ups=0.09, wpb=64813, bsz=128, num_updates=10367, lr=9.99251e-05, gnorm=2.068, loss_scale=2, train_wall=11, gb_free=2.8, wall=119105 2021-06-20 03:44:01 | INFO | train_inner | epoch 004: 1425 / 3002 loss=2.442, ppl=5.43, wps=5823.4, ups=0.09, wpb=64790, bsz=128, num_updates=10368, lr=9.99251e-05, gnorm=2.023, loss_scale=2, train_wall=11, gb_free=2.8, wall=119116 2021-06-20 03:44:13 | INFO | train_inner | epoch 004: 1426 / 3002 loss=2.552, ppl=5.86, wps=5795, ups=0.09, wpb=64813, bsz=128, num_updates=10369, lr=9.9925e-05, gnorm=2.074, loss_scale=2, train_wall=11, gb_free=2.8, wall=119127 2021-06-20 03:44:24 | INFO | train_inner | epoch 004: 1427 / 3002 loss=2.669, ppl=6.36, wps=5770.8, ups=0.09, wpb=64731, bsz=128, num_updates=10370, lr=9.9925e-05, gnorm=2.023, loss_scale=2, train_wall=11, gb_free=2.8, wall=119138 2021-06-20 03:44:35 | INFO | train_inner | epoch 004: 1428 / 3002 loss=2.669, ppl=6.36, wps=5810.7, ups=0.09, wpb=64810, bsz=128, num_updates=10371, lr=9.9925e-05, gnorm=2.009, loss_scale=2, train_wall=11, gb_free=2.8, wall=119149 2021-06-20 03:44:46 | INFO | train_inner | epoch 004: 1429 / 3002 loss=2.655, ppl=6.3, wps=5943.5, ups=0.09, wpb=64805, bsz=128, num_updates=10372, lr=9.9925e-05, gnorm=2.034, loss_scale=2, train_wall=10, gb_free=2.8, wall=119160 2021-06-20 03:44:57 | INFO | train_inner | epoch 004: 1430 / 3002 loss=2.599, ppl=6.06, wps=5741.1, ups=0.09, wpb=64751, bsz=128, num_updates=10373, lr=9.9925e-05, gnorm=2.122, loss_scale=2, train_wall=11, gb_free=2.8, wall=119171 2021-06-20 03:45:08 | INFO | train_inner | epoch 004: 1431 / 3002 loss=2.545, ppl=5.84, wps=5749.8, ups=0.09, wpb=64779, bsz=128, num_updates=10374, lr=9.9925e-05, gnorm=1.992, loss_scale=2, train_wall=11, gb_free=2.8, wall=119183 2021-06-20 03:45:19 | INFO | train_inner | epoch 004: 1432 / 3002 loss=2.52, ppl=5.74, wps=5986.6, ups=0.09, wpb=64892, bsz=128, num_updates=10375, lr=9.9925e-05, gnorm=1.998, loss_scale=2, train_wall=10, gb_free=2.8, wall=119194 2021-06-20 03:45:31 | INFO | train_inner | epoch 004: 1433 / 3002 loss=2.671, ppl=6.37, wps=5757.2, ups=0.09, wpb=64747, bsz=128, num_updates=10376, lr=9.9925e-05, gnorm=2.004, loss_scale=2, train_wall=11, gb_free=2.8, wall=119205 2021-06-20 03:45:42 | INFO | train_inner | epoch 004: 1434 / 3002 loss=2.646, ppl=6.26, wps=5789.3, ups=0.09, wpb=64832, bsz=128, num_updates=10377, lr=9.9925e-05, gnorm=2.011, loss_scale=2, train_wall=11, gb_free=2.8, wall=119216 2021-06-20 03:45:53 | INFO | train_inner | epoch 004: 1435 / 3002 loss=2.573, ppl=5.95, wps=5875.6, ups=0.09, wpb=64839, bsz=128, num_updates=10378, lr=9.9925e-05, gnorm=2.013, loss_scale=2, train_wall=11, gb_free=2.8, wall=119227 2021-06-20 03:46:04 | INFO | train_inner | epoch 004: 1436 / 3002 loss=2.647, ppl=6.27, wps=5903.4, ups=0.09, wpb=64820, bsz=128, num_updates=10379, lr=9.9925e-05, gnorm=2.059, loss_scale=2, train_wall=11, gb_free=2.8, wall=119238 2021-06-20 03:46:15 | INFO | train_inner | epoch 004: 1437 / 3002 loss=2.567, ppl=5.93, wps=5890.1, ups=0.09, wpb=64841, bsz=128, num_updates=10380, lr=9.9925e-05, gnorm=1.956, loss_scale=2, train_wall=11, gb_free=2.8, wall=119249 2021-06-20 03:46:26 | INFO | train_inner | epoch 004: 1438 / 3002 loss=2.413, ppl=5.33, wps=5899.1, ups=0.09, wpb=64909, bsz=128, num_updates=10381, lr=9.99249e-05, gnorm=2.017, loss_scale=2, train_wall=11, gb_free=2.8, wall=119260 2021-06-20 03:46:37 | INFO | train_inner | epoch 004: 1439 / 3002 loss=2.563, ppl=5.91, wps=5937.4, ups=0.09, wpb=64951, bsz=128, num_updates=10382, lr=9.99249e-05, gnorm=2.064, loss_scale=2, train_wall=10, gb_free=2.8, wall=119271 2021-06-20 03:46:48 | INFO | train_inner | epoch 004: 1440 / 3002 loss=2.455, ppl=5.48, wps=5940.5, ups=0.09, wpb=64932, bsz=128, num_updates=10383, lr=9.99249e-05, gnorm=2.1, loss_scale=2, train_wall=10, gb_free=2.8, wall=119282 2021-06-20 03:46:59 | INFO | train_inner | epoch 004: 1441 / 3002 loss=2.604, ppl=6.08, wps=5753.7, ups=0.09, wpb=64749, bsz=128, num_updates=10384, lr=9.99249e-05, gnorm=2.097, loss_scale=2, train_wall=11, gb_free=2.8, wall=119293 2021-06-20 03:47:10 | INFO | train_inner | epoch 004: 1442 / 3002 loss=2.597, ppl=6.05, wps=5956.2, ups=0.09, wpb=64857, bsz=128, num_updates=10385, lr=9.99249e-05, gnorm=2.056, loss_scale=2, train_wall=10, gb_free=2.8, wall=119304 2021-06-20 03:47:21 | INFO | train_inner | epoch 004: 1443 / 3002 loss=2.606, ppl=6.09, wps=5819.8, ups=0.09, wpb=64843, bsz=128, num_updates=10386, lr=9.99249e-05, gnorm=2.102, loss_scale=2, train_wall=11, gb_free=2.8, wall=119315 2021-06-20 03:47:32 | INFO | train_inner | epoch 004: 1444 / 3002 loss=2.518, ppl=5.73, wps=5929.7, ups=0.09, wpb=64832, bsz=128, num_updates=10387, lr=9.99249e-05, gnorm=2.018, loss_scale=2, train_wall=10, gb_free=2.8, wall=119326 2021-06-20 03:47:43 | INFO | train_inner | epoch 004: 1445 / 3002 loss=2.672, ppl=6.37, wps=5895.6, ups=0.09, wpb=64841, bsz=128, num_updates=10388, lr=9.99249e-05, gnorm=2.225, loss_scale=2, train_wall=11, gb_free=2.8, wall=119337 2021-06-20 03:47:54 | INFO | train_inner | epoch 004: 1446 / 3002 loss=2.592, ppl=6.03, wps=5787.3, ups=0.09, wpb=64815, bsz=128, num_updates=10389, lr=9.99249e-05, gnorm=1.957, loss_scale=2, train_wall=11, gb_free=2.8, wall=119348 2021-06-20 03:48:05 | INFO | train_inner | epoch 004: 1447 / 3002 loss=2.539, ppl=5.81, wps=5824.1, ups=0.09, wpb=64867, bsz=128, num_updates=10390, lr=9.99249e-05, gnorm=2.004, loss_scale=2, train_wall=11, gb_free=2.8, wall=119359 2021-06-20 03:48:16 | INFO | train_inner | epoch 004: 1448 / 3002 loss=2.483, ppl=5.59, wps=5796.7, ups=0.09, wpb=64813, bsz=128, num_updates=10391, lr=9.99249e-05, gnorm=2.004, loss_scale=2, train_wall=11, gb_free=2.8, wall=119371 2021-06-20 03:48:27 | INFO | train_inner | epoch 004: 1449 / 3002 loss=2.66, ppl=6.32, wps=5930.8, ups=0.09, wpb=64846, bsz=128, num_updates=10392, lr=9.99249e-05, gnorm=2.186, loss_scale=2, train_wall=11, gb_free=2.8, wall=119382 2021-06-20 03:48:38 | INFO | train_inner | epoch 004: 1450 / 3002 loss=2.586, ppl=6, wps=5816.5, ups=0.09, wpb=64842, bsz=128, num_updates=10393, lr=9.99248e-05, gnorm=2.057, loss_scale=2, train_wall=11, gb_free=2.8, wall=119393 2021-06-20 03:48:50 | INFO | train_inner | epoch 004: 1451 / 3002 loss=2.545, ppl=5.84, wps=5820, ups=0.09, wpb=64891, bsz=128, num_updates=10394, lr=9.99248e-05, gnorm=2.167, loss_scale=2, train_wall=11, gb_free=2.8, wall=119404 2021-06-20 03:49:01 | INFO | train_inner | epoch 004: 1452 / 3002 loss=2.513, ppl=5.71, wps=5838, ups=0.09, wpb=64783, bsz=128, num_updates=10395, lr=9.99248e-05, gnorm=2.085, loss_scale=2, train_wall=11, gb_free=2.8, wall=119415 2021-06-20 03:49:12 | INFO | train_inner | epoch 004: 1453 / 3002 loss=2.486, ppl=5.6, wps=5922, ups=0.09, wpb=64882, bsz=128, num_updates=10396, lr=9.99248e-05, gnorm=2.04, loss_scale=2, train_wall=10, gb_free=2.8, wall=119426 2021-06-20 03:49:23 | INFO | train_inner | epoch 004: 1454 / 3002 loss=2.645, ppl=6.25, wps=5873.3, ups=0.09, wpb=64819, bsz=128, num_updates=10397, lr=9.99248e-05, gnorm=2.002, loss_scale=2, train_wall=11, gb_free=2.8, wall=119437 2021-06-20 03:49:34 | INFO | train_inner | epoch 004: 1455 / 3002 loss=2.48, ppl=5.58, wps=5852.7, ups=0.09, wpb=64883, bsz=128, num_updates=10398, lr=9.99248e-05, gnorm=1.989, loss_scale=2, train_wall=11, gb_free=2.8, wall=119448 2021-06-20 03:49:45 | INFO | train_inner | epoch 004: 1456 / 3002 loss=2.396, ppl=5.26, wps=5867.3, ups=0.09, wpb=64841, bsz=128, num_updates=10399, lr=9.99248e-05, gnorm=2.017, loss_scale=2, train_wall=11, gb_free=2.8, wall=119459 2021-06-20 03:49:56 | INFO | train_inner | epoch 004: 1457 / 3002 loss=2.692, ppl=6.46, wps=5750.6, ups=0.09, wpb=64711, bsz=128, num_updates=10400, lr=9.99248e-05, gnorm=2.028, loss_scale=2, train_wall=11, gb_free=2.8, wall=119470 2021-06-20 03:50:07 | INFO | train_inner | epoch 004: 1458 / 3002 loss=2.459, ppl=5.5, wps=5780, ups=0.09, wpb=64823, bsz=128, num_updates=10401, lr=9.99248e-05, gnorm=2.007, loss_scale=2, train_wall=11, gb_free=2.8, wall=119482 2021-06-20 03:50:18 | INFO | train_inner | epoch 004: 1459 / 3002 loss=2.557, ppl=5.88, wps=5866, ups=0.09, wpb=64899, bsz=128, num_updates=10402, lr=9.99248e-05, gnorm=2.139, loss_scale=2, train_wall=11, gb_free=2.8, wall=119493 2021-06-20 03:50:29 | INFO | train_inner | epoch 004: 1460 / 3002 loss=2.708, ppl=6.53, wps=5915.4, ups=0.09, wpb=64837, bsz=128, num_updates=10403, lr=9.99248e-05, gnorm=2.106, loss_scale=2, train_wall=11, gb_free=2.8, wall=119504 2021-06-20 03:50:40 | INFO | train_inner | epoch 004: 1461 / 3002 loss=2.635, ppl=6.21, wps=5834.2, ups=0.09, wpb=64792, bsz=128, num_updates=10404, lr=9.99248e-05, gnorm=1.944, loss_scale=2, train_wall=11, gb_free=2.8, wall=119515 2021-06-20 03:50:51 | INFO | train_inner | epoch 004: 1462 / 3002 loss=2.541, ppl=5.82, wps=5894, ups=0.09, wpb=64902, bsz=128, num_updates=10405, lr=9.99248e-05, gnorm=2.116, loss_scale=2, train_wall=11, gb_free=2.8, wall=119526 2021-06-20 03:51:02 | INFO | train_inner | epoch 004: 1463 / 3002 loss=2.485, ppl=5.6, wps=5911.8, ups=0.09, wpb=64803, bsz=128, num_updates=10406, lr=9.99247e-05, gnorm=1.997, loss_scale=2, train_wall=10, gb_free=2.8, wall=119537 2021-06-20 03:51:14 | INFO | train_inner | epoch 004: 1464 / 3002 loss=2.549, ppl=5.85, wps=5786.2, ups=0.09, wpb=64892, bsz=128, num_updates=10407, lr=9.99247e-05, gnorm=2.04, loss_scale=2, train_wall=11, gb_free=2.8, wall=119548 2021-06-20 03:51:25 | INFO | train_inner | epoch 004: 1465 / 3002 loss=2.46, ppl=5.5, wps=5838.5, ups=0.09, wpb=64787, bsz=128, num_updates=10408, lr=9.99247e-05, gnorm=2.074, loss_scale=2, train_wall=11, gb_free=2.8, wall=119559 2021-06-20 03:51:36 | INFO | train_inner | epoch 004: 1466 / 3002 loss=2.766, ppl=6.8, wps=5864.3, ups=0.09, wpb=64728, bsz=128, num_updates=10409, lr=9.99247e-05, gnorm=2.111, loss_scale=2, train_wall=11, gb_free=2.8, wall=119570 2021-06-20 03:51:47 | INFO | train_inner | epoch 004: 1467 / 3002 loss=2.676, ppl=6.39, wps=5836.4, ups=0.09, wpb=64824, bsz=128, num_updates=10410, lr=9.99247e-05, gnorm=2.087, loss_scale=2, train_wall=11, gb_free=2.8, wall=119581 2021-06-20 03:51:58 | INFO | train_inner | epoch 004: 1468 / 3002 loss=2.755, ppl=6.75, wps=5835.8, ups=0.09, wpb=64690, bsz=128, num_updates=10411, lr=9.99247e-05, gnorm=1.976, loss_scale=2, train_wall=11, gb_free=2.8, wall=119592 2021-06-20 03:52:09 | INFO | train_inner | epoch 004: 1469 / 3002 loss=2.647, ppl=6.26, wps=5801.7, ups=0.09, wpb=64857, bsz=128, num_updates=10412, lr=9.99247e-05, gnorm=2.038, loss_scale=2, train_wall=11, gb_free=2.8, wall=119603 2021-06-20 03:52:20 | INFO | train_inner | epoch 004: 1470 / 3002 loss=2.563, ppl=5.91, wps=5697.5, ups=0.09, wpb=64851, bsz=128, num_updates=10413, lr=9.99247e-05, gnorm=2.093, loss_scale=2, train_wall=11, gb_free=2.8, wall=119615 2021-06-20 03:52:31 | INFO | train_inner | epoch 004: 1471 / 3002 loss=2.492, ppl=5.63, wps=5910.5, ups=0.09, wpb=64763, bsz=128, num_updates=10414, lr=9.99247e-05, gnorm=2.036, loss_scale=2, train_wall=11, gb_free=2.8, wall=119626 2021-06-20 03:52:42 | INFO | train_inner | epoch 004: 1472 / 3002 loss=2.46, ppl=5.5, wps=5915, ups=0.09, wpb=64860, bsz=128, num_updates=10415, lr=9.99247e-05, gnorm=2.128, loss_scale=2, train_wall=10, gb_free=2.8, wall=119637 2021-06-20 03:52:53 | INFO | train_inner | epoch 004: 1473 / 3002 loss=2.545, ppl=5.84, wps=6011.5, ups=0.09, wpb=64836, bsz=128, num_updates=10416, lr=9.99247e-05, gnorm=2.09, loss_scale=2, train_wall=10, gb_free=2.8, wall=119648 2021-06-20 03:53:04 | INFO | train_inner | epoch 004: 1474 / 3002 loss=2.77, ppl=6.82, wps=6031.8, ups=0.09, wpb=64836, bsz=128, num_updates=10417, lr=9.99247e-05, gnorm=2.121, loss_scale=2, train_wall=10, gb_free=2.8, wall=119658 2021-06-20 03:53:15 | INFO | train_inner | epoch 004: 1475 / 3002 loss=2.616, ppl=6.13, wps=5815.4, ups=0.09, wpb=64744, bsz=128, num_updates=10418, lr=9.99246e-05, gnorm=2.446, loss_scale=2, train_wall=11, gb_free=2.8, wall=119669 2021-06-20 03:53:26 | INFO | train_inner | epoch 004: 1476 / 3002 loss=2.523, ppl=5.75, wps=5983.3, ups=0.09, wpb=64834, bsz=128, num_updates=10419, lr=9.99246e-05, gnorm=2.161, loss_scale=2, train_wall=10, gb_free=2.8, wall=119680 2021-06-20 03:53:37 | INFO | train_inner | epoch 004: 1477 / 3002 loss=2.569, ppl=5.93, wps=5757.3, ups=0.09, wpb=64767, bsz=128, num_updates=10420, lr=9.99246e-05, gnorm=2.222, loss_scale=2, train_wall=11, gb_free=2.8, wall=119691 2021-06-20 03:53:48 | INFO | train_inner | epoch 004: 1478 / 3002 loss=2.679, ppl=6.4, wps=5870.3, ups=0.09, wpb=64892, bsz=128, num_updates=10421, lr=9.99246e-05, gnorm=2.005, loss_scale=2, train_wall=11, gb_free=2.8, wall=119703 2021-06-20 03:53:59 | INFO | train_inner | epoch 004: 1479 / 3002 loss=2.447, ppl=5.45, wps=5933.6, ups=0.09, wpb=64849, bsz=128, num_updates=10422, lr=9.99246e-05, gnorm=1.968, loss_scale=2, train_wall=10, gb_free=2.8, wall=119713 2021-06-20 03:54:10 | INFO | train_inner | epoch 004: 1480 / 3002 loss=2.639, ppl=6.23, wps=5790.3, ups=0.09, wpb=64863, bsz=128, num_updates=10423, lr=9.99246e-05, gnorm=2.025, loss_scale=2, train_wall=11, gb_free=2.8, wall=119725 2021-06-20 03:54:21 | INFO | train_inner | epoch 004: 1481 / 3002 loss=2.608, ppl=6.1, wps=5835.1, ups=0.09, wpb=64853, bsz=128, num_updates=10424, lr=9.99246e-05, gnorm=1.959, loss_scale=2, train_wall=11, gb_free=2.8, wall=119736 2021-06-20 03:54:33 | INFO | train_inner | epoch 004: 1482 / 3002 loss=2.656, ppl=6.3, wps=5773, ups=0.09, wpb=64847, bsz=128, num_updates=10425, lr=9.99246e-05, gnorm=2.266, loss_scale=2, train_wall=11, gb_free=2.8, wall=119747 2021-06-20 03:54:44 | INFO | train_inner | epoch 004: 1483 / 3002 loss=2.634, ppl=6.21, wps=5936.1, ups=0.09, wpb=64799, bsz=128, num_updates=10426, lr=9.99246e-05, gnorm=2.03, loss_scale=2, train_wall=10, gb_free=2.8, wall=119758 2021-06-20 03:54:55 | INFO | train_inner | epoch 004: 1484 / 3002 loss=2.692, ppl=6.46, wps=5803.8, ups=0.09, wpb=64681, bsz=128, num_updates=10427, lr=9.99246e-05, gnorm=2.061, loss_scale=2, train_wall=11, gb_free=2.8, wall=119769 2021-06-20 03:55:06 | INFO | train_inner | epoch 004: 1485 / 3002 loss=2.53, ppl=5.77, wps=5891.8, ups=0.09, wpb=64860, bsz=128, num_updates=10428, lr=9.99246e-05, gnorm=2.113, loss_scale=2, train_wall=11, gb_free=2.8, wall=119780 2021-06-20 03:55:17 | INFO | train_inner | epoch 004: 1486 / 3002 loss=2.758, ppl=6.77, wps=5765, ups=0.09, wpb=64848, bsz=128, num_updates=10429, lr=9.99246e-05, gnorm=2.029, loss_scale=2, train_wall=11, gb_free=2.8, wall=119791 2021-06-20 03:55:28 | INFO | train_inner | epoch 004: 1487 / 3002 loss=2.66, ppl=6.32, wps=5857.7, ups=0.09, wpb=64715, bsz=128, num_updates=10430, lr=9.99246e-05, gnorm=2.104, loss_scale=2, train_wall=11, gb_free=2.8, wall=119802 2021-06-20 03:55:39 | INFO | train_inner | epoch 004: 1488 / 3002 loss=2.586, ppl=6.01, wps=5773.9, ups=0.09, wpb=64777, bsz=128, num_updates=10431, lr=9.99245e-05, gnorm=2.008, loss_scale=2, train_wall=11, gb_free=2.8, wall=119814 2021-06-20 03:55:50 | INFO | train_inner | epoch 004: 1489 / 3002 loss=2.577, ppl=5.97, wps=5851.2, ups=0.09, wpb=64771, bsz=128, num_updates=10432, lr=9.99245e-05, gnorm=2.037, loss_scale=2, train_wall=11, gb_free=2.8, wall=119825 2021-06-20 03:56:01 | INFO | train_inner | epoch 004: 1490 / 3002 loss=2.49, ppl=5.62, wps=5999.2, ups=0.09, wpb=64905, bsz=128, num_updates=10433, lr=9.99245e-05, gnorm=2.065, loss_scale=2, train_wall=10, gb_free=2.8, wall=119836 2021-06-20 03:56:12 | INFO | train_inner | epoch 004: 1491 / 3002 loss=2.662, ppl=6.33, wps=5911.1, ups=0.09, wpb=64875, bsz=128, num_updates=10434, lr=9.99245e-05, gnorm=2.12, loss_scale=2, train_wall=11, gb_free=2.8, wall=119846 2021-06-20 03:56:23 | INFO | train_inner | epoch 004: 1492 / 3002 loss=2.47, ppl=5.54, wps=5768.7, ups=0.09, wpb=64771, bsz=128, num_updates=10435, lr=9.99245e-05, gnorm=2.026, loss_scale=2, train_wall=11, gb_free=2.8, wall=119858 2021-06-20 03:56:34 | INFO | train_inner | epoch 004: 1493 / 3002 loss=2.721, ppl=6.59, wps=5862.9, ups=0.09, wpb=64775, bsz=128, num_updates=10436, lr=9.99245e-05, gnorm=2.096, loss_scale=2, train_wall=11, gb_free=2.8, wall=119869 2021-06-20 03:56:46 | INFO | train_inner | epoch 004: 1494 / 3002 loss=2.608, ppl=6.1, wps=5737.9, ups=0.09, wpb=64771, bsz=128, num_updates=10437, lr=9.99245e-05, gnorm=2.101, loss_scale=2, train_wall=11, gb_free=2.8, wall=119880 2021-06-20 03:56:57 | INFO | train_inner | epoch 004: 1495 / 3002 loss=2.547, ppl=5.84, wps=5832.7, ups=0.09, wpb=64865, bsz=128, num_updates=10438, lr=9.99245e-05, gnorm=2.153, loss_scale=2, train_wall=11, gb_free=2.8, wall=119891 2021-06-20 03:57:08 | INFO | train_inner | epoch 004: 1496 / 3002 loss=2.625, ppl=6.17, wps=5750.7, ups=0.09, wpb=64884, bsz=128, num_updates=10439, lr=9.99245e-05, gnorm=2.142, loss_scale=2, train_wall=11, gb_free=2.8, wall=119902 2021-06-20 03:57:19 | INFO | train_inner | epoch 004: 1497 / 3002 loss=2.586, ppl=6.01, wps=5779.6, ups=0.09, wpb=64839, bsz=128, num_updates=10440, lr=9.99245e-05, gnorm=2.088, loss_scale=2, train_wall=11, gb_free=2.8, wall=119914 2021-06-20 03:57:31 | INFO | train_inner | epoch 004: 1498 / 3002 loss=2.525, ppl=5.75, wps=5754, ups=0.09, wpb=64841, bsz=128, num_updates=10441, lr=9.99245e-05, gnorm=2.006, loss_scale=2, train_wall=11, gb_free=2.8, wall=119925 2021-06-20 03:57:42 | INFO | train_inner | epoch 004: 1499 / 3002 loss=2.579, ppl=5.97, wps=5750.7, ups=0.09, wpb=64797, bsz=128, num_updates=10442, lr=9.99245e-05, gnorm=2.026, loss_scale=2, train_wall=11, gb_free=2.8, wall=119936 2021-06-20 03:57:53 | INFO | train_inner | epoch 004: 1500 / 3002 loss=2.549, ppl=5.85, wps=5886.3, ups=0.09, wpb=64918, bsz=128, num_updates=10443, lr=9.99244e-05, gnorm=1.999, loss_scale=2, train_wall=11, gb_free=2.8, wall=119947 2021-06-20 03:58:04 | INFO | train_inner | epoch 004: 1501 / 3002 loss=2.634, ppl=6.21, wps=5735.2, ups=0.09, wpb=64703, bsz=128, num_updates=10444, lr=9.99244e-05, gnorm=2.028, loss_scale=2, train_wall=11, gb_free=2.8, wall=119959 2021-06-20 03:58:16 | INFO | train_inner | epoch 004: 1502 / 3002 loss=2.522, ppl=5.75, wps=5700.1, ups=0.09, wpb=64856, bsz=128, num_updates=10445, lr=9.99244e-05, gnorm=4.233, loss_scale=2, train_wall=11, gb_free=2.8, wall=119970 2021-06-20 03:58:26 | INFO | train_inner | epoch 004: 1503 / 3002 loss=2.68, ppl=6.41, wps=5929, ups=0.09, wpb=64841, bsz=128, num_updates=10446, lr=9.99244e-05, gnorm=2.057, loss_scale=2, train_wall=10, gb_free=2.8, wall=119981 2021-06-20 03:58:38 | INFO | train_inner | epoch 004: 1504 / 3002 loss=2.599, ppl=6.06, wps=5857.7, ups=0.09, wpb=64835, bsz=128, num_updates=10447, lr=9.99244e-05, gnorm=2.03, loss_scale=2, train_wall=11, gb_free=2.8, wall=119992 2021-06-20 03:58:49 | INFO | train_inner | epoch 004: 1505 / 3002 loss=2.573, ppl=5.95, wps=5757.2, ups=0.09, wpb=64840, bsz=128, num_updates=10448, lr=9.99244e-05, gnorm=2.243, loss_scale=2, train_wall=11, gb_free=2.8, wall=120003 2021-06-20 03:59:00 | INFO | train_inner | epoch 004: 1506 / 3002 loss=2.611, ppl=6.11, wps=5935.2, ups=0.09, wpb=64873, bsz=128, num_updates=10449, lr=9.99244e-05, gnorm=2.072, loss_scale=2, train_wall=10, gb_free=2.8, wall=120014 2021-06-20 03:59:11 | INFO | train_inner | epoch 004: 1507 / 3002 loss=2.492, ppl=5.63, wps=5938.5, ups=0.09, wpb=64861, bsz=128, num_updates=10450, lr=9.99244e-05, gnorm=2.023, loss_scale=2, train_wall=10, gb_free=2.8, wall=120025 2021-06-20 03:59:22 | INFO | train_inner | epoch 004: 1508 / 3002 loss=2.558, ppl=5.89, wps=5768.2, ups=0.09, wpb=64849, bsz=128, num_updates=10451, lr=9.99244e-05, gnorm=2.051, loss_scale=2, train_wall=11, gb_free=2.8, wall=120036 2021-06-20 03:59:33 | INFO | train_inner | epoch 004: 1509 / 3002 loss=2.548, ppl=5.85, wps=5817.2, ups=0.09, wpb=64797, bsz=128, num_updates=10452, lr=9.99244e-05, gnorm=2.011, loss_scale=2, train_wall=11, gb_free=2.8, wall=120047 2021-06-20 03:59:44 | INFO | train_inner | epoch 004: 1510 / 3002 loss=2.696, ppl=6.48, wps=5835.7, ups=0.09, wpb=64800, bsz=128, num_updates=10453, lr=9.99244e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=120059 2021-06-20 03:59:55 | INFO | train_inner | epoch 004: 1511 / 3002 loss=2.493, ppl=5.63, wps=5877.5, ups=0.09, wpb=64798, bsz=128, num_updates=10454, lr=9.99244e-05, gnorm=2.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=120070 2021-06-20 04:00:06 | INFO | train_inner | epoch 004: 1512 / 3002 loss=2.435, ppl=5.41, wps=5764.8, ups=0.09, wpb=64761, bsz=128, num_updates=10455, lr=9.99244e-05, gnorm=2.155, loss_scale=4, train_wall=11, gb_free=2.8, wall=120081 2021-06-20 04:00:18 | INFO | train_inner | epoch 004: 1513 / 3002 loss=2.569, ppl=5.94, wps=5846.1, ups=0.09, wpb=64874, bsz=128, num_updates=10456, lr=9.99243e-05, gnorm=2.049, loss_scale=4, train_wall=11, gb_free=2.8, wall=120092 2021-06-20 04:00:29 | INFO | train_inner | epoch 004: 1514 / 3002 loss=2.466, ppl=5.52, wps=5880, ups=0.09, wpb=64829, bsz=128, num_updates=10457, lr=9.99243e-05, gnorm=2.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=120103 2021-06-20 04:00:40 | INFO | train_inner | epoch 004: 1515 / 3002 loss=2.676, ppl=6.39, wps=5828.8, ups=0.09, wpb=64782, bsz=128, num_updates=10458, lr=9.99243e-05, gnorm=2.02, loss_scale=4, train_wall=11, gb_free=2.8, wall=120114 2021-06-20 04:00:51 | INFO | train_inner | epoch 004: 1516 / 3002 loss=2.607, ppl=6.09, wps=5814.8, ups=0.09, wpb=64839, bsz=128, num_updates=10459, lr=9.99243e-05, gnorm=2.028, loss_scale=4, train_wall=11, gb_free=2.8, wall=120125 2021-06-20 04:01:02 | INFO | train_inner | epoch 004: 1517 / 3002 loss=2.628, ppl=6.18, wps=5882, ups=0.09, wpb=64770, bsz=128, num_updates=10460, lr=9.99243e-05, gnorm=2.03, loss_scale=4, train_wall=11, gb_free=2.8, wall=120136 2021-06-20 04:01:13 | INFO | train_inner | epoch 004: 1518 / 3002 loss=2.504, ppl=5.67, wps=5868.4, ups=0.09, wpb=64786, bsz=128, num_updates=10461, lr=9.99243e-05, gnorm=1.981, loss_scale=4, train_wall=11, gb_free=2.8, wall=120147 2021-06-20 04:01:24 | INFO | train_inner | epoch 004: 1519 / 3002 loss=2.666, ppl=6.35, wps=5840.5, ups=0.09, wpb=64829, bsz=128, num_updates=10462, lr=9.99243e-05, gnorm=2.047, loss_scale=4, train_wall=11, gb_free=2.8, wall=120158 2021-06-20 04:01:35 | INFO | train_inner | epoch 004: 1520 / 3002 loss=2.61, ppl=6.11, wps=5831.4, ups=0.09, wpb=64837, bsz=128, num_updates=10463, lr=9.99243e-05, gnorm=2.049, loss_scale=4, train_wall=11, gb_free=2.8, wall=120169 2021-06-20 04:01:46 | INFO | train_inner | epoch 004: 1521 / 3002 loss=2.53, ppl=5.78, wps=5949.4, ups=0.09, wpb=64909, bsz=128, num_updates=10464, lr=9.99243e-05, gnorm=2.327, loss_scale=4, train_wall=10, gb_free=2.8, wall=120180 2021-06-20 04:01:57 | INFO | train_inner | epoch 004: 1522 / 3002 loss=2.604, ppl=6.08, wps=5808.1, ups=0.09, wpb=64871, bsz=128, num_updates=10465, lr=9.99243e-05, gnorm=2.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=120192 2021-06-20 04:02:08 | INFO | train_inner | epoch 004: 1523 / 3002 loss=2.528, ppl=5.77, wps=5789.9, ups=0.09, wpb=64906, bsz=128, num_updates=10466, lr=9.99243e-05, gnorm=2.175, loss_scale=4, train_wall=11, gb_free=2.8, wall=120203 2021-06-20 04:02:20 | INFO | train_inner | epoch 004: 1524 / 3002 loss=2.578, ppl=5.97, wps=5752.7, ups=0.09, wpb=64798, bsz=128, num_updates=10467, lr=9.99243e-05, gnorm=2.077, loss_scale=4, train_wall=11, gb_free=2.8, wall=120214 2021-06-20 04:02:31 | INFO | train_inner | epoch 004: 1525 / 3002 loss=2.631, ppl=6.19, wps=5846.3, ups=0.09, wpb=64798, bsz=128, num_updates=10468, lr=9.99242e-05, gnorm=2.019, loss_scale=4, train_wall=11, gb_free=2.8, wall=120225 2021-06-20 04:02:42 | INFO | train_inner | epoch 004: 1526 / 3002 loss=2.428, ppl=5.38, wps=5997.5, ups=0.09, wpb=64931, bsz=128, num_updates=10469, lr=9.99242e-05, gnorm=2.04, loss_scale=4, train_wall=10, gb_free=2.8, wall=120236 2021-06-20 04:02:53 | INFO | train_inner | epoch 004: 1527 / 3002 loss=2.602, ppl=6.07, wps=5865.6, ups=0.09, wpb=64845, bsz=128, num_updates=10470, lr=9.99242e-05, gnorm=1.997, loss_scale=4, train_wall=11, gb_free=2.8, wall=120247 2021-06-20 04:03:04 | INFO | train_inner | epoch 004: 1528 / 3002 loss=2.575, ppl=5.96, wps=5914.7, ups=0.09, wpb=64787, bsz=128, num_updates=10471, lr=9.99242e-05, gnorm=2.015, loss_scale=4, train_wall=10, gb_free=2.8, wall=120258 2021-06-20 04:03:15 | INFO | train_inner | epoch 004: 1529 / 3002 loss=2.551, ppl=5.86, wps=5895.2, ups=0.09, wpb=64828, bsz=128, num_updates=10472, lr=9.99242e-05, gnorm=2.022, loss_scale=4, train_wall=11, gb_free=2.8, wall=120269 2021-06-20 04:03:26 | INFO | train_inner | epoch 004: 1530 / 3002 loss=2.52, ppl=5.74, wps=5806.5, ups=0.09, wpb=64759, bsz=128, num_updates=10473, lr=9.99242e-05, gnorm=1.971, loss_scale=4, train_wall=11, gb_free=2.8, wall=120280 2021-06-20 04:03:37 | INFO | train_inner | epoch 004: 1531 / 3002 loss=2.638, ppl=6.22, wps=5874.8, ups=0.09, wpb=64765, bsz=128, num_updates=10474, lr=9.99242e-05, gnorm=2.39, loss_scale=4, train_wall=11, gb_free=2.8, wall=120291 2021-06-20 04:03:48 | INFO | train_inner | epoch 004: 1532 / 3002 loss=2.602, ppl=6.07, wps=5859.2, ups=0.09, wpb=64827, bsz=128, num_updates=10475, lr=9.99242e-05, gnorm=2.086, loss_scale=4, train_wall=11, gb_free=2.8, wall=120302 2021-06-20 04:03:59 | INFO | train_inner | epoch 004: 1533 / 3002 loss=2.516, ppl=5.72, wps=5818.3, ups=0.09, wpb=64840, bsz=128, num_updates=10476, lr=9.99242e-05, gnorm=1.952, loss_scale=4, train_wall=11, gb_free=2.8, wall=120313 2021-06-20 04:04:10 | INFO | train_inner | epoch 004: 1534 / 3002 loss=2.675, ppl=6.39, wps=5863.2, ups=0.09, wpb=64831, bsz=128, num_updates=10477, lr=9.99242e-05, gnorm=2.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=120324 2021-06-20 04:04:21 | INFO | train_inner | epoch 004: 1535 / 3002 loss=2.532, ppl=5.78, wps=5956.3, ups=0.09, wpb=64882, bsz=128, num_updates=10478, lr=9.99242e-05, gnorm=2.424, loss_scale=4, train_wall=10, gb_free=2.8, wall=120335 2021-06-20 04:04:32 | INFO | train_inner | epoch 004: 1536 / 3002 loss=2.466, ppl=5.53, wps=5898.7, ups=0.09, wpb=64924, bsz=128, num_updates=10479, lr=9.99242e-05, gnorm=1.955, loss_scale=4, train_wall=11, gb_free=2.8, wall=120346 2021-06-20 04:04:43 | INFO | train_inner | epoch 004: 1537 / 3002 loss=2.716, ppl=6.57, wps=5796.3, ups=0.09, wpb=64737, bsz=128, num_updates=10480, lr=9.99242e-05, gnorm=2.094, loss_scale=4, train_wall=11, gb_free=2.8, wall=120357 2021-06-20 04:04:54 | INFO | train_inner | epoch 004: 1538 / 3002 loss=2.472, ppl=5.55, wps=5985.9, ups=0.09, wpb=64910, bsz=128, num_updates=10481, lr=9.99241e-05, gnorm=2.031, loss_scale=4, train_wall=10, gb_free=2.8, wall=120368 2021-06-20 04:05:05 | INFO | train_inner | epoch 004: 1539 / 3002 loss=2.523, ppl=5.75, wps=5791.8, ups=0.09, wpb=64789, bsz=128, num_updates=10482, lr=9.99241e-05, gnorm=2.733, loss_scale=4, train_wall=11, gb_free=2.8, wall=120379 2021-06-20 04:05:16 | INFO | train_inner | epoch 004: 1540 / 3002 loss=2.556, ppl=5.88, wps=5815.9, ups=0.09, wpb=64872, bsz=128, num_updates=10483, lr=9.99241e-05, gnorm=2.156, loss_scale=4, train_wall=11, gb_free=2.8, wall=120391 2021-06-20 04:05:27 | INFO | train_inner | epoch 004: 1541 / 3002 loss=2.501, ppl=5.66, wps=5903.2, ups=0.09, wpb=64799, bsz=128, num_updates=10484, lr=9.99241e-05, gnorm=2.018, loss_scale=4, train_wall=10, gb_free=2.8, wall=120402 2021-06-20 04:05:39 | INFO | train_inner | epoch 004: 1542 / 3002 loss=2.427, ppl=5.38, wps=5734.5, ups=0.09, wpb=64831, bsz=128, num_updates=10485, lr=9.99241e-05, gnorm=2.12, loss_scale=4, train_wall=11, gb_free=2.8, wall=120413 2021-06-20 04:05:50 | INFO | train_inner | epoch 004: 1543 / 3002 loss=2.496, ppl=5.64, wps=5757, ups=0.09, wpb=64797, bsz=128, num_updates=10486, lr=9.99241e-05, gnorm=3.631, loss_scale=4, train_wall=11, gb_free=2.8, wall=120424 2021-06-20 04:06:01 | INFO | train_inner | epoch 004: 1544 / 3002 loss=2.467, ppl=5.53, wps=5827.2, ups=0.09, wpb=64775, bsz=128, num_updates=10487, lr=9.99241e-05, gnorm=2.005, loss_scale=4, train_wall=11, gb_free=2.8, wall=120435 2021-06-20 04:06:12 | INFO | train_inner | epoch 004: 1545 / 3002 loss=2.522, ppl=5.74, wps=5874.7, ups=0.09, wpb=64862, bsz=128, num_updates=10488, lr=9.99241e-05, gnorm=2.105, loss_scale=4, train_wall=11, gb_free=2.8, wall=120446 2021-06-20 04:06:23 | INFO | train_inner | epoch 004: 1546 / 3002 loss=2.548, ppl=5.85, wps=5922.8, ups=0.09, wpb=64820, bsz=128, num_updates=10489, lr=9.99241e-05, gnorm=1.967, loss_scale=4, train_wall=10, gb_free=2.8, wall=120457 2021-06-20 04:06:34 | INFO | train_inner | epoch 004: 1547 / 3002 loss=2.484, ppl=5.6, wps=6017.3, ups=0.09, wpb=64879, bsz=128, num_updates=10490, lr=9.99241e-05, gnorm=1.99, loss_scale=4, train_wall=10, gb_free=2.8, wall=120468 2021-06-20 04:06:45 | INFO | train_inner | epoch 004: 1548 / 3002 loss=2.524, ppl=5.75, wps=5901.3, ups=0.09, wpb=64796, bsz=128, num_updates=10491, lr=9.99241e-05, gnorm=2.15, loss_scale=4, train_wall=10, gb_free=2.8, wall=120479 2021-06-20 04:06:56 | INFO | train_inner | epoch 004: 1549 / 3002 loss=2.484, ppl=5.6, wps=5894.3, ups=0.09, wpb=64789, bsz=128, num_updates=10492, lr=9.99241e-05, gnorm=1.926, loss_scale=4, train_wall=11, gb_free=2.8, wall=120490 2021-06-20 04:07:07 | INFO | train_inner | epoch 004: 1550 / 3002 loss=2.54, ppl=5.82, wps=5826.6, ups=0.09, wpb=64773, bsz=128, num_updates=10493, lr=9.9924e-05, gnorm=2.45, loss_scale=4, train_wall=11, gb_free=2.8, wall=120501 2021-06-20 04:07:18 | INFO | train_inner | epoch 004: 1551 / 3002 loss=2.678, ppl=6.4, wps=5756, ups=0.09, wpb=64857, bsz=128, num_updates=10494, lr=9.9924e-05, gnorm=2.258, loss_scale=4, train_wall=11, gb_free=2.8, wall=120512 2021-06-20 04:07:29 | INFO | train_inner | epoch 004: 1552 / 3002 loss=2.622, ppl=6.16, wps=5818.2, ups=0.09, wpb=64830, bsz=128, num_updates=10495, lr=9.9924e-05, gnorm=2.124, loss_scale=4, train_wall=11, gb_free=2.8, wall=120524 2021-06-20 04:07:40 | INFO | train_inner | epoch 004: 1553 / 3002 loss=2.563, ppl=5.91, wps=5773.1, ups=0.09, wpb=64809, bsz=128, num_updates=10496, lr=9.9924e-05, gnorm=2.07, loss_scale=4, train_wall=11, gb_free=2.8, wall=120535 2021-06-20 04:07:51 | INFO | train_inner | epoch 004: 1554 / 3002 loss=2.554, ppl=5.87, wps=5872, ups=0.09, wpb=64919, bsz=128, num_updates=10497, lr=9.9924e-05, gnorm=3.438, loss_scale=4, train_wall=11, gb_free=2.8, wall=120546 2021-06-20 04:08:02 | INFO | train_inner | epoch 004: 1555 / 3002 loss=2.505, ppl=5.68, wps=5901, ups=0.09, wpb=64837, bsz=128, num_updates=10498, lr=9.9924e-05, gnorm=2.078, loss_scale=4, train_wall=11, gb_free=2.8, wall=120557 2021-06-20 04:08:13 | INFO | train_inner | epoch 004: 1556 / 3002 loss=2.534, ppl=5.79, wps=5894.3, ups=0.09, wpb=64890, bsz=128, num_updates=10499, lr=9.9924e-05, gnorm=2.064, loss_scale=4, train_wall=11, gb_free=2.8, wall=120568 2021-06-20 04:08:24 | INFO | train_inner | epoch 004: 1557 / 3002 loss=2.646, ppl=6.26, wps=5887.7, ups=0.09, wpb=64820, bsz=128, num_updates=10500, lr=9.9924e-05, gnorm=2.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=120579 2021-06-20 04:08:36 | INFO | train_inner | epoch 004: 1558 / 3002 loss=2.771, ppl=6.83, wps=5813, ups=0.09, wpb=64862, bsz=128, num_updates=10501, lr=9.9924e-05, gnorm=2.333, loss_scale=4, train_wall=11, gb_free=2.8, wall=120590 2021-06-20 04:08:47 | INFO | train_inner | epoch 004: 1559 / 3002 loss=2.665, ppl=6.34, wps=5769.5, ups=0.09, wpb=64853, bsz=128, num_updates=10502, lr=9.9924e-05, gnorm=2.012, loss_scale=4, train_wall=11, gb_free=2.8, wall=120601 2021-06-20 04:08:58 | INFO | train_inner | epoch 004: 1560 / 3002 loss=2.467, ppl=5.53, wps=5844, ups=0.09, wpb=64820, bsz=128, num_updates=10503, lr=9.9924e-05, gnorm=2.01, loss_scale=4, train_wall=11, gb_free=2.8, wall=120612 2021-06-20 04:09:09 | INFO | train_inner | epoch 004: 1561 / 3002 loss=2.722, ppl=6.6, wps=5924.6, ups=0.09, wpb=64821, bsz=128, num_updates=10504, lr=9.9924e-05, gnorm=2, loss_scale=4, train_wall=11, gb_free=2.8, wall=120623 2021-06-20 04:09:20 | INFO | train_inner | epoch 004: 1562 / 3002 loss=2.337, ppl=5.05, wps=5780.5, ups=0.09, wpb=64871, bsz=128, num_updates=10505, lr=9.9924e-05, gnorm=1.997, loss_scale=4, train_wall=11, gb_free=2.8, wall=120634 2021-06-20 04:09:31 | INFO | train_inner | epoch 004: 1563 / 3002 loss=2.56, ppl=5.9, wps=5876.6, ups=0.09, wpb=64828, bsz=128, num_updates=10506, lr=9.99239e-05, gnorm=2.101, loss_scale=4, train_wall=11, gb_free=2.8, wall=120645 2021-06-20 04:09:42 | INFO | train_inner | epoch 004: 1564 / 3002 loss=2.541, ppl=5.82, wps=5886.6, ups=0.09, wpb=64865, bsz=128, num_updates=10507, lr=9.99239e-05, gnorm=2.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=120657 2021-06-20 04:09:53 | INFO | train_inner | epoch 004: 1565 / 3002 loss=2.66, ppl=6.32, wps=5743.7, ups=0.09, wpb=64808, bsz=128, num_updates=10508, lr=9.99239e-05, gnorm=2.093, loss_scale=4, train_wall=11, gb_free=2.8, wall=120668 2021-06-20 04:10:05 | INFO | train_inner | epoch 004: 1566 / 3002 loss=2.734, ppl=6.65, wps=5767.1, ups=0.09, wpb=64840, bsz=128, num_updates=10509, lr=9.99239e-05, gnorm=2.401, loss_scale=4, train_wall=11, gb_free=2.8, wall=120679 2021-06-20 04:10:16 | INFO | train_inner | epoch 004: 1567 / 3002 loss=2.682, ppl=6.42, wps=5876.7, ups=0.09, wpb=64902, bsz=128, num_updates=10510, lr=9.99239e-05, gnorm=1.963, loss_scale=4, train_wall=11, gb_free=2.8, wall=120690 2021-06-20 04:10:27 | INFO | train_inner | epoch 004: 1568 / 3002 loss=2.573, ppl=5.95, wps=5774, ups=0.09, wpb=64814, bsz=128, num_updates=10511, lr=9.99239e-05, gnorm=2.117, loss_scale=4, train_wall=11, gb_free=2.8, wall=120701 2021-06-20 04:10:38 | INFO | train_inner | epoch 004: 1569 / 3002 loss=2.425, ppl=5.37, wps=5855.9, ups=0.09, wpb=64828, bsz=128, num_updates=10512, lr=9.99239e-05, gnorm=2.219, loss_scale=4, train_wall=11, gb_free=2.8, wall=120712 2021-06-20 04:10:49 | INFO | train_inner | epoch 004: 1570 / 3002 loss=2.707, ppl=6.53, wps=5846.7, ups=0.09, wpb=64729, bsz=128, num_updates=10513, lr=9.99239e-05, gnorm=2.091, loss_scale=4, train_wall=11, gb_free=2.8, wall=120723 2021-06-20 04:11:00 | INFO | train_inner | epoch 004: 1571 / 3002 loss=2.564, ppl=5.91, wps=5920, ups=0.09, wpb=64889, bsz=128, num_updates=10514, lr=9.99239e-05, gnorm=2.023, loss_scale=4, train_wall=11, gb_free=2.8, wall=120734 2021-06-20 04:11:11 | INFO | train_inner | epoch 004: 1572 / 3002 loss=2.521, ppl=5.74, wps=5774, ups=0.09, wpb=64872, bsz=128, num_updates=10515, lr=9.99239e-05, gnorm=2.004, loss_scale=4, train_wall=11, gb_free=2.8, wall=120746 2021-06-20 04:11:22 | INFO | train_inner | epoch 004: 1573 / 3002 loss=2.632, ppl=6.2, wps=5832.3, ups=0.09, wpb=64835, bsz=128, num_updates=10516, lr=9.99239e-05, gnorm=2.174, loss_scale=4, train_wall=11, gb_free=2.8, wall=120757 2021-06-20 04:11:33 | INFO | train_inner | epoch 004: 1574 / 3002 loss=2.466, ppl=5.52, wps=5932.5, ups=0.09, wpb=64782, bsz=128, num_updates=10517, lr=9.99239e-05, gnorm=1.996, loss_scale=4, train_wall=10, gb_free=2.8, wall=120768 2021-06-20 04:11:45 | INFO | train_inner | epoch 004: 1575 / 3002 loss=2.621, ppl=6.15, wps=5775.8, ups=0.09, wpb=64858, bsz=128, num_updates=10518, lr=9.99238e-05, gnorm=2.323, loss_scale=4, train_wall=11, gb_free=2.8, wall=120779 2021-06-20 04:11:56 | INFO | train_inner | epoch 004: 1576 / 3002 loss=2.796, ppl=6.95, wps=5719.9, ups=0.09, wpb=64768, bsz=128, num_updates=10519, lr=9.99238e-05, gnorm=1.986, loss_scale=4, train_wall=11, gb_free=2.8, wall=120790 2021-06-20 04:12:07 | INFO | train_inner | epoch 004: 1577 / 3002 loss=2.725, ppl=6.61, wps=5804.7, ups=0.09, wpb=64845, bsz=128, num_updates=10520, lr=9.99238e-05, gnorm=2.009, loss_scale=4, train_wall=11, gb_free=2.8, wall=120801 2021-06-20 04:12:18 | INFO | train_inner | epoch 004: 1578 / 3002 loss=2.561, ppl=5.9, wps=5781.8, ups=0.09, wpb=64824, bsz=128, num_updates=10521, lr=9.99238e-05, gnorm=2.243, loss_scale=4, train_wall=11, gb_free=2.8, wall=120813 2021-06-20 04:12:29 | INFO | train_inner | epoch 004: 1579 / 3002 loss=2.542, ppl=5.82, wps=5822.2, ups=0.09, wpb=64826, bsz=128, num_updates=10522, lr=9.99238e-05, gnorm=2.001, loss_scale=4, train_wall=11, gb_free=2.8, wall=120824 2021-06-20 04:12:41 | INFO | train_inner | epoch 004: 1580 / 3002 loss=2.431, ppl=5.39, wps=5823.4, ups=0.09, wpb=64889, bsz=128, num_updates=10523, lr=9.99238e-05, gnorm=2.058, loss_scale=4, train_wall=11, gb_free=2.8, wall=120835 2021-06-20 04:12:52 | INFO | train_inner | epoch 004: 1581 / 3002 loss=2.404, ppl=5.29, wps=5829.3, ups=0.09, wpb=64831, bsz=128, num_updates=10524, lr=9.99238e-05, gnorm=1.991, loss_scale=4, train_wall=11, gb_free=2.8, wall=120846 2021-06-20 04:13:03 | INFO | train_inner | epoch 004: 1582 / 3002 loss=2.5, ppl=5.66, wps=5835.4, ups=0.09, wpb=64831, bsz=128, num_updates=10525, lr=9.99238e-05, gnorm=2.293, loss_scale=4, train_wall=11, gb_free=2.8, wall=120857 2021-06-20 04:13:14 | INFO | train_inner | epoch 004: 1583 / 3002 loss=2.48, ppl=5.58, wps=6012.7, ups=0.09, wpb=64877, bsz=128, num_updates=10526, lr=9.99238e-05, gnorm=2.046, loss_scale=4, train_wall=10, gb_free=2.8, wall=120868 2021-06-20 04:13:25 | INFO | train_inner | epoch 004: 1584 / 3002 loss=2.635, ppl=6.21, wps=5884.4, ups=0.09, wpb=64829, bsz=128, num_updates=10527, lr=9.99238e-05, gnorm=2.086, loss_scale=4, train_wall=11, gb_free=2.8, wall=120879 2021-06-20 04:13:36 | INFO | train_inner | epoch 004: 1585 / 3002 loss=2.621, ppl=6.15, wps=5852.4, ups=0.09, wpb=64856, bsz=128, num_updates=10528, lr=9.99238e-05, gnorm=2.013, loss_scale=4, train_wall=11, gb_free=2.8, wall=120890 2021-06-20 04:13:47 | INFO | train_inner | epoch 004: 1586 / 3002 loss=2.807, ppl=7, wps=5812, ups=0.09, wpb=64846, bsz=128, num_updates=10529, lr=9.99238e-05, gnorm=2.167, loss_scale=4, train_wall=11, gb_free=2.8, wall=120901 2021-06-20 04:13:58 | INFO | train_inner | epoch 004: 1587 / 3002 loss=2.551, ppl=5.86, wps=5850.9, ups=0.09, wpb=64847, bsz=128, num_updates=10530, lr=9.99238e-05, gnorm=4.521, loss_scale=4, train_wall=11, gb_free=2.8, wall=120912 2021-06-20 04:14:09 | INFO | train_inner | epoch 004: 1588 / 3002 loss=2.619, ppl=6.15, wps=5860.9, ups=0.09, wpb=64861, bsz=128, num_updates=10531, lr=9.99237e-05, gnorm=2.674, loss_scale=4, train_wall=11, gb_free=2.8, wall=120923 2021-06-20 04:14:20 | INFO | train_inner | epoch 004: 1589 / 3002 loss=2.542, ppl=5.82, wps=5882.5, ups=0.09, wpb=64843, bsz=128, num_updates=10532, lr=9.99237e-05, gnorm=2.074, loss_scale=4, train_wall=11, gb_free=2.8, wall=120934 2021-06-20 04:14:31 | INFO | train_inner | epoch 004: 1590 / 3002 loss=2.484, ppl=5.6, wps=5838, ups=0.09, wpb=64838, bsz=128, num_updates=10533, lr=9.99237e-05, gnorm=2.063, loss_scale=4, train_wall=11, gb_free=2.8, wall=120945 2021-06-20 04:14:42 | INFO | train_inner | epoch 004: 1591 / 3002 loss=2.629, ppl=6.19, wps=5952.2, ups=0.09, wpb=64804, bsz=128, num_updates=10534, lr=9.99237e-05, gnorm=2.074, loss_scale=4, train_wall=10, gb_free=2.8, wall=120956 2021-06-20 04:14:53 | INFO | train_inner | epoch 004: 1592 / 3002 loss=2.56, ppl=5.9, wps=5902.3, ups=0.09, wpb=64949, bsz=128, num_updates=10535, lr=9.99237e-05, gnorm=2.068, loss_scale=4, train_wall=11, gb_free=2.8, wall=120967 2021-06-20 04:15:04 | INFO | train_inner | epoch 004: 1593 / 3002 loss=2.794, ppl=6.94, wps=5873.9, ups=0.09, wpb=64773, bsz=128, num_updates=10536, lr=9.99237e-05, gnorm=2.106, loss_scale=4, train_wall=11, gb_free=2.8, wall=120978 2021-06-20 04:15:15 | INFO | train_inner | epoch 004: 1594 / 3002 loss=2.713, ppl=6.56, wps=5749.2, ups=0.09, wpb=64853, bsz=128, num_updates=10537, lr=9.99237e-05, gnorm=2.092, loss_scale=4, train_wall=11, gb_free=2.8, wall=120990 2021-06-20 04:15:27 | INFO | train_inner | epoch 004: 1595 / 3002 loss=2.446, ppl=5.45, wps=5798.5, ups=0.09, wpb=64839, bsz=128, num_updates=10538, lr=9.99237e-05, gnorm=1.954, loss_scale=4, train_wall=11, gb_free=2.8, wall=121001 2021-06-20 04:15:37 | INFO | train_inner | epoch 004: 1596 / 3002 loss=2.573, ppl=5.95, wps=5912.8, ups=0.09, wpb=64801, bsz=128, num_updates=10539, lr=9.99237e-05, gnorm=2.247, loss_scale=4, train_wall=10, gb_free=2.8, wall=121012 2021-06-20 04:15:49 | INFO | train_inner | epoch 004: 1597 / 3002 loss=2.56, ppl=5.9, wps=5861.5, ups=0.09, wpb=64871, bsz=128, num_updates=10540, lr=9.99237e-05, gnorm=2.005, loss_scale=4, train_wall=11, gb_free=2.8, wall=121023 2021-06-20 04:15:59 | INFO | train_inner | epoch 004: 1598 / 3002 loss=2.594, ppl=6.04, wps=5972.4, ups=0.09, wpb=64955, bsz=128, num_updates=10541, lr=9.99237e-05, gnorm=1.998, loss_scale=4, train_wall=10, gb_free=2.8, wall=121034 2021-06-20 04:16:10 | INFO | train_inner | epoch 004: 1599 / 3002 loss=2.55, ppl=5.86, wps=5928.8, ups=0.09, wpb=64647, bsz=128, num_updates=10542, lr=9.99237e-05, gnorm=2.206, loss_scale=4, train_wall=10, gb_free=2.8, wall=121045 2021-06-20 04:16:22 | INFO | train_inner | epoch 004: 1600 / 3002 loss=2.489, ppl=5.61, wps=5788, ups=0.09, wpb=64861, bsz=128, num_updates=10543, lr=9.99236e-05, gnorm=2.049, loss_scale=4, train_wall=11, gb_free=2.8, wall=121056 2021-06-20 04:16:32 | INFO | train_inner | epoch 004: 1601 / 3002 loss=2.525, ppl=5.76, wps=5971.4, ups=0.09, wpb=64896, bsz=128, num_updates=10544, lr=9.99236e-05, gnorm=2.101, loss_scale=4, train_wall=10, gb_free=2.8, wall=121067 2021-06-20 04:16:43 | INFO | train_inner | epoch 004: 1602 / 3002 loss=2.669, ppl=6.36, wps=5913, ups=0.09, wpb=64838, bsz=128, num_updates=10545, lr=9.99236e-05, gnorm=2.049, loss_scale=4, train_wall=11, gb_free=2.8, wall=121078 2021-06-20 04:16:55 | INFO | train_inner | epoch 004: 1603 / 3002 loss=2.497, ppl=5.65, wps=5815.7, ups=0.09, wpb=64843, bsz=128, num_updates=10546, lr=9.99236e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=121089 2021-06-20 04:17:06 | INFO | train_inner | epoch 004: 1604 / 3002 loss=2.62, ppl=6.15, wps=5817.5, ups=0.09, wpb=64776, bsz=128, num_updates=10547, lr=9.99236e-05, gnorm=2.042, loss_scale=4, train_wall=11, gb_free=2.8, wall=121100 2021-06-20 04:17:17 | INFO | train_inner | epoch 004: 1605 / 3002 loss=2.496, ppl=5.64, wps=5842.3, ups=0.09, wpb=64890, bsz=128, num_updates=10548, lr=9.99236e-05, gnorm=2.561, loss_scale=4, train_wall=11, gb_free=2.8, wall=121111 2021-06-20 04:17:28 | INFO | train_inner | epoch 004: 1606 / 3002 loss=2.429, ppl=5.38, wps=5991.3, ups=0.09, wpb=64796, bsz=128, num_updates=10549, lr=9.99236e-05, gnorm=2.033, loss_scale=4, train_wall=10, gb_free=2.8, wall=121122 2021-06-20 04:17:39 | INFO | train_inner | epoch 004: 1607 / 3002 loss=2.396, ppl=5.26, wps=5894.8, ups=0.09, wpb=64912, bsz=128, num_updates=10550, lr=9.99236e-05, gnorm=2.208, loss_scale=4, train_wall=11, gb_free=2.8, wall=121133 2021-06-20 04:17:50 | INFO | train_inner | epoch 004: 1608 / 3002 loss=2.651, ppl=6.28, wps=5872.6, ups=0.09, wpb=64783, bsz=128, num_updates=10551, lr=9.99236e-05, gnorm=2.125, loss_scale=4, train_wall=11, gb_free=2.8, wall=121144 2021-06-20 04:18:01 | INFO | train_inner | epoch 004: 1609 / 3002 loss=2.543, ppl=5.83, wps=5919.6, ups=0.09, wpb=64883, bsz=128, num_updates=10552, lr=9.99236e-05, gnorm=2.098, loss_scale=4, train_wall=11, gb_free=2.8, wall=121155 2021-06-20 04:18:12 | INFO | train_inner | epoch 004: 1610 / 3002 loss=2.49, ppl=5.62, wps=5910.6, ups=0.09, wpb=64856, bsz=128, num_updates=10553, lr=9.99236e-05, gnorm=2.011, loss_scale=4, train_wall=10, gb_free=2.8, wall=121166 2021-06-20 04:18:22 | INFO | train_inner | epoch 004: 1611 / 3002 loss=2.562, ppl=5.9, wps=5963.2, ups=0.09, wpb=64879, bsz=128, num_updates=10554, lr=9.99236e-05, gnorm=2.048, loss_scale=4, train_wall=10, gb_free=2.8, wall=121177 2021-06-20 04:18:34 | INFO | train_inner | epoch 004: 1612 / 3002 loss=2.554, ppl=5.87, wps=5792.1, ups=0.09, wpb=64772, bsz=128, num_updates=10555, lr=9.99236e-05, gnorm=1.994, loss_scale=4, train_wall=11, gb_free=2.8, wall=121188 2021-06-20 04:18:45 | INFO | train_inner | epoch 004: 1613 / 3002 loss=2.558, ppl=5.89, wps=5843.9, ups=0.09, wpb=64806, bsz=128, num_updates=10556, lr=9.99235e-05, gnorm=2.077, loss_scale=4, train_wall=11, gb_free=2.8, wall=121199 2021-06-20 04:18:56 | INFO | train_inner | epoch 004: 1614 / 3002 loss=2.524, ppl=5.75, wps=5823.5, ups=0.09, wpb=64818, bsz=128, num_updates=10557, lr=9.99235e-05, gnorm=1.981, loss_scale=4, train_wall=11, gb_free=2.8, wall=121210 2021-06-20 04:19:07 | INFO | train_inner | epoch 004: 1615 / 3002 loss=2.593, ppl=6.03, wps=5841.9, ups=0.09, wpb=64792, bsz=128, num_updates=10558, lr=9.99235e-05, gnorm=2.01, loss_scale=4, train_wall=11, gb_free=2.8, wall=121221 2021-06-20 04:19:18 | INFO | train_inner | epoch 004: 1616 / 3002 loss=2.502, ppl=5.66, wps=5750.7, ups=0.09, wpb=64829, bsz=128, num_updates=10559, lr=9.99235e-05, gnorm=2.004, loss_scale=4, train_wall=11, gb_free=2.8, wall=121233 2021-06-20 04:19:29 | INFO | train_inner | epoch 004: 1617 / 3002 loss=2.712, ppl=6.55, wps=5849, ups=0.09, wpb=64890, bsz=128, num_updates=10560, lr=9.99235e-05, gnorm=2.106, loss_scale=4, train_wall=11, gb_free=2.8, wall=121244 2021-06-20 04:19:40 | INFO | train_inner | epoch 004: 1618 / 3002 loss=2.615, ppl=6.13, wps=5796.5, ups=0.09, wpb=64823, bsz=128, num_updates=10561, lr=9.99235e-05, gnorm=2.043, loss_scale=4, train_wall=11, gb_free=2.8, wall=121255 2021-06-20 04:19:52 | INFO | train_inner | epoch 004: 1619 / 3002 loss=2.439, ppl=5.42, wps=5717.7, ups=0.09, wpb=64814, bsz=128, num_updates=10562, lr=9.99235e-05, gnorm=2.005, loss_scale=4, train_wall=11, gb_free=2.8, wall=121266 2021-06-20 04:20:03 | INFO | train_inner | epoch 004: 1620 / 3002 loss=2.54, ppl=5.82, wps=5949, ups=0.09, wpb=64883, bsz=128, num_updates=10563, lr=9.99235e-05, gnorm=2.091, loss_scale=4, train_wall=10, gb_free=2.8, wall=121277 2021-06-20 04:20:14 | INFO | train_inner | epoch 004: 1621 / 3002 loss=2.465, ppl=5.52, wps=5789.9, ups=0.09, wpb=64818, bsz=128, num_updates=10564, lr=9.99235e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=121288 2021-06-20 04:20:25 | INFO | train_inner | epoch 004: 1622 / 3002 loss=2.499, ppl=5.65, wps=5933, ups=0.09, wpb=64832, bsz=128, num_updates=10565, lr=9.99235e-05, gnorm=1.986, loss_scale=4, train_wall=10, gb_free=2.8, wall=121299 2021-06-20 04:20:36 | INFO | train_inner | epoch 004: 1623 / 3002 loss=2.636, ppl=6.21, wps=5744.5, ups=0.09, wpb=64783, bsz=128, num_updates=10566, lr=9.99235e-05, gnorm=2.047, loss_scale=4, train_wall=11, gb_free=2.8, wall=121310 2021-06-20 04:20:47 | INFO | train_inner | epoch 004: 1624 / 3002 loss=2.664, ppl=6.34, wps=5776.3, ups=0.09, wpb=64853, bsz=128, num_updates=10567, lr=9.99235e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=121322 2021-06-20 04:20:58 | INFO | train_inner | epoch 004: 1625 / 3002 loss=2.528, ppl=5.77, wps=5838.2, ups=0.09, wpb=64817, bsz=128, num_updates=10568, lr=9.99234e-05, gnorm=2.224, loss_scale=4, train_wall=11, gb_free=2.8, wall=121333 2021-06-20 04:21:10 | INFO | train_inner | epoch 004: 1626 / 3002 loss=2.484, ppl=5.59, wps=5679.6, ups=0.09, wpb=64763, bsz=128, num_updates=10569, lr=9.99234e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=121344 2021-06-20 04:21:21 | INFO | train_inner | epoch 004: 1627 / 3002 loss=2.544, ppl=5.83, wps=5867.7, ups=0.09, wpb=64859, bsz=128, num_updates=10570, lr=9.99234e-05, gnorm=2.112, loss_scale=4, train_wall=11, gb_free=2.8, wall=121355 2021-06-20 04:21:32 | INFO | train_inner | epoch 004: 1628 / 3002 loss=2.777, ppl=6.85, wps=5742.4, ups=0.09, wpb=64819, bsz=128, num_updates=10571, lr=9.99234e-05, gnorm=2.056, loss_scale=4, train_wall=11, gb_free=2.8, wall=121367 2021-06-20 04:21:43 | INFO | train_inner | epoch 004: 1629 / 3002 loss=2.563, ppl=5.91, wps=5914.2, ups=0.09, wpb=64848, bsz=128, num_updates=10572, lr=9.99234e-05, gnorm=2.079, loss_scale=4, train_wall=11, gb_free=2.8, wall=121377 2021-06-20 04:21:54 | INFO | train_inner | epoch 004: 1630 / 3002 loss=2.456, ppl=5.49, wps=5920.7, ups=0.09, wpb=64804, bsz=128, num_updates=10573, lr=9.99234e-05, gnorm=1.949, loss_scale=4, train_wall=11, gb_free=2.8, wall=121388 2021-06-20 04:22:05 | INFO | train_inner | epoch 004: 1631 / 3002 loss=2.485, ppl=5.6, wps=5806.8, ups=0.09, wpb=64837, bsz=128, num_updates=10574, lr=9.99234e-05, gnorm=1.94, loss_scale=4, train_wall=11, gb_free=2.8, wall=121400 2021-06-20 04:22:16 | INFO | train_inner | epoch 004: 1632 / 3002 loss=2.397, ppl=5.27, wps=5865, ups=0.09, wpb=64882, bsz=128, num_updates=10575, lr=9.99234e-05, gnorm=2.382, loss_scale=4, train_wall=11, gb_free=2.8, wall=121411 2021-06-20 04:22:28 | INFO | train_inner | epoch 004: 1633 / 3002 loss=2.594, ppl=6.04, wps=5687.8, ups=0.09, wpb=64740, bsz=128, num_updates=10576, lr=9.99234e-05, gnorm=2.176, loss_scale=4, train_wall=11, gb_free=2.8, wall=121422 2021-06-20 04:22:39 | INFO | train_inner | epoch 004: 1634 / 3002 loss=2.657, ppl=6.31, wps=5903, ups=0.09, wpb=64852, bsz=128, num_updates=10577, lr=9.99234e-05, gnorm=2.079, loss_scale=4, train_wall=11, gb_free=2.8, wall=121433 2021-06-20 04:22:50 | INFO | train_inner | epoch 004: 1635 / 3002 loss=2.61, ppl=6.11, wps=5930.2, ups=0.09, wpb=64926, bsz=128, num_updates=10578, lr=9.99234e-05, gnorm=2.049, loss_scale=4, train_wall=10, gb_free=2.8, wall=121444 2021-06-20 04:23:01 | INFO | train_inner | epoch 004: 1636 / 3002 loss=2.674, ppl=6.38, wps=5926, ups=0.09, wpb=64887, bsz=128, num_updates=10579, lr=9.99234e-05, gnorm=2.351, loss_scale=4, train_wall=11, gb_free=2.8, wall=121455 2021-06-20 04:23:12 | INFO | train_inner | epoch 004: 1637 / 3002 loss=2.674, ppl=6.38, wps=5664.9, ups=0.09, wpb=64768, bsz=128, num_updates=10580, lr=9.99234e-05, gnorm=2.048, loss_scale=4, train_wall=11, gb_free=2.8, wall=121466 2021-06-20 04:23:23 | INFO | train_inner | epoch 004: 1638 / 3002 loss=2.537, ppl=5.8, wps=5987, ups=0.09, wpb=64807, bsz=128, num_updates=10581, lr=9.99233e-05, gnorm=2.037, loss_scale=8, train_wall=10, gb_free=2.8, wall=121477 2021-06-20 04:23:34 | INFO | train_inner | epoch 004: 1639 / 3002 loss=2.585, ppl=6, wps=5818.2, ups=0.09, wpb=64764, bsz=128, num_updates=10582, lr=9.99233e-05, gnorm=2.023, loss_scale=8, train_wall=11, gb_free=2.8, wall=121488 2021-06-20 04:23:45 | INFO | train_inner | epoch 004: 1640 / 3002 loss=2.437, ppl=5.42, wps=5913.9, ups=0.09, wpb=64814, bsz=128, num_updates=10583, lr=9.99233e-05, gnorm=2.013, loss_scale=8, train_wall=10, gb_free=2.8, wall=121499 2021-06-20 04:23:56 | INFO | train_inner | epoch 004: 1641 / 3002 loss=2.458, ppl=5.49, wps=5874.4, ups=0.09, wpb=64806, bsz=128, num_updates=10584, lr=9.99233e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=121510 2021-06-20 04:24:07 | INFO | train_inner | epoch 004: 1642 / 3002 loss=2.558, ppl=5.89, wps=5786.6, ups=0.09, wpb=64823, bsz=128, num_updates=10585, lr=9.99233e-05, gnorm=2.021, loss_scale=8, train_wall=11, gb_free=2.8, wall=121522 2021-06-20 04:24:18 | INFO | train_inner | epoch 004: 1643 / 3002 loss=2.361, ppl=5.14, wps=5894.3, ups=0.09, wpb=64888, bsz=128, num_updates=10586, lr=9.99233e-05, gnorm=2.08, loss_scale=8, train_wall=11, gb_free=2.8, wall=121533 2021-06-20 04:24:29 | INFO | train_inner | epoch 004: 1644 / 3002 loss=2.584, ppl=6, wps=5844.3, ups=0.09, wpb=64786, bsz=128, num_updates=10587, lr=9.99233e-05, gnorm=2.037, loss_scale=8, train_wall=11, gb_free=2.8, wall=121544 2021-06-20 04:24:40 | INFO | train_inner | epoch 004: 1645 / 3002 loss=2.676, ppl=6.39, wps=5809.5, ups=0.09, wpb=64824, bsz=128, num_updates=10588, lr=9.99233e-05, gnorm=2.141, loss_scale=8, train_wall=11, gb_free=2.8, wall=121555 2021-06-20 04:24:52 | INFO | train_inner | epoch 004: 1646 / 3002 loss=2.568, ppl=5.93, wps=5837.5, ups=0.09, wpb=64912, bsz=128, num_updates=10589, lr=9.99233e-05, gnorm=2.074, loss_scale=8, train_wall=11, gb_free=2.8, wall=121566 2021-06-20 04:25:03 | INFO | train_inner | epoch 004: 1647 / 3002 loss=2.571, ppl=5.94, wps=5875.5, ups=0.09, wpb=64923, bsz=128, num_updates=10590, lr=9.99233e-05, gnorm=2.003, loss_scale=8, train_wall=11, gb_free=2.8, wall=121577 2021-06-20 04:25:14 | INFO | train_inner | epoch 004: 1648 / 3002 loss=2.617, ppl=6.13, wps=5925, ups=0.09, wpb=64816, bsz=128, num_updates=10591, lr=9.99233e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=121588 2021-06-20 04:25:25 | INFO | train_inner | epoch 004: 1649 / 3002 loss=2.476, ppl=5.56, wps=5904, ups=0.09, wpb=64860, bsz=128, num_updates=10592, lr=9.99233e-05, gnorm=2.032, loss_scale=8, train_wall=11, gb_free=2.8, wall=121599 2021-06-20 04:25:35 | INFO | train_inner | epoch 004: 1650 / 3002 loss=2.603, ppl=6.08, wps=5927.7, ups=0.09, wpb=64810, bsz=128, num_updates=10593, lr=9.99232e-05, gnorm=2.047, loss_scale=8, train_wall=10, gb_free=2.8, wall=121610 2021-06-20 04:25:47 | INFO | train_inner | epoch 004: 1651 / 3002 loss=2.549, ppl=5.85, wps=5851.1, ups=0.09, wpb=64736, bsz=128, num_updates=10594, lr=9.99232e-05, gnorm=2.1, loss_scale=8, train_wall=11, gb_free=2.8, wall=121621 2021-06-20 04:25:58 | INFO | train_inner | epoch 004: 1652 / 3002 loss=2.441, ppl=5.43, wps=5822, ups=0.09, wpb=64814, bsz=128, num_updates=10595, lr=9.99232e-05, gnorm=2.064, loss_scale=8, train_wall=11, gb_free=2.8, wall=121632 2021-06-20 04:26:09 | INFO | train_inner | epoch 004: 1653 / 3002 loss=2.539, ppl=5.81, wps=5928.2, ups=0.09, wpb=64854, bsz=128, num_updates=10596, lr=9.99232e-05, gnorm=2.059, loss_scale=8, train_wall=10, gb_free=2.8, wall=121643 2021-06-20 04:26:19 | INFO | train_inner | epoch 004: 1654 / 3002 loss=2.503, ppl=5.67, wps=5978.6, ups=0.09, wpb=64824, bsz=128, num_updates=10597, lr=9.99232e-05, gnorm=2.048, loss_scale=8, train_wall=10, gb_free=2.8, wall=121654 2021-06-20 04:26:31 | INFO | train_inner | epoch 004: 1655 / 3002 loss=2.499, ppl=5.65, wps=5780.5, ups=0.09, wpb=64798, bsz=128, num_updates=10598, lr=9.99232e-05, gnorm=2.036, loss_scale=8, train_wall=11, gb_free=2.8, wall=121665 2021-06-20 04:26:42 | INFO | train_inner | epoch 004: 1656 / 3002 loss=2.446, ppl=5.45, wps=5879, ups=0.09, wpb=64865, bsz=128, num_updates=10599, lr=9.99232e-05, gnorm=1.879, loss_scale=8, train_wall=11, gb_free=2.8, wall=121676 2021-06-20 04:26:53 | INFO | train_inner | epoch 004: 1657 / 3002 loss=2.565, ppl=5.92, wps=5926.8, ups=0.09, wpb=64804, bsz=128, num_updates=10600, lr=9.99232e-05, gnorm=2.09, loss_scale=8, train_wall=10, gb_free=2.8, wall=121687 2021-06-20 04:27:04 | INFO | train_inner | epoch 004: 1658 / 3002 loss=2.459, ppl=5.5, wps=5854.2, ups=0.09, wpb=64843, bsz=128, num_updates=10601, lr=9.99232e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=121698 2021-06-20 04:27:15 | INFO | train_inner | epoch 004: 1659 / 3002 loss=2.609, ppl=6.1, wps=5742.1, ups=0.09, wpb=64758, bsz=128, num_updates=10602, lr=9.99232e-05, gnorm=1.956, loss_scale=8, train_wall=11, gb_free=2.8, wall=121709 2021-06-20 04:27:26 | INFO | train_inner | epoch 004: 1660 / 3002 loss=2.527, ppl=5.76, wps=5830.3, ups=0.09, wpb=64796, bsz=128, num_updates=10603, lr=9.99232e-05, gnorm=2.079, loss_scale=8, train_wall=11, gb_free=2.8, wall=121720 2021-06-20 04:27:37 | INFO | train_inner | epoch 004: 1661 / 3002 loss=2.502, ppl=5.66, wps=5910.2, ups=0.09, wpb=64844, bsz=128, num_updates=10604, lr=9.99232e-05, gnorm=2.055, loss_scale=8, train_wall=10, gb_free=2.8, wall=121731 2021-06-20 04:27:48 | INFO | train_inner | epoch 004: 1662 / 3002 loss=2.481, ppl=5.58, wps=5891.8, ups=0.09, wpb=64829, bsz=128, num_updates=10605, lr=9.99232e-05, gnorm=2.066, loss_scale=8, train_wall=11, gb_free=2.8, wall=121742 2021-06-20 04:27:59 | INFO | train_inner | epoch 004: 1663 / 3002 loss=2.775, ppl=6.84, wps=5897.4, ups=0.09, wpb=64903, bsz=128, num_updates=10606, lr=9.99231e-05, gnorm=2.761, loss_scale=8, train_wall=11, gb_free=2.8, wall=121753 2021-06-20 04:28:10 | INFO | train_inner | epoch 004: 1664 / 3002 loss=2.408, ppl=5.31, wps=5973.9, ups=0.09, wpb=64770, bsz=128, num_updates=10607, lr=9.99231e-05, gnorm=2.139, loss_scale=8, train_wall=10, gb_free=2.8, wall=121764 2021-06-20 04:28:21 | INFO | train_inner | epoch 004: 1665 / 3002 loss=2.587, ppl=6.01, wps=5813.5, ups=0.09, wpb=64820, bsz=128, num_updates=10608, lr=9.99231e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=121775 2021-06-20 04:28:32 | INFO | train_inner | epoch 004: 1666 / 3002 loss=2.454, ppl=5.48, wps=5776.8, ups=0.09, wpb=64832, bsz=128, num_updates=10609, lr=9.99231e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=121787 2021-06-20 04:28:43 | INFO | train_inner | epoch 004: 1667 / 3002 loss=2.526, ppl=5.76, wps=5908.7, ups=0.09, wpb=64836, bsz=128, num_updates=10610, lr=9.99231e-05, gnorm=2.008, loss_scale=8, train_wall=10, gb_free=2.8, wall=121798 2021-06-20 04:28:54 | INFO | train_inner | epoch 004: 1668 / 3002 loss=2.573, ppl=5.95, wps=5771.1, ups=0.09, wpb=64784, bsz=128, num_updates=10611, lr=9.99231e-05, gnorm=2.005, loss_scale=8, train_wall=11, gb_free=2.8, wall=121809 2021-06-20 04:29:06 | INFO | train_inner | epoch 004: 1669 / 3002 loss=2.579, ppl=5.98, wps=5847.9, ups=0.09, wpb=64835, bsz=128, num_updates=10612, lr=9.99231e-05, gnorm=2.086, loss_scale=8, train_wall=11, gb_free=2.8, wall=121820 2021-06-20 04:29:17 | INFO | train_inner | epoch 004: 1670 / 3002 loss=2.568, ppl=5.93, wps=5813.1, ups=0.09, wpb=64833, bsz=128, num_updates=10613, lr=9.99231e-05, gnorm=2.054, loss_scale=8, train_wall=11, gb_free=2.8, wall=121831 2021-06-20 04:29:28 | INFO | train_inner | epoch 004: 1671 / 3002 loss=2.528, ppl=5.77, wps=5882.5, ups=0.09, wpb=64804, bsz=128, num_updates=10614, lr=9.99231e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=121842 2021-06-20 04:29:39 | INFO | train_inner | epoch 004: 1672 / 3002 loss=2.504, ppl=5.67, wps=5725.3, ups=0.09, wpb=64797, bsz=128, num_updates=10615, lr=9.99231e-05, gnorm=2.074, loss_scale=8, train_wall=11, gb_free=2.8, wall=121853 2021-06-20 04:29:50 | INFO | train_inner | epoch 004: 1673 / 3002 loss=2.632, ppl=6.2, wps=5826.3, ups=0.09, wpb=64863, bsz=128, num_updates=10616, lr=9.99231e-05, gnorm=2.059, loss_scale=8, train_wall=11, gb_free=2.8, wall=121865 2021-06-20 04:30:01 | INFO | train_inner | epoch 004: 1674 / 3002 loss=2.657, ppl=6.31, wps=5894.2, ups=0.09, wpb=64858, bsz=128, num_updates=10617, lr=9.99231e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=121876 2021-06-20 04:30:12 | INFO | train_inner | epoch 004: 1675 / 3002 loss=2.583, ppl=5.99, wps=5830.1, ups=0.09, wpb=64745, bsz=128, num_updates=10618, lr=9.9923e-05, gnorm=4.538, loss_scale=8, train_wall=11, gb_free=2.8, wall=121887 2021-06-20 04:30:23 | INFO | train_inner | epoch 004: 1676 / 3002 loss=2.592, ppl=6.03, wps=5943.9, ups=0.09, wpb=64854, bsz=128, num_updates=10619, lr=9.9923e-05, gnorm=2.145, loss_scale=8, train_wall=10, gb_free=2.8, wall=121898 2021-06-20 04:30:34 | INFO | train_inner | epoch 004: 1677 / 3002 loss=2.437, ppl=5.42, wps=5850.3, ups=0.09, wpb=64818, bsz=128, num_updates=10620, lr=9.9923e-05, gnorm=2.005, loss_scale=8, train_wall=11, gb_free=2.8, wall=121909 2021-06-20 04:30:45 | INFO | train_inner | epoch 004: 1678 / 3002 loss=2.582, ppl=5.99, wps=5884, ups=0.09, wpb=64867, bsz=128, num_updates=10621, lr=9.9923e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=121920 2021-06-20 04:30:56 | INFO | train_inner | epoch 004: 1679 / 3002 loss=2.664, ppl=6.34, wps=5940.3, ups=0.09, wpb=64842, bsz=128, num_updates=10622, lr=9.9923e-05, gnorm=3.402, loss_scale=8, train_wall=10, gb_free=2.8, wall=121931 2021-06-20 04:31:07 | INFO | train_inner | epoch 004: 1680 / 3002 loss=2.68, ppl=6.41, wps=5775.5, ups=0.09, wpb=64757, bsz=128, num_updates=10623, lr=9.9923e-05, gnorm=2.159, loss_scale=8, train_wall=11, gb_free=2.8, wall=121942 2021-06-20 04:31:18 | INFO | train_inner | epoch 004: 1681 / 3002 loss=2.632, ppl=6.2, wps=5912.2, ups=0.09, wpb=64880, bsz=128, num_updates=10624, lr=9.9923e-05, gnorm=2.111, loss_scale=8, train_wall=11, gb_free=2.8, wall=121953 2021-06-20 04:31:30 | INFO | train_inner | epoch 004: 1682 / 3002 loss=2.74, ppl=6.68, wps=5828.8, ups=0.09, wpb=64898, bsz=128, num_updates=10625, lr=9.9923e-05, gnorm=2.144, loss_scale=8, train_wall=11, gb_free=2.8, wall=121964 2021-06-20 04:31:41 | INFO | train_inner | epoch 004: 1683 / 3002 loss=2.507, ppl=5.69, wps=5808.4, ups=0.09, wpb=64885, bsz=128, num_updates=10626, lr=9.9923e-05, gnorm=2.076, loss_scale=8, train_wall=11, gb_free=2.8, wall=121975 2021-06-20 04:31:52 | INFO | train_inner | epoch 004: 1684 / 3002 loss=2.567, ppl=5.93, wps=5851.3, ups=0.09, wpb=64750, bsz=128, num_updates=10627, lr=9.9923e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=121986 2021-06-20 04:32:03 | INFO | train_inner | epoch 004: 1685 / 3002 loss=2.581, ppl=5.98, wps=5905.6, ups=0.09, wpb=64919, bsz=128, num_updates=10628, lr=9.9923e-05, gnorm=2.037, loss_scale=8, train_wall=11, gb_free=2.8, wall=121997 2021-06-20 04:32:14 | INFO | train_inner | epoch 004: 1686 / 3002 loss=2.478, ppl=5.57, wps=5870.6, ups=0.09, wpb=64791, bsz=128, num_updates=10629, lr=9.9923e-05, gnorm=2.066, loss_scale=8, train_wall=11, gb_free=2.8, wall=122008 2021-06-20 04:32:25 | INFO | train_inner | epoch 004: 1687 / 3002 loss=2.633, ppl=6.2, wps=5841.9, ups=0.09, wpb=64768, bsz=128, num_updates=10630, lr=9.9923e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=122019 2021-06-20 04:32:36 | INFO | train_inner | epoch 004: 1688 / 3002 loss=2.627, ppl=6.18, wps=5957.1, ups=0.09, wpb=64842, bsz=128, num_updates=10631, lr=9.99229e-05, gnorm=1.999, loss_scale=8, train_wall=10, gb_free=2.8, wall=122030 2021-06-20 04:32:47 | INFO | train_inner | epoch 004: 1689 / 3002 loss=2.569, ppl=5.93, wps=5862.2, ups=0.09, wpb=64801, bsz=128, num_updates=10632, lr=9.99229e-05, gnorm=2.135, loss_scale=8, train_wall=11, gb_free=2.8, wall=122041 2021-06-20 04:32:58 | INFO | train_inner | epoch 004: 1690 / 3002 loss=2.44, ppl=5.43, wps=5879, ups=0.09, wpb=64774, bsz=128, num_updates=10633, lr=9.99229e-05, gnorm=2.108, loss_scale=8, train_wall=11, gb_free=2.8, wall=122052 2021-06-20 04:33:09 | INFO | train_inner | epoch 004: 1691 / 3002 loss=2.601, ppl=6.07, wps=5729.9, ups=0.09, wpb=64791, bsz=128, num_updates=10634, lr=9.99229e-05, gnorm=2.133, loss_scale=8, train_wall=11, gb_free=2.8, wall=122064 2021-06-20 04:33:20 | INFO | train_inner | epoch 004: 1692 / 3002 loss=2.502, ppl=5.66, wps=5936.6, ups=0.09, wpb=64844, bsz=128, num_updates=10635, lr=9.99229e-05, gnorm=2.156, loss_scale=8, train_wall=10, gb_free=2.8, wall=122074 2021-06-20 04:33:31 | INFO | train_inner | epoch 004: 1693 / 3002 loss=2.606, ppl=6.09, wps=5824.1, ups=0.09, wpb=64797, bsz=128, num_updates=10636, lr=9.99229e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=122086 2021-06-20 04:33:42 | INFO | train_inner | epoch 004: 1694 / 3002 loss=2.531, ppl=5.78, wps=5863, ups=0.09, wpb=64838, bsz=128, num_updates=10637, lr=9.99229e-05, gnorm=2.081, loss_scale=8, train_wall=11, gb_free=2.8, wall=122097 2021-06-20 04:33:53 | INFO | train_inner | epoch 004: 1695 / 3002 loss=2.737, ppl=6.67, wps=5806.4, ups=0.09, wpb=64808, bsz=128, num_updates=10638, lr=9.99229e-05, gnorm=2.09, loss_scale=8, train_wall=11, gb_free=2.8, wall=122108 2021-06-20 04:34:05 | INFO | train_inner | epoch 004: 1696 / 3002 loss=2.684, ppl=6.42, wps=5818.1, ups=0.09, wpb=64874, bsz=128, num_updates=10639, lr=9.99229e-05, gnorm=2.057, loss_scale=8, train_wall=11, gb_free=2.8, wall=122119 2021-06-20 04:34:16 | INFO | train_inner | epoch 004: 1697 / 3002 loss=2.61, ppl=6.11, wps=5672.3, ups=0.09, wpb=64756, bsz=128, num_updates=10640, lr=9.99229e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=122130 2021-06-20 04:34:27 | INFO | train_inner | epoch 004: 1698 / 3002 loss=2.404, ppl=5.29, wps=5771.3, ups=0.09, wpb=64808, bsz=128, num_updates=10641, lr=9.99229e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=122142 2021-06-20 04:34:38 | INFO | train_inner | epoch 004: 1699 / 3002 loss=2.508, ppl=5.69, wps=5834, ups=0.09, wpb=64801, bsz=128, num_updates=10642, lr=9.99229e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=122153 2021-06-20 04:34:49 | INFO | train_inner | epoch 004: 1700 / 3002 loss=2.631, ppl=6.19, wps=5827.4, ups=0.09, wpb=64733, bsz=128, num_updates=10643, lr=9.99228e-05, gnorm=2.003, loss_scale=8, train_wall=11, gb_free=2.8, wall=122164 2021-06-20 04:35:00 | INFO | train_inner | epoch 004: 1701 / 3002 loss=2.513, ppl=5.71, wps=5929.4, ups=0.09, wpb=64875, bsz=128, num_updates=10644, lr=9.99228e-05, gnorm=2.039, loss_scale=8, train_wall=10, gb_free=2.8, wall=122175 2021-06-20 04:35:11 | INFO | train_inner | epoch 004: 1702 / 3002 loss=2.65, ppl=6.28, wps=5864.2, ups=0.09, wpb=64834, bsz=128, num_updates=10645, lr=9.99228e-05, gnorm=2.094, loss_scale=8, train_wall=11, gb_free=2.8, wall=122186 2021-06-20 04:35:23 | INFO | train_inner | epoch 004: 1703 / 3002 loss=2.592, ppl=6.03, wps=5716.4, ups=0.09, wpb=64806, bsz=128, num_updates=10646, lr=9.99228e-05, gnorm=2.069, loss_scale=8, train_wall=11, gb_free=2.8, wall=122197 2021-06-20 04:35:34 | INFO | train_inner | epoch 004: 1704 / 3002 loss=2.642, ppl=6.24, wps=5770.1, ups=0.09, wpb=64893, bsz=128, num_updates=10647, lr=9.99228e-05, gnorm=3.949, loss_scale=8, train_wall=11, gb_free=2.8, wall=122208 2021-06-20 04:35:45 | INFO | train_inner | epoch 004: 1705 / 3002 loss=2.535, ppl=5.8, wps=5849.7, ups=0.09, wpb=64847, bsz=128, num_updates=10648, lr=9.99228e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=122219 2021-06-20 04:35:56 | INFO | train_inner | epoch 004: 1706 / 3002 loss=2.593, ppl=6.03, wps=5832.7, ups=0.09, wpb=64787, bsz=128, num_updates=10649, lr=9.99228e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=122231 2021-06-20 04:36:07 | INFO | train_inner | epoch 004: 1707 / 3002 loss=2.421, ppl=5.36, wps=5933.5, ups=0.09, wpb=64884, bsz=128, num_updates=10650, lr=9.99228e-05, gnorm=2.052, loss_scale=8, train_wall=10, gb_free=2.8, wall=122242 2021-06-20 04:36:18 | INFO | train_inner | epoch 004: 1708 / 3002 loss=2.624, ppl=6.16, wps=5930.6, ups=0.09, wpb=64788, bsz=128, num_updates=10651, lr=9.99228e-05, gnorm=2.128, loss_scale=8, train_wall=10, gb_free=2.8, wall=122252 2021-06-20 04:36:29 | INFO | train_inner | epoch 004: 1709 / 3002 loss=2.653, ppl=6.29, wps=5779.8, ups=0.09, wpb=64819, bsz=128, num_updates=10652, lr=9.99228e-05, gnorm=2.043, loss_scale=8, train_wall=11, gb_free=2.8, wall=122264 2021-06-20 04:36:40 | INFO | train_inner | epoch 004: 1710 / 3002 loss=2.457, ppl=5.49, wps=5918.8, ups=0.09, wpb=64802, bsz=128, num_updates=10653, lr=9.99228e-05, gnorm=1.958, loss_scale=8, train_wall=10, gb_free=2.8, wall=122275 2021-06-20 04:36:51 | INFO | train_inner | epoch 004: 1711 / 3002 loss=2.63, ppl=6.19, wps=5803, ups=0.09, wpb=64800, bsz=128, num_updates=10654, lr=9.99228e-05, gnorm=2.047, loss_scale=8, train_wall=11, gb_free=2.8, wall=122286 2021-06-20 04:37:02 | INFO | train_inner | epoch 004: 1712 / 3002 loss=2.553, ppl=5.87, wps=5962.3, ups=0.09, wpb=64854, bsz=128, num_updates=10655, lr=9.99228e-05, gnorm=2.008, loss_scale=8, train_wall=10, gb_free=2.8, wall=122297 2021-06-20 04:37:13 | INFO | train_inner | epoch 004: 1713 / 3002 loss=2.692, ppl=6.46, wps=5797.8, ups=0.09, wpb=64827, bsz=128, num_updates=10656, lr=9.99227e-05, gnorm=2.742, loss_scale=8, train_wall=11, gb_free=2.8, wall=122308 2021-06-20 04:37:24 | INFO | train_inner | epoch 004: 1714 / 3002 loss=2.642, ppl=6.24, wps=5887.6, ups=0.09, wpb=64777, bsz=128, num_updates=10657, lr=9.99227e-05, gnorm=2.14, loss_scale=8, train_wall=11, gb_free=2.8, wall=122319 2021-06-20 04:37:36 | INFO | train_inner | epoch 004: 1715 / 3002 loss=2.386, ppl=5.23, wps=5812.7, ups=0.09, wpb=64831, bsz=128, num_updates=10658, lr=9.99227e-05, gnorm=2.015, loss_scale=8, train_wall=11, gb_free=2.8, wall=122330 2021-06-20 04:37:47 | INFO | train_inner | epoch 004: 1716 / 3002 loss=2.704, ppl=6.52, wps=5903.6, ups=0.09, wpb=64808, bsz=128, num_updates=10659, lr=9.99227e-05, gnorm=2.117, loss_scale=8, train_wall=10, gb_free=2.8, wall=122341 2021-06-20 04:37:58 | INFO | train_inner | epoch 004: 1717 / 3002 loss=2.569, ppl=5.93, wps=5856.7, ups=0.09, wpb=64778, bsz=128, num_updates=10660, lr=9.99227e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=122352 2021-06-20 04:38:09 | INFO | train_inner | epoch 004: 1718 / 3002 loss=2.636, ppl=6.22, wps=5924.1, ups=0.09, wpb=64817, bsz=128, num_updates=10661, lr=9.99227e-05, gnorm=2.069, loss_scale=8, train_wall=10, gb_free=2.8, wall=122363 2021-06-20 04:38:20 | INFO | train_inner | epoch 004: 1719 / 3002 loss=2.658, ppl=6.31, wps=5889, ups=0.09, wpb=64841, bsz=128, num_updates=10662, lr=9.99227e-05, gnorm=2.077, loss_scale=8, train_wall=11, gb_free=2.8, wall=122374 2021-06-20 04:38:31 | INFO | train_inner | epoch 004: 1720 / 3002 loss=2.615, ppl=6.13, wps=5849.7, ups=0.09, wpb=64866, bsz=128, num_updates=10663, lr=9.99227e-05, gnorm=2.081, loss_scale=8, train_wall=11, gb_free=2.8, wall=122385 2021-06-20 04:38:42 | INFO | train_inner | epoch 004: 1721 / 3002 loss=2.532, ppl=5.78, wps=5944.8, ups=0.09, wpb=64808, bsz=128, num_updates=10664, lr=9.99227e-05, gnorm=2.221, loss_scale=8, train_wall=10, gb_free=2.8, wall=122396 2021-06-20 04:38:53 | INFO | train_inner | epoch 004: 1722 / 3002 loss=2.689, ppl=6.45, wps=5873.7, ups=0.09, wpb=64894, bsz=128, num_updates=10665, lr=9.99227e-05, gnorm=2.166, loss_scale=8, train_wall=11, gb_free=2.8, wall=122407 2021-06-20 04:39:04 | INFO | train_inner | epoch 004: 1723 / 3002 loss=2.4, ppl=5.28, wps=5870, ups=0.09, wpb=64859, bsz=128, num_updates=10666, lr=9.99227e-05, gnorm=2.007, loss_scale=8, train_wall=11, gb_free=2.8, wall=122418 2021-06-20 04:39:14 | INFO | train_inner | epoch 004: 1724 / 3002 loss=2.481, ppl=5.58, wps=6037, ups=0.09, wpb=64920, bsz=128, num_updates=10667, lr=9.99227e-05, gnorm=2.091, loss_scale=8, train_wall=10, gb_free=2.8, wall=122429 2021-06-20 04:39:25 | INFO | train_inner | epoch 004: 1725 / 3002 loss=2.606, ppl=6.09, wps=5945.6, ups=0.09, wpb=64844, bsz=128, num_updates=10668, lr=9.99226e-05, gnorm=3, loss_scale=8, train_wall=10, gb_free=2.8, wall=122440 2021-06-20 04:39:37 | INFO | train_inner | epoch 004: 1726 / 3002 loss=2.527, ppl=5.76, wps=5828.1, ups=0.09, wpb=64842, bsz=128, num_updates=10669, lr=9.99226e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=122451 2021-06-20 04:39:48 | INFO | train_inner | epoch 004: 1727 / 3002 loss=2.451, ppl=5.47, wps=5847.6, ups=0.09, wpb=64805, bsz=128, num_updates=10670, lr=9.99226e-05, gnorm=2.057, loss_scale=8, train_wall=11, gb_free=2.8, wall=122462 2021-06-20 04:39:59 | INFO | train_inner | epoch 004: 1728 / 3002 loss=2.444, ppl=5.44, wps=5864.4, ups=0.09, wpb=64793, bsz=128, num_updates=10671, lr=9.99226e-05, gnorm=9.521, loss_scale=8, train_wall=11, gb_free=2.8, wall=122473 2021-06-20 04:40:10 | INFO | train_inner | epoch 004: 1729 / 3002 loss=2.616, ppl=6.13, wps=5857.6, ups=0.09, wpb=64846, bsz=128, num_updates=10672, lr=9.99226e-05, gnorm=6.097, loss_scale=8, train_wall=11, gb_free=2.8, wall=122484 2021-06-20 04:40:21 | INFO | train_inner | epoch 004: 1730 / 3002 loss=2.494, ppl=5.63, wps=5712.4, ups=0.09, wpb=64755, bsz=128, num_updates=10673, lr=9.99226e-05, gnorm=2.698, loss_scale=8, train_wall=11, gb_free=2.8, wall=122495 2021-06-20 04:40:32 | INFO | train_inner | epoch 004: 1731 / 3002 loss=2.531, ppl=5.78, wps=5760.5, ups=0.09, wpb=64842, bsz=128, num_updates=10674, lr=9.99226e-05, gnorm=2.285, loss_scale=8, train_wall=11, gb_free=2.8, wall=122507 2021-06-20 04:40:44 | INFO | train_inner | epoch 004: 1732 / 3002 loss=2.499, ppl=5.65, wps=5752.2, ups=0.09, wpb=64817, bsz=128, num_updates=10675, lr=9.99226e-05, gnorm=2.05, loss_scale=8, train_wall=11, gb_free=2.8, wall=122518 2021-06-20 04:40:54 | INFO | train_inner | epoch 004: 1733 / 3002 loss=2.633, ppl=6.2, wps=5996.3, ups=0.09, wpb=64866, bsz=128, num_updates=10676, lr=9.99226e-05, gnorm=2.144, loss_scale=8, train_wall=10, gb_free=2.8, wall=122529 2021-06-20 04:41:05 | INFO | train_inner | epoch 004: 1734 / 3002 loss=2.665, ppl=6.34, wps=5882.7, ups=0.09, wpb=64805, bsz=128, num_updates=10677, lr=9.99226e-05, gnorm=2.374, loss_scale=8, train_wall=11, gb_free=2.8, wall=122540 2021-06-20 04:41:16 | INFO | train_inner | epoch 004: 1735 / 3002 loss=2.609, ppl=6.1, wps=5892.7, ups=0.09, wpb=64790, bsz=128, num_updates=10678, lr=9.99226e-05, gnorm=2.206, loss_scale=8, train_wall=11, gb_free=2.8, wall=122551 2021-06-20 04:41:27 | INFO | train_inner | epoch 004: 1736 / 3002 loss=2.537, ppl=5.8, wps=5866.8, ups=0.09, wpb=64748, bsz=128, num_updates=10679, lr=9.99226e-05, gnorm=2.324, loss_scale=8, train_wall=11, gb_free=2.8, wall=122562 2021-06-20 04:41:39 | INFO | train_inner | epoch 004: 1737 / 3002 loss=2.634, ppl=6.21, wps=5809.6, ups=0.09, wpb=64802, bsz=128, num_updates=10680, lr=9.99226e-05, gnorm=2.09, loss_scale=8, train_wall=11, gb_free=2.8, wall=122573 2021-06-20 04:41:50 | INFO | train_inner | epoch 004: 1738 / 3002 loss=2.657, ppl=6.31, wps=5908.5, ups=0.09, wpb=64832, bsz=128, num_updates=10681, lr=9.99225e-05, gnorm=2.292, loss_scale=8, train_wall=11, gb_free=2.8, wall=122584 2021-06-20 04:42:01 | INFO | train_inner | epoch 004: 1739 / 3002 loss=2.562, ppl=5.9, wps=5732.9, ups=0.09, wpb=64842, bsz=128, num_updates=10682, lr=9.99225e-05, gnorm=2.312, loss_scale=8, train_wall=11, gb_free=2.8, wall=122595 2021-06-20 04:42:12 | INFO | train_inner | epoch 004: 1740 / 3002 loss=2.55, ppl=5.85, wps=5732.2, ups=0.09, wpb=64749, bsz=128, num_updates=10683, lr=9.99225e-05, gnorm=2.163, loss_scale=8, train_wall=11, gb_free=2.8, wall=122607 2021-06-20 04:42:23 | INFO | train_inner | epoch 004: 1741 / 3002 loss=2.727, ppl=6.62, wps=5881.8, ups=0.09, wpb=64789, bsz=128, num_updates=10684, lr=9.99225e-05, gnorm=2.128, loss_scale=8, train_wall=11, gb_free=2.8, wall=122618 2021-06-20 04:42:34 | INFO | train_inner | epoch 004: 1742 / 3002 loss=2.576, ppl=5.96, wps=5881.5, ups=0.09, wpb=64810, bsz=128, num_updates=10685, lr=9.99225e-05, gnorm=2.101, loss_scale=8, train_wall=11, gb_free=2.8, wall=122629 2021-06-20 04:42:45 | INFO | train_inner | epoch 004: 1743 / 3002 loss=2.698, ppl=6.49, wps=5821, ups=0.09, wpb=64797, bsz=128, num_updates=10686, lr=9.99225e-05, gnorm=2.064, loss_scale=8, train_wall=11, gb_free=2.8, wall=122640 2021-06-20 04:42:56 | INFO | train_inner | epoch 004: 1744 / 3002 loss=2.487, ppl=5.61, wps=5907.5, ups=0.09, wpb=64855, bsz=128, num_updates=10687, lr=9.99225e-05, gnorm=2.118, loss_scale=8, train_wall=10, gb_free=2.8, wall=122651 2021-06-20 04:43:08 | INFO | train_inner | epoch 004: 1745 / 3002 loss=2.652, ppl=6.29, wps=5699.1, ups=0.09, wpb=64853, bsz=128, num_updates=10688, lr=9.99225e-05, gnorm=2.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=122662 2021-06-20 04:43:19 | INFO | train_inner | epoch 004: 1746 / 3002 loss=2.642, ppl=6.24, wps=5752, ups=0.09, wpb=64832, bsz=128, num_updates=10689, lr=9.99225e-05, gnorm=2.167, loss_scale=8, train_wall=11, gb_free=2.8, wall=122673 2021-06-20 04:43:30 | INFO | train_inner | epoch 004: 1747 / 3002 loss=2.628, ppl=6.18, wps=5861.5, ups=0.09, wpb=64802, bsz=128, num_updates=10690, lr=9.99225e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=122684 2021-06-20 04:43:41 | INFO | train_inner | epoch 004: 1748 / 3002 loss=2.6, ppl=6.06, wps=5784, ups=0.09, wpb=64857, bsz=128, num_updates=10691, lr=9.99225e-05, gnorm=2.398, loss_scale=8, train_wall=11, gb_free=2.8, wall=122696 2021-06-20 04:43:52 | INFO | train_inner | epoch 004: 1749 / 3002 loss=2.521, ppl=5.74, wps=6137.5, ups=0.09, wpb=64906, bsz=128, num_updates=10692, lr=9.99225e-05, gnorm=2.322, loss_scale=8, train_wall=10, gb_free=2.8, wall=122706 2021-06-20 04:44:03 | INFO | train_inner | epoch 004: 1750 / 3002 loss=2.683, ppl=6.42, wps=5805, ups=0.09, wpb=64827, bsz=128, num_updates=10693, lr=9.99224e-05, gnorm=2.146, loss_scale=8, train_wall=11, gb_free=2.8, wall=122717 2021-06-20 04:44:14 | INFO | train_inner | epoch 004: 1751 / 3002 loss=2.663, ppl=6.33, wps=5801, ups=0.09, wpb=64820, bsz=128, num_updates=10694, lr=9.99224e-05, gnorm=2.039, loss_scale=8, train_wall=11, gb_free=2.8, wall=122728 2021-06-20 04:44:25 | INFO | train_inner | epoch 004: 1752 / 3002 loss=2.631, ppl=6.19, wps=5830.4, ups=0.09, wpb=64771, bsz=128, num_updates=10695, lr=9.99224e-05, gnorm=2.101, loss_scale=8, train_wall=11, gb_free=2.8, wall=122740 2021-06-20 04:44:36 | INFO | train_inner | epoch 004: 1753 / 3002 loss=2.446, ppl=5.45, wps=5828.5, ups=0.09, wpb=64814, bsz=128, num_updates=10696, lr=9.99224e-05, gnorm=2.133, loss_scale=8, train_wall=11, gb_free=2.8, wall=122751 2021-06-20 04:44:47 | INFO | train_inner | epoch 004: 1754 / 3002 loss=2.588, ppl=6.01, wps=5899.5, ups=0.09, wpb=64871, bsz=128, num_updates=10697, lr=9.99224e-05, gnorm=2.118, loss_scale=8, train_wall=11, gb_free=2.8, wall=122762 2021-06-20 04:44:59 | INFO | train_inner | epoch 004: 1755 / 3002 loss=2.397, ppl=5.27, wps=5818.3, ups=0.09, wpb=64879, bsz=128, num_updates=10698, lr=9.99224e-05, gnorm=3.268, loss_scale=8, train_wall=11, gb_free=2.8, wall=122773 2021-06-20 04:45:10 | INFO | train_inner | epoch 004: 1756 / 3002 loss=2.569, ppl=5.93, wps=5817.6, ups=0.09, wpb=64879, bsz=128, num_updates=10699, lr=9.99224e-05, gnorm=2.012, loss_scale=8, train_wall=11, gb_free=2.8, wall=122784 2021-06-20 04:45:21 | INFO | train_inner | epoch 004: 1757 / 3002 loss=2.559, ppl=5.89, wps=5895.2, ups=0.09, wpb=64846, bsz=128, num_updates=10700, lr=9.99224e-05, gnorm=2.112, loss_scale=8, train_wall=11, gb_free=2.8, wall=122795 2021-06-20 04:45:32 | INFO | train_inner | epoch 004: 1758 / 3002 loss=2.44, ppl=5.43, wps=5884.5, ups=0.09, wpb=64881, bsz=128, num_updates=10701, lr=9.99224e-05, gnorm=2.142, loss_scale=8, train_wall=11, gb_free=2.8, wall=122806 2021-06-20 04:45:43 | INFO | train_inner | epoch 004: 1759 / 3002 loss=2.515, ppl=5.72, wps=5821.1, ups=0.09, wpb=64827, bsz=128, num_updates=10702, lr=9.99224e-05, gnorm=2.11, loss_scale=8, train_wall=11, gb_free=2.8, wall=122817 2021-06-20 04:45:54 | INFO | train_inner | epoch 004: 1760 / 3002 loss=2.382, ppl=5.21, wps=5965, ups=0.09, wpb=64857, bsz=128, num_updates=10703, lr=9.99224e-05, gnorm=2.002, loss_scale=8, train_wall=10, gb_free=2.8, wall=122828 2021-06-20 04:46:05 | INFO | train_inner | epoch 004: 1761 / 3002 loss=2.651, ppl=6.28, wps=5873.6, ups=0.09, wpb=64773, bsz=128, num_updates=10704, lr=9.99224e-05, gnorm=2.135, loss_scale=8, train_wall=11, gb_free=2.8, wall=122839 2021-06-20 04:46:16 | INFO | train_inner | epoch 004: 1762 / 3002 loss=2.515, ppl=5.71, wps=5973.1, ups=0.09, wpb=64848, bsz=128, num_updates=10705, lr=9.99224e-05, gnorm=2.272, loss_scale=8, train_wall=10, gb_free=2.8, wall=122850 2021-06-20 04:46:27 | INFO | train_inner | epoch 004: 1763 / 3002 loss=2.576, ppl=5.96, wps=5910.8, ups=0.09, wpb=64887, bsz=128, num_updates=10706, lr=9.99223e-05, gnorm=2.481, loss_scale=8, train_wall=11, gb_free=2.8, wall=122861 2021-06-20 04:46:38 | INFO | train_inner | epoch 004: 1764 / 3002 loss=2.662, ppl=6.33, wps=5897.5, ups=0.09, wpb=64857, bsz=128, num_updates=10707, lr=9.99223e-05, gnorm=2.125, loss_scale=8, train_wall=11, gb_free=2.8, wall=122872 2021-06-20 04:46:49 | INFO | train_inner | epoch 004: 1765 / 3002 loss=2.616, ppl=6.13, wps=5873.7, ups=0.09, wpb=64846, bsz=128, num_updates=10708, lr=9.99223e-05, gnorm=2.051, loss_scale=8, train_wall=11, gb_free=2.8, wall=122883 2021-06-20 04:47:00 | INFO | train_inner | epoch 004: 1766 / 3002 loss=2.596, ppl=6.05, wps=5960.9, ups=0.09, wpb=64872, bsz=128, num_updates=10709, lr=9.99223e-05, gnorm=2.038, loss_scale=16, train_wall=10, gb_free=2.8, wall=122894 2021-06-20 04:47:11 | INFO | train_inner | epoch 004: 1767 / 3002 loss=2.533, ppl=5.79, wps=5861.7, ups=0.09, wpb=64872, bsz=128, num_updates=10710, lr=9.99223e-05, gnorm=2.085, loss_scale=16, train_wall=11, gb_free=2.8, wall=122905 2021-06-20 04:47:22 | INFO | train_inner | epoch 004: 1768 / 3002 loss=2.615, ppl=6.12, wps=5913.7, ups=0.09, wpb=64896, bsz=128, num_updates=10711, lr=9.99223e-05, gnorm=2.121, loss_scale=16, train_wall=11, gb_free=2.8, wall=122916 2021-06-20 04:47:33 | INFO | train_inner | epoch 004: 1769 / 3002 loss=2.451, ppl=5.47, wps=5830.6, ups=0.09, wpb=64774, bsz=128, num_updates=10712, lr=9.99223e-05, gnorm=2.021, loss_scale=16, train_wall=11, gb_free=2.8, wall=122927 2021-06-20 04:47:44 | INFO | train_inner | epoch 004: 1770 / 3002 loss=2.602, ppl=6.07, wps=5755.7, ups=0.09, wpb=64811, bsz=128, num_updates=10713, lr=9.99223e-05, gnorm=2.018, loss_scale=16, train_wall=11, gb_free=2.8, wall=122938 2021-06-20 04:47:55 | INFO | train_inner | epoch 004: 1771 / 3002 loss=2.497, ppl=5.64, wps=5770.5, ups=0.09, wpb=64852, bsz=128, num_updates=10714, lr=9.99223e-05, gnorm=2.043, loss_scale=16, train_wall=11, gb_free=2.8, wall=122949 2021-06-20 04:48:06 | INFO | train_inner | epoch 004: 1772 / 3002 loss=2.71, ppl=6.54, wps=5799.3, ups=0.09, wpb=64775, bsz=128, num_updates=10715, lr=9.99223e-05, gnorm=2.278, loss_scale=16, train_wall=11, gb_free=2.8, wall=122961 2021-06-20 04:48:17 | INFO | train_inner | epoch 004: 1773 / 3002 loss=2.561, ppl=5.9, wps=5806.4, ups=0.09, wpb=64846, bsz=128, num_updates=10716, lr=9.99223e-05, gnorm=3.744, loss_scale=16, train_wall=11, gb_free=2.8, wall=122972 2021-06-20 04:48:29 | INFO | train_inner | epoch 004: 1774 / 3002 loss=2.657, ppl=6.31, wps=5745.4, ups=0.09, wpb=64802, bsz=128, num_updates=10717, lr=9.99223e-05, gnorm=2.205, loss_scale=16, train_wall=11, gb_free=2.8, wall=122983 2021-06-20 04:48:40 | INFO | train_inner | epoch 004: 1775 / 3002 loss=2.608, ppl=6.1, wps=5858.8, ups=0.09, wpb=64826, bsz=128, num_updates=10718, lr=9.99222e-05, gnorm=2.138, loss_scale=16, train_wall=11, gb_free=2.8, wall=122994 2021-06-20 04:48:51 | INFO | train_inner | epoch 004: 1776 / 3002 loss=2.564, ppl=5.91, wps=5894.1, ups=0.09, wpb=64897, bsz=128, num_updates=10719, lr=9.99222e-05, gnorm=2.078, loss_scale=16, train_wall=11, gb_free=2.8, wall=123005 2021-06-20 04:49:02 | INFO | train_inner | epoch 004: 1777 / 3002 loss=2.435, ppl=5.41, wps=5872.8, ups=0.09, wpb=64802, bsz=128, num_updates=10720, lr=9.99222e-05, gnorm=2.012, loss_scale=16, train_wall=11, gb_free=2.8, wall=123016 2021-06-20 04:49:13 | INFO | train_inner | epoch 004: 1778 / 3002 loss=2.608, ppl=6.1, wps=5916.1, ups=0.09, wpb=64887, bsz=128, num_updates=10721, lr=9.99222e-05, gnorm=2.039, loss_scale=16, train_wall=11, gb_free=2.8, wall=123027 2021-06-20 04:49:24 | INFO | train_inner | epoch 004: 1779 / 3002 loss=2.489, ppl=5.61, wps=5917, ups=0.09, wpb=64814, bsz=128, num_updates=10722, lr=9.99222e-05, gnorm=1.947, loss_scale=16, train_wall=10, gb_free=2.8, wall=123038 2021-06-20 04:49:35 | INFO | train_inner | epoch 004: 1780 / 3002 loss=2.578, ppl=5.97, wps=5936.4, ups=0.09, wpb=64914, bsz=128, num_updates=10723, lr=9.99222e-05, gnorm=1.994, loss_scale=16, train_wall=11, gb_free=2.8, wall=123049 2021-06-20 04:49:46 | INFO | train_inner | epoch 004: 1781 / 3002 loss=2.605, ppl=6.08, wps=5845.2, ups=0.09, wpb=64799, bsz=128, num_updates=10724, lr=9.99222e-05, gnorm=2.082, loss_scale=16, train_wall=11, gb_free=2.8, wall=123060 2021-06-20 04:49:57 | INFO | train_inner | epoch 004: 1782 / 3002 loss=2.628, ppl=6.18, wps=6016.6, ups=0.09, wpb=64787, bsz=128, num_updates=10725, lr=9.99222e-05, gnorm=2.004, loss_scale=16, train_wall=10, gb_free=2.8, wall=123071 2021-06-20 04:50:08 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-20 04:50:19 | INFO | train_inner | epoch 004: 1784 / 3002 loss=2.495, ppl=5.64, wps=2941.9, ups=0.05, wpb=64766, bsz=128, num_updates=10726, lr=9.99222e-05, gnorm=2.084, loss_scale=8, train_wall=21, gb_free=2.8, wall=123093 2021-06-20 04:50:30 | INFO | train_inner | epoch 004: 1785 / 3002 loss=2.479, ppl=5.57, wps=5884, ups=0.09, wpb=64842, bsz=128, num_updates=10727, lr=9.99222e-05, gnorm=2.503, loss_scale=8, train_wall=11, gb_free=2.8, wall=123104 2021-06-20 04:50:41 | INFO | train_inner | epoch 004: 1786 / 3002 loss=2.541, ppl=5.82, wps=5773.5, ups=0.09, wpb=64902, bsz=128, num_updates=10728, lr=9.99222e-05, gnorm=2.261, loss_scale=8, train_wall=11, gb_free=2.8, wall=123115 2021-06-20 04:50:52 | INFO | train_inner | epoch 004: 1787 / 3002 loss=2.593, ppl=6.03, wps=5805.5, ups=0.09, wpb=64871, bsz=128, num_updates=10729, lr=9.99222e-05, gnorm=2.091, loss_scale=8, train_wall=11, gb_free=2.8, wall=123126 2021-06-20 04:51:03 | INFO | train_inner | epoch 004: 1788 / 3002 loss=2.573, ppl=5.95, wps=5908.2, ups=0.09, wpb=64855, bsz=128, num_updates=10730, lr=9.99222e-05, gnorm=2.107, loss_scale=8, train_wall=11, gb_free=2.8, wall=123137 2021-06-20 04:51:14 | INFO | train_inner | epoch 004: 1789 / 3002 loss=2.524, ppl=5.75, wps=5880.3, ups=0.09, wpb=64823, bsz=128, num_updates=10731, lr=9.99221e-05, gnorm=2.118, loss_scale=8, train_wall=11, gb_free=2.8, wall=123148 2021-06-20 04:51:25 | INFO | train_inner | epoch 004: 1790 / 3002 loss=2.531, ppl=5.78, wps=5872.3, ups=0.09, wpb=64764, bsz=128, num_updates=10732, lr=9.99221e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=123159 2021-06-20 04:51:36 | INFO | train_inner | epoch 004: 1791 / 3002 loss=2.528, ppl=5.77, wps=5861, ups=0.09, wpb=64810, bsz=128, num_updates=10733, lr=9.99221e-05, gnorm=2.186, loss_scale=8, train_wall=11, gb_free=2.8, wall=123170 2021-06-20 04:51:47 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 2021-06-20 04:51:58 | INFO | train_inner | epoch 004: 1793 / 3002 loss=2.55, ppl=5.86, wps=2949.6, ups=0.05, wpb=64794, bsz=128, num_updates=10734, lr=9.99221e-05, gnorm=2.061, loss_scale=4, train_wall=21, gb_free=2.8, wall=123192 2021-06-20 04:52:09 | INFO | train_inner | epoch 004: 1794 / 3002 loss=2.612, ppl=6.11, wps=5861.2, ups=0.09, wpb=64832, bsz=128, num_updates=10735, lr=9.99221e-05, gnorm=2.019, loss_scale=4, train_wall=11, gb_free=2.8, wall=123203 2021-06-20 04:52:20 | INFO | train_inner | epoch 004: 1795 / 3002 loss=2.509, ppl=5.69, wps=5827.2, ups=0.09, wpb=64805, bsz=128, num_updates=10736, lr=9.99221e-05, gnorm=2.522, loss_scale=4, train_wall=11, gb_free=2.8, wall=123215 2021-06-20 04:52:32 | INFO | train_inner | epoch 004: 1796 / 3002 loss=2.482, ppl=5.59, wps=5746.2, ups=0.09, wpb=64817, bsz=128, num_updates=10737, lr=9.99221e-05, gnorm=2.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=123226 2021-06-20 04:52:43 | INFO | train_inner | epoch 004: 1797 / 3002 loss=2.425, ppl=5.37, wps=5801.5, ups=0.09, wpb=64841, bsz=128, num_updates=10738, lr=9.99221e-05, gnorm=2.069, loss_scale=4, train_wall=11, gb_free=2.8, wall=123237 2021-06-20 04:52:54 | INFO | train_inner | epoch 004: 1798 / 3002 loss=2.643, ppl=6.25, wps=5868.7, ups=0.09, wpb=64755, bsz=128, num_updates=10739, lr=9.99221e-05, gnorm=5.98, loss_scale=4, train_wall=11, gb_free=2.8, wall=123248 2021-06-20 04:53:05 | INFO | train_inner | epoch 004: 1799 / 3002 loss=2.568, ppl=5.93, wps=5828.8, ups=0.09, wpb=64735, bsz=128, num_updates=10740, lr=9.99221e-05, gnorm=2.09, loss_scale=4, train_wall=11, gb_free=2.8, wall=123259 2021-06-20 04:53:16 | INFO | train_inner | epoch 004: 1800 / 3002 loss=2.547, ppl=5.84, wps=5796.8, ups=0.09, wpb=64839, bsz=128, num_updates=10741, lr=9.99221e-05, gnorm=1.996, loss_scale=4, train_wall=11, gb_free=2.8, wall=123270 2021-06-20 04:53:27 | INFO | train_inner | epoch 004: 1801 / 3002 loss=2.542, ppl=5.82, wps=5800.3, ups=0.09, wpb=64912, bsz=128, num_updates=10742, lr=9.99221e-05, gnorm=2.535, loss_scale=4, train_wall=11, gb_free=2.8, wall=123282 2021-06-20 04:53:38 | INFO | train_inner | epoch 004: 1802 / 3002 loss=2.565, ppl=5.92, wps=5862.3, ups=0.09, wpb=64773, bsz=128, num_updates=10743, lr=9.9922e-05, gnorm=2.148, loss_scale=4, train_wall=11, gb_free=2.8, wall=123293 2021-06-20 04:53:49 | INFO | train_inner | epoch 004: 1803 / 3002 loss=2.429, ppl=5.38, wps=5827.5, ups=0.09, wpb=64775, bsz=128, num_updates=10744, lr=9.9922e-05, gnorm=2.102, loss_scale=4, train_wall=11, gb_free=2.8, wall=123304 2021-06-20 04:54:00 | INFO | train_inner | epoch 004: 1804 / 3002 loss=2.607, ppl=6.09, wps=5864, ups=0.09, wpb=64822, bsz=128, num_updates=10745, lr=9.9922e-05, gnorm=2.462, loss_scale=4, train_wall=11, gb_free=2.8, wall=123315 2021-06-20 04:54:12 | INFO | train_inner | epoch 004: 1805 / 3002 loss=2.499, ppl=5.65, wps=5790.3, ups=0.09, wpb=64781, bsz=128, num_updates=10746, lr=9.9922e-05, gnorm=2.065, loss_scale=4, train_wall=11, gb_free=2.8, wall=123326 2021-06-20 04:54:23 | INFO | train_inner | epoch 004: 1806 / 3002 loss=2.513, ppl=5.71, wps=5788, ups=0.09, wpb=64828, bsz=128, num_updates=10747, lr=9.9922e-05, gnorm=2.075, loss_scale=4, train_wall=11, gb_free=2.8, wall=123337 2021-06-20 04:54:34 | INFO | train_inner | epoch 004: 1807 / 3002 loss=2.614, ppl=6.12, wps=5920.6, ups=0.09, wpb=64865, bsz=128, num_updates=10748, lr=9.9922e-05, gnorm=2.083, loss_scale=4, train_wall=11, gb_free=2.8, wall=123348 2021-06-20 04:54:45 | INFO | train_inner | epoch 004: 1808 / 3002 loss=2.529, ppl=5.77, wps=5870, ups=0.09, wpb=64882, bsz=128, num_updates=10749, lr=9.9922e-05, gnorm=2.103, loss_scale=4, train_wall=11, gb_free=2.8, wall=123359 2021-06-20 04:54:56 | INFO | train_inner | epoch 004: 1809 / 3002 loss=2.463, ppl=5.51, wps=5925.9, ups=0.09, wpb=64881, bsz=128, num_updates=10750, lr=9.9922e-05, gnorm=2.044, loss_scale=4, train_wall=10, gb_free=2.8, wall=123370 2021-06-20 04:55:07 | INFO | train_inner | epoch 004: 1810 / 3002 loss=2.739, ppl=6.68, wps=5884.3, ups=0.09, wpb=64797, bsz=128, num_updates=10751, lr=9.9922e-05, gnorm=2.052, loss_scale=4, train_wall=11, gb_free=2.8, wall=123381 2021-06-20 04:55:18 | INFO | train_inner | epoch 004: 1811 / 3002 loss=2.569, ppl=5.93, wps=5832.4, ups=0.09, wpb=64827, bsz=128, num_updates=10752, lr=9.9922e-05, gnorm=1.985, loss_scale=4, train_wall=11, gb_free=2.8, wall=123392 2021-06-20 04:55:29 | INFO | train_inner | epoch 004: 1812 / 3002 loss=2.577, ppl=5.97, wps=5771.7, ups=0.09, wpb=64836, bsz=128, num_updates=10753, lr=9.9922e-05, gnorm=8.358, loss_scale=4, train_wall=11, gb_free=2.8, wall=123404 2021-06-20 04:55:40 | INFO | train_inner | epoch 004: 1813 / 3002 loss=2.657, ppl=6.31, wps=5864.7, ups=0.09, wpb=64881, bsz=128, num_updates=10754, lr=9.9922e-05, gnorm=1.997, loss_scale=4, train_wall=11, gb_free=2.8, wall=123415 2021-06-20 04:55:51 | INFO | train_inner | epoch 004: 1814 / 3002 loss=2.661, ppl=6.33, wps=5884.2, ups=0.09, wpb=64859, bsz=128, num_updates=10755, lr=9.9922e-05, gnorm=2.113, loss_scale=4, train_wall=11, gb_free=2.8, wall=123426 2021-06-20 04:56:03 | INFO | train_inner | epoch 004: 1815 / 3002 loss=2.677, ppl=6.4, wps=5732.1, ups=0.09, wpb=64795, bsz=128, num_updates=10756, lr=9.99219e-05, gnorm=2.089, loss_scale=4, train_wall=11, gb_free=2.8, wall=123437 2021-06-20 04:56:14 | INFO | train_inner | epoch 004: 1816 / 3002 loss=2.607, ppl=6.09, wps=5832.2, ups=0.09, wpb=64789, bsz=128, num_updates=10757, lr=9.99219e-05, gnorm=2.141, loss_scale=4, train_wall=11, gb_free=2.8, wall=123448 2021-06-20 04:56:25 | INFO | train_inner | epoch 004: 1817 / 3002 loss=2.718, ppl=6.58, wps=5825.9, ups=0.09, wpb=64830, bsz=128, num_updates=10758, lr=9.99219e-05, gnorm=2.071, loss_scale=4, train_wall=11, gb_free=2.8, wall=123459 2021-06-20 04:56:36 | INFO | train_inner | epoch 004: 1818 / 3002 loss=2.691, ppl=6.46, wps=5815.8, ups=0.09, wpb=64851, bsz=128, num_updates=10759, lr=9.99219e-05, gnorm=2.08, loss_scale=4, train_wall=11, gb_free=2.8, wall=123470 2021-06-20 04:56:47 | INFO | train_inner | epoch 004: 1819 / 3002 loss=2.469, ppl=5.53, wps=5820.1, ups=0.09, wpb=64822, bsz=128, num_updates=10760, lr=9.99219e-05, gnorm=2.075, loss_scale=4, train_wall=11, gb_free=2.8, wall=123481 2021-06-20 04:56:58 | INFO | train_inner | epoch 004: 1820 / 3002 loss=2.463, ppl=5.51, wps=5897.1, ups=0.09, wpb=64867, bsz=128, num_updates=10761, lr=9.99219e-05, gnorm=1.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=123492 2021-06-20 04:57:09 | INFO | train_inner | epoch 004: 1821 / 3002 loss=2.42, ppl=5.35, wps=5900.2, ups=0.09, wpb=64787, bsz=128, num_updates=10762, lr=9.99219e-05, gnorm=2.008, loss_scale=4, train_wall=10, gb_free=2.8, wall=123503 2021-06-20 04:57:20 | INFO | train_inner | epoch 004: 1822 / 3002 loss=2.529, ppl=5.77, wps=5853.3, ups=0.09, wpb=64832, bsz=128, num_updates=10763, lr=9.99219e-05, gnorm=2.017, loss_scale=4, train_wall=11, gb_free=2.8, wall=123514 2021-06-20 04:57:31 | INFO | train_inner | epoch 004: 1823 / 3002 loss=2.442, ppl=5.43, wps=5765.5, ups=0.09, wpb=64826, bsz=128, num_updates=10764, lr=9.99219e-05, gnorm=2.072, loss_scale=4, train_wall=11, gb_free=2.8, wall=123526 2021-06-20 04:57:43 | INFO | train_inner | epoch 004: 1824 / 3002 loss=2.719, ppl=6.58, wps=5763, ups=0.09, wpb=64845, bsz=128, num_updates=10765, lr=9.99219e-05, gnorm=2.069, loss_scale=4, train_wall=11, gb_free=2.8, wall=123537 2021-06-20 04:57:54 | INFO | train_inner | epoch 004: 1825 / 3002 loss=2.511, ppl=5.7, wps=5942.6, ups=0.09, wpb=64884, bsz=128, num_updates=10766, lr=9.99219e-05, gnorm=2.087, loss_scale=4, train_wall=10, gb_free=2.8, wall=123548 2021-06-20 04:58:05 | INFO | train_inner | epoch 004: 1826 / 3002 loss=2.551, ppl=5.86, wps=5728.7, ups=0.09, wpb=64824, bsz=128, num_updates=10767, lr=9.99219e-05, gnorm=2.109, loss_scale=4, train_wall=11, gb_free=2.8, wall=123559 2021-06-20 04:58:16 | INFO | train_inner | epoch 004: 1827 / 3002 loss=2.565, ppl=5.92, wps=5876.6, ups=0.09, wpb=64819, bsz=128, num_updates=10768, lr=9.99218e-05, gnorm=2.03, loss_scale=4, train_wall=11, gb_free=2.8, wall=123570 2021-06-20 04:58:27 | INFO | train_inner | epoch 004: 1828 / 3002 loss=2.478, ppl=5.57, wps=5887.6, ups=0.09, wpb=64782, bsz=128, num_updates=10769, lr=9.99218e-05, gnorm=1.967, loss_scale=4, train_wall=11, gb_free=2.8, wall=123581 2021-06-20 04:58:38 | INFO | train_inner | epoch 004: 1829 / 3002 loss=2.527, ppl=5.76, wps=5795.4, ups=0.09, wpb=64895, bsz=128, num_updates=10770, lr=9.99218e-05, gnorm=2.085, loss_scale=4, train_wall=11, gb_free=2.8, wall=123592 2021-06-20 04:58:49 | INFO | train_inner | epoch 004: 1830 / 3002 loss=2.543, ppl=5.83, wps=5848.1, ups=0.09, wpb=64807, bsz=128, num_updates=10771, lr=9.99218e-05, gnorm=2.036, loss_scale=4, train_wall=11, gb_free=2.8, wall=123604 2021-06-20 04:59:00 | INFO | train_inner | epoch 004: 1831 / 3002 loss=2.587, ppl=6.01, wps=5873.4, ups=0.09, wpb=64892, bsz=128, num_updates=10772, lr=9.99218e-05, gnorm=2.025, loss_scale=4, train_wall=11, gb_free=2.8, wall=123615 2021-06-20 04:59:11 | INFO | train_inner | epoch 004: 1832 / 3002 loss=2.812, ppl=7.02, wps=5759.5, ups=0.09, wpb=64756, bsz=128, num_updates=10773, lr=9.99218e-05, gnorm=2.069, loss_scale=4, train_wall=11, gb_free=2.8, wall=123626 2021-06-20 04:59:22 | INFO | train_inner | epoch 004: 1833 / 3002 loss=2.481, ppl=5.58, wps=5894.5, ups=0.09, wpb=64795, bsz=128, num_updates=10774, lr=9.99218e-05, gnorm=2.107, loss_scale=4, train_wall=11, gb_free=2.8, wall=123637 2021-06-20 04:59:34 | INFO | train_inner | epoch 004: 1834 / 3002 loss=2.594, ppl=6.04, wps=5781.9, ups=0.09, wpb=64904, bsz=128, num_updates=10775, lr=9.99218e-05, gnorm=1.967, loss_scale=4, train_wall=11, gb_free=2.8, wall=123648 2021-06-20 04:59:45 | INFO | train_inner | epoch 004: 1835 / 3002 loss=2.546, ppl=5.84, wps=5801.4, ups=0.09, wpb=64779, bsz=128, num_updates=10776, lr=9.99218e-05, gnorm=2.091, loss_scale=4, train_wall=11, gb_free=2.8, wall=123659 2021-06-20 04:59:56 | INFO | train_inner | epoch 004: 1836 / 3002 loss=2.565, ppl=5.92, wps=5834.4, ups=0.09, wpb=64783, bsz=128, num_updates=10777, lr=9.99218e-05, gnorm=2.084, loss_scale=4, train_wall=11, gb_free=2.8, wall=123670 2021-06-20 05:00:07 | INFO | train_inner | epoch 004: 1837 / 3002 loss=2.565, ppl=5.92, wps=6062.8, ups=0.09, wpb=64818, bsz=128, num_updates=10778, lr=9.99218e-05, gnorm=1.939, loss_scale=4, train_wall=10, gb_free=2.8, wall=123681 2021-06-20 05:00:18 | INFO | train_inner | epoch 004: 1838 / 3002 loss=2.457, ppl=5.49, wps=5963.4, ups=0.09, wpb=64914, bsz=128, num_updates=10779, lr=9.99218e-05, gnorm=1.989, loss_scale=4, train_wall=10, gb_free=2.8, wall=123692 2021-06-20 05:00:29 | INFO | train_inner | epoch 004: 1839 / 3002 loss=2.446, ppl=5.45, wps=5834.6, ups=0.09, wpb=64899, bsz=128, num_updates=10780, lr=9.99218e-05, gnorm=2.009, loss_scale=4, train_wall=11, gb_free=2.8, wall=123703 2021-06-20 05:00:39 | INFO | train_inner | epoch 004: 1840 / 3002 loss=2.65, ppl=6.28, wps=6009.2, ups=0.09, wpb=64746, bsz=128, num_updates=10781, lr=9.99217e-05, gnorm=2.009, loss_scale=4, train_wall=10, gb_free=2.8, wall=123714 2021-06-20 05:00:51 | INFO | train_inner | epoch 004: 1841 / 3002 loss=2.474, ppl=5.56, wps=5768.3, ups=0.09, wpb=64853, bsz=128, num_updates=10782, lr=9.99217e-05, gnorm=2.047, loss_scale=4, train_wall=11, gb_free=2.8, wall=123725 2021-06-20 05:01:02 | INFO | train_inner | epoch 004: 1842 / 3002 loss=2.622, ppl=6.15, wps=5855.9, ups=0.09, wpb=64821, bsz=128, num_updates=10783, lr=9.99217e-05, gnorm=2.076, loss_scale=4, train_wall=11, gb_free=2.8, wall=123736 2021-06-20 05:01:13 | INFO | train_inner | epoch 004: 1843 / 3002 loss=2.523, ppl=5.75, wps=5971.4, ups=0.09, wpb=64825, bsz=128, num_updates=10784, lr=9.99217e-05, gnorm=2.066, loss_scale=4, train_wall=10, gb_free=2.8, wall=123747 2021-06-20 05:01:24 | INFO | train_inner | epoch 004: 1844 / 3002 loss=2.555, ppl=5.88, wps=5725.5, ups=0.09, wpb=64758, bsz=128, num_updates=10785, lr=9.99217e-05, gnorm=2.094, loss_scale=4, train_wall=11, gb_free=2.8, wall=123758 2021-06-20 05:01:35 | INFO | train_inner | epoch 004: 1845 / 3002 loss=2.519, ppl=5.73, wps=5921.3, ups=0.09, wpb=64879, bsz=128, num_updates=10786, lr=9.99217e-05, gnorm=1.951, loss_scale=4, train_wall=11, gb_free=2.8, wall=123769 2021-06-20 05:01:46 | INFO | train_inner | epoch 004: 1846 / 3002 loss=2.672, ppl=6.37, wps=5777.4, ups=0.09, wpb=64823, bsz=128, num_updates=10787, lr=9.99217e-05, gnorm=1.943, loss_scale=4, train_wall=11, gb_free=2.8, wall=123780 2021-06-20 05:01:57 | INFO | train_inner | epoch 004: 1847 / 3002 loss=2.538, ppl=5.81, wps=5794.3, ups=0.09, wpb=64899, bsz=128, num_updates=10788, lr=9.99217e-05, gnorm=2.005, loss_scale=4, train_wall=11, gb_free=2.8, wall=123792 2021-06-20 05:02:08 | INFO | train_inner | epoch 004: 1848 / 3002 loss=2.507, ppl=5.68, wps=5883.7, ups=0.09, wpb=64838, bsz=128, num_updates=10789, lr=9.99217e-05, gnorm=2.05, loss_scale=4, train_wall=11, gb_free=2.8, wall=123803 2021-06-20 05:02:19 | INFO | train_inner | epoch 004: 1849 / 3002 loss=2.505, ppl=5.67, wps=5866.3, ups=0.09, wpb=64883, bsz=128, num_updates=10790, lr=9.99217e-05, gnorm=2.014, loss_scale=4, train_wall=11, gb_free=2.8, wall=123814 2021-06-20 05:02:30 | INFO | train_inner | epoch 004: 1850 / 3002 loss=2.701, ppl=6.5, wps=5881.7, ups=0.09, wpb=64835, bsz=128, num_updates=10791, lr=9.99217e-05, gnorm=2.193, loss_scale=4, train_wall=11, gb_free=2.8, wall=123825 2021-06-20 05:02:41 | INFO | train_inner | epoch 004: 1851 / 3002 loss=2.524, ppl=5.75, wps=5859, ups=0.09, wpb=64795, bsz=128, num_updates=10792, lr=9.99217e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=123836 2021-06-20 05:02:53 | INFO | train_inner | epoch 004: 1852 / 3002 loss=2.649, ppl=6.27, wps=5881.6, ups=0.09, wpb=64852, bsz=128, num_updates=10793, lr=9.99216e-05, gnorm=2.096, loss_scale=4, train_wall=11, gb_free=2.8, wall=123847 2021-06-20 05:03:03 | INFO | train_inner | epoch 004: 1853 / 3002 loss=2.598, ppl=6.05, wps=5967, ups=0.09, wpb=64809, bsz=128, num_updates=10794, lr=9.99216e-05, gnorm=2.39, loss_scale=4, train_wall=10, gb_free=2.8, wall=123858 2021-06-20 05:03:14 | INFO | train_inner | epoch 004: 1854 / 3002 loss=2.675, ppl=6.39, wps=5969, ups=0.09, wpb=64767, bsz=128, num_updates=10795, lr=9.99216e-05, gnorm=1.999, loss_scale=4, train_wall=10, gb_free=2.8, wall=123869 2021-06-20 05:03:25 | INFO | train_inner | epoch 004: 1855 / 3002 loss=2.492, ppl=5.62, wps=5878.9, ups=0.09, wpb=64913, bsz=128, num_updates=10796, lr=9.99216e-05, gnorm=1.971, loss_scale=4, train_wall=11, gb_free=2.8, wall=123880 2021-06-20 05:03:36 | INFO | train_inner | epoch 004: 1856 / 3002 loss=2.574, ppl=5.95, wps=5962.5, ups=0.09, wpb=64913, bsz=128, num_updates=10797, lr=9.99216e-05, gnorm=2.032, loss_scale=4, train_wall=10, gb_free=2.8, wall=123890 2021-06-20 05:03:47 | INFO | train_inner | epoch 004: 1857 / 3002 loss=2.622, ppl=6.16, wps=5830.1, ups=0.09, wpb=64859, bsz=128, num_updates=10798, lr=9.99216e-05, gnorm=1.977, loss_scale=4, train_wall=11, gb_free=2.8, wall=123902 2021-06-20 05:03:58 | INFO | train_inner | epoch 004: 1858 / 3002 loss=2.431, ppl=5.39, wps=5902.3, ups=0.09, wpb=64884, bsz=128, num_updates=10799, lr=9.99216e-05, gnorm=2.005, loss_scale=4, train_wall=11, gb_free=2.8, wall=123913 2021-06-20 05:04:09 | INFO | train_inner | epoch 004: 1859 / 3002 loss=2.486, ppl=5.6, wps=5807.9, ups=0.09, wpb=64907, bsz=128, num_updates=10800, lr=9.99216e-05, gnorm=2.033, loss_scale=4, train_wall=11, gb_free=2.8, wall=123924 2021-06-20 05:04:20 | INFO | train_inner | epoch 004: 1860 / 3002 loss=2.7, ppl=6.5, wps=5907.1, ups=0.09, wpb=64882, bsz=128, num_updates=10801, lr=9.99216e-05, gnorm=2.173, loss_scale=4, train_wall=11, gb_free=2.8, wall=123935 2021-06-20 05:04:31 | INFO | train_inner | epoch 004: 1861 / 3002 loss=2.475, ppl=5.56, wps=5901.3, ups=0.09, wpb=64837, bsz=128, num_updates=10802, lr=9.99216e-05, gnorm=2.119, loss_scale=4, train_wall=11, gb_free=2.8, wall=123946 2021-06-20 05:04:42 | INFO | train_inner | epoch 004: 1862 / 3002 loss=2.563, ppl=5.91, wps=5985.5, ups=0.09, wpb=64888, bsz=128, num_updates=10803, lr=9.99216e-05, gnorm=2.133, loss_scale=4, train_wall=10, gb_free=2.8, wall=123957 2021-06-20 05:04:53 | INFO | train_inner | epoch 004: 1863 / 3002 loss=2.488, ppl=5.61, wps=5801.7, ups=0.09, wpb=64749, bsz=128, num_updates=10804, lr=9.99216e-05, gnorm=2.002, loss_scale=4, train_wall=11, gb_free=2.8, wall=123968 2021-06-20 05:05:04 | INFO | train_inner | epoch 004: 1864 / 3002 loss=2.603, ppl=6.07, wps=5922.3, ups=0.09, wpb=64810, bsz=128, num_updates=10805, lr=9.99216e-05, gnorm=2.044, loss_scale=4, train_wall=10, gb_free=2.8, wall=123979 2021-06-20 05:05:15 | INFO | train_inner | epoch 004: 1865 / 3002 loss=2.681, ppl=6.41, wps=5902.7, ups=0.09, wpb=64834, bsz=128, num_updates=10806, lr=9.99215e-05, gnorm=2.025, loss_scale=4, train_wall=11, gb_free=2.8, wall=123990 2021-06-20 05:05:26 | INFO | train_inner | epoch 004: 1866 / 3002 loss=2.602, ppl=6.07, wps=5916.6, ups=0.09, wpb=64816, bsz=128, num_updates=10807, lr=9.99215e-05, gnorm=2.072, loss_scale=4, train_wall=11, gb_free=2.8, wall=124001 2021-06-20 05:05:37 | INFO | train_inner | epoch 004: 1867 / 3002 loss=2.562, ppl=5.91, wps=5852.4, ups=0.09, wpb=64852, bsz=128, num_updates=10808, lr=9.99215e-05, gnorm=2.031, loss_scale=4, train_wall=11, gb_free=2.8, wall=124012 2021-06-20 05:05:48 | INFO | train_inner | epoch 004: 1868 / 3002 loss=2.538, ppl=5.81, wps=5857.2, ups=0.09, wpb=64805, bsz=128, num_updates=10809, lr=9.99215e-05, gnorm=7.37, loss_scale=4, train_wall=11, gb_free=2.8, wall=124023 2021-06-20 05:06:00 | INFO | train_inner | epoch 004: 1869 / 3002 loss=2.597, ppl=6.05, wps=5778.7, ups=0.09, wpb=64760, bsz=128, num_updates=10810, lr=9.99215e-05, gnorm=2.091, loss_scale=4, train_wall=11, gb_free=2.8, wall=124034 2021-06-20 05:06:11 | INFO | train_inner | epoch 004: 1870 / 3002 loss=2.692, ppl=6.46, wps=5879, ups=0.09, wpb=64820, bsz=128, num_updates=10811, lr=9.99215e-05, gnorm=1.962, loss_scale=4, train_wall=11, gb_free=2.8, wall=124045 2021-06-20 05:06:22 | INFO | train_inner | epoch 004: 1871 / 3002 loss=2.514, ppl=5.71, wps=5903.3, ups=0.09, wpb=64752, bsz=128, num_updates=10812, lr=9.99215e-05, gnorm=2.166, loss_scale=4, train_wall=11, gb_free=2.8, wall=124056 2021-06-20 05:06:33 | INFO | train_inner | epoch 004: 1872 / 3002 loss=2.623, ppl=6.16, wps=5943.4, ups=0.09, wpb=64901, bsz=128, num_updates=10813, lr=9.99215e-05, gnorm=2.139, loss_scale=4, train_wall=10, gb_free=2.8, wall=124067 2021-06-20 05:06:44 | INFO | train_inner | epoch 004: 1873 / 3002 loss=2.497, ppl=5.64, wps=5800.9, ups=0.09, wpb=64750, bsz=128, num_updates=10814, lr=9.99215e-05, gnorm=1.999, loss_scale=4, train_wall=11, gb_free=2.8, wall=124078 2021-06-20 05:06:55 | INFO | train_inner | epoch 004: 1874 / 3002 loss=2.515, ppl=5.72, wps=5843.6, ups=0.09, wpb=64858, bsz=128, num_updates=10815, lr=9.99215e-05, gnorm=1.959, loss_scale=4, train_wall=11, gb_free=2.8, wall=124089 2021-06-20 05:07:06 | INFO | train_inner | epoch 004: 1875 / 3002 loss=2.488, ppl=5.61, wps=5821.9, ups=0.09, wpb=64767, bsz=128, num_updates=10816, lr=9.99215e-05, gnorm=2.037, loss_scale=4, train_wall=11, gb_free=2.8, wall=124100 2021-06-20 05:07:17 | INFO | train_inner | epoch 004: 1876 / 3002 loss=2.624, ppl=6.16, wps=5867.9, ups=0.09, wpb=64807, bsz=128, num_updates=10817, lr=9.99215e-05, gnorm=2.212, loss_scale=4, train_wall=11, gb_free=2.8, wall=124111 2021-06-20 05:07:28 | INFO | train_inner | epoch 004: 1877 / 3002 loss=2.435, ppl=5.41, wps=5975.9, ups=0.09, wpb=64872, bsz=128, num_updates=10818, lr=9.99214e-05, gnorm=2.053, loss_scale=4, train_wall=10, gb_free=2.8, wall=124122 2021-06-20 05:07:39 | INFO | train_inner | epoch 004: 1878 / 3002 loss=2.709, ppl=6.54, wps=5994.3, ups=0.09, wpb=64855, bsz=128, num_updates=10819, lr=9.99214e-05, gnorm=2.109, loss_scale=4, train_wall=10, gb_free=2.8, wall=124133 2021-06-20 05:07:50 | INFO | train_inner | epoch 004: 1879 / 3002 loss=2.545, ppl=5.84, wps=5856.1, ups=0.09, wpb=64842, bsz=128, num_updates=10820, lr=9.99214e-05, gnorm=2.06, loss_scale=4, train_wall=11, gb_free=2.8, wall=124144 2021-06-20 05:08:01 | INFO | train_inner | epoch 004: 1880 / 3002 loss=2.541, ppl=5.82, wps=5882.5, ups=0.09, wpb=64740, bsz=128, num_updates=10821, lr=9.99214e-05, gnorm=2.033, loss_scale=4, train_wall=11, gb_free=2.8, wall=124155 2021-06-20 05:08:12 | INFO | train_inner | epoch 004: 1881 / 3002 loss=2.603, ppl=6.08, wps=5706.3, ups=0.09, wpb=64840, bsz=128, num_updates=10822, lr=9.99214e-05, gnorm=1.965, loss_scale=4, train_wall=11, gb_free=2.8, wall=124166 2021-06-20 05:08:23 | INFO | train_inner | epoch 004: 1882 / 3002 loss=2.689, ppl=6.45, wps=5817.1, ups=0.09, wpb=64865, bsz=128, num_updates=10823, lr=9.99214e-05, gnorm=2.127, loss_scale=4, train_wall=11, gb_free=2.8, wall=124178 2021-06-20 05:08:34 | INFO | train_inner | epoch 004: 1883 / 3002 loss=2.558, ppl=5.89, wps=5946, ups=0.09, wpb=64932, bsz=128, num_updates=10824, lr=9.99214e-05, gnorm=2.054, loss_scale=4, train_wall=10, gb_free=2.8, wall=124189 2021-06-20 05:08:45 | INFO | train_inner | epoch 004: 1884 / 3002 loss=2.562, ppl=5.9, wps=5869.9, ups=0.09, wpb=64855, bsz=128, num_updates=10825, lr=9.99214e-05, gnorm=2.033, loss_scale=4, train_wall=11, gb_free=2.8, wall=124200 2021-06-20 05:08:56 | INFO | train_inner | epoch 004: 1885 / 3002 loss=2.574, ppl=5.95, wps=5931.1, ups=0.09, wpb=64860, bsz=128, num_updates=10826, lr=9.99214e-05, gnorm=2.147, loss_scale=4, train_wall=10, gb_free=2.8, wall=124211 2021-06-20 05:09:07 | INFO | train_inner | epoch 004: 1886 / 3002 loss=2.56, ppl=5.9, wps=5886.4, ups=0.09, wpb=64871, bsz=128, num_updates=10827, lr=9.99214e-05, gnorm=2.148, loss_scale=4, train_wall=11, gb_free=2.8, wall=124222 2021-06-20 05:09:18 | INFO | train_inner | epoch 004: 1887 / 3002 loss=2.671, ppl=6.37, wps=5903.3, ups=0.09, wpb=64892, bsz=128, num_updates=10828, lr=9.99214e-05, gnorm=2.143, loss_scale=4, train_wall=11, gb_free=2.8, wall=124233 2021-06-20 05:09:29 | INFO | train_inner | epoch 004: 1888 / 3002 loss=2.652, ppl=6.29, wps=5926.8, ups=0.09, wpb=64917, bsz=128, num_updates=10829, lr=9.99214e-05, gnorm=2.21, loss_scale=4, train_wall=10, gb_free=2.8, wall=124243 2021-06-20 05:09:40 | INFO | train_inner | epoch 004: 1889 / 3002 loss=2.527, ppl=5.76, wps=5872.2, ups=0.09, wpb=64815, bsz=128, num_updates=10830, lr=9.99214e-05, gnorm=2.484, loss_scale=4, train_wall=11, gb_free=2.8, wall=124255 2021-06-20 05:09:51 | INFO | train_inner | epoch 004: 1890 / 3002 loss=2.537, ppl=5.81, wps=5841.3, ups=0.09, wpb=64757, bsz=128, num_updates=10831, lr=9.99213e-05, gnorm=2.135, loss_scale=4, train_wall=11, gb_free=2.8, wall=124266 2021-06-20 05:10:02 | INFO | train_inner | epoch 004: 1891 / 3002 loss=2.658, ppl=6.31, wps=5912.2, ups=0.09, wpb=64889, bsz=128, num_updates=10832, lr=9.99213e-05, gnorm=2.053, loss_scale=4, train_wall=11, gb_free=2.8, wall=124277 2021-06-20 05:10:13 | INFO | train_inner | epoch 004: 1892 / 3002 loss=2.653, ppl=6.29, wps=5785.1, ups=0.09, wpb=64848, bsz=128, num_updates=10833, lr=9.99213e-05, gnorm=2.815, loss_scale=4, train_wall=11, gb_free=2.8, wall=124288 2021-06-20 05:10:24 | INFO | train_inner | epoch 004: 1893 / 3002 loss=2.727, ppl=6.62, wps=5941.4, ups=0.09, wpb=64865, bsz=128, num_updates=10834, lr=9.99213e-05, gnorm=3.259, loss_scale=4, train_wall=10, gb_free=2.8, wall=124299 2021-06-20 05:10:35 | INFO | train_inner | epoch 004: 1894 / 3002 loss=2.563, ppl=5.91, wps=5841.6, ups=0.09, wpb=64763, bsz=128, num_updates=10835, lr=9.99213e-05, gnorm=2.869, loss_scale=4, train_wall=11, gb_free=2.8, wall=124310 2021-06-20 05:10:46 | INFO | train_inner | epoch 004: 1895 / 3002 loss=2.336, ppl=5.05, wps=5896.6, ups=0.09, wpb=64859, bsz=128, num_updates=10836, lr=9.99213e-05, gnorm=2.029, loss_scale=4, train_wall=11, gb_free=2.8, wall=124321 2021-06-20 05:10:57 | INFO | train_inner | epoch 004: 1896 / 3002 loss=2.474, ppl=5.56, wps=5977.9, ups=0.09, wpb=64854, bsz=128, num_updates=10837, lr=9.99213e-05, gnorm=5.839, loss_scale=4, train_wall=10, gb_free=2.8, wall=124332 2021-06-20 05:11:08 | INFO | train_inner | epoch 004: 1897 / 3002 loss=2.358, ppl=5.13, wps=5855.1, ups=0.09, wpb=64846, bsz=128, num_updates=10838, lr=9.99213e-05, gnorm=2.044, loss_scale=4, train_wall=11, gb_free=2.8, wall=124343 2021-06-20 05:11:19 | INFO | train_inner | epoch 004: 1898 / 3002 loss=2.427, ppl=5.38, wps=5850.1, ups=0.09, wpb=64873, bsz=128, num_updates=10839, lr=9.99213e-05, gnorm=5.15, loss_scale=4, train_wall=11, gb_free=2.8, wall=124354 2021-06-20 05:11:31 | INFO | train_inner | epoch 004: 1899 / 3002 loss=2.525, ppl=5.76, wps=5844.8, ups=0.09, wpb=64888, bsz=128, num_updates=10840, lr=9.99213e-05, gnorm=2.088, loss_scale=4, train_wall=11, gb_free=2.8, wall=124365 2021-06-20 05:11:41 | INFO | train_inner | epoch 004: 1900 / 3002 loss=2.688, ppl=6.44, wps=5992, ups=0.09, wpb=64769, bsz=128, num_updates=10841, lr=9.99213e-05, gnorm=1.99, loss_scale=4, train_wall=10, gb_free=2.8, wall=124376 2021-06-20 05:11:52 | INFO | train_inner | epoch 004: 1901 / 3002 loss=2.515, ppl=5.72, wps=5856.8, ups=0.09, wpb=64842, bsz=128, num_updates=10842, lr=9.99213e-05, gnorm=1.992, loss_scale=4, train_wall=11, gb_free=2.8, wall=124387 2021-06-20 05:12:04 | INFO | train_inner | epoch 004: 1902 / 3002 loss=2.474, ppl=5.55, wps=5797, ups=0.09, wpb=64821, bsz=128, num_updates=10843, lr=9.99212e-05, gnorm=2.416, loss_scale=4, train_wall=11, gb_free=2.8, wall=124398 2021-06-20 05:12:15 | INFO | train_inner | epoch 004: 1903 / 3002 loss=2.397, ppl=5.27, wps=5839.5, ups=0.09, wpb=64853, bsz=128, num_updates=10844, lr=9.99212e-05, gnorm=2.024, loss_scale=4, train_wall=11, gb_free=2.8, wall=124409 2021-06-20 05:12:26 | INFO | train_inner | epoch 004: 1904 / 3002 loss=2.548, ppl=5.85, wps=5732.9, ups=0.09, wpb=64814, bsz=128, num_updates=10845, lr=9.99212e-05, gnorm=2.151, loss_scale=4, train_wall=11, gb_free=2.8, wall=124420 2021-06-20 05:12:37 | INFO | train_inner | epoch 004: 1905 / 3002 loss=2.442, ppl=5.43, wps=5835.8, ups=0.09, wpb=64855, bsz=128, num_updates=10846, lr=9.99212e-05, gnorm=1.975, loss_scale=4, train_wall=11, gb_free=2.8, wall=124431 2021-06-20 05:12:48 | INFO | train_inner | epoch 004: 1906 / 3002 loss=2.66, ppl=6.32, wps=5811.8, ups=0.09, wpb=64870, bsz=128, num_updates=10847, lr=9.99212e-05, gnorm=2.083, loss_scale=4, train_wall=11, gb_free=2.8, wall=124443 2021-06-20 05:12:59 | INFO | train_inner | epoch 004: 1907 / 3002 loss=2.499, ppl=5.65, wps=5897.9, ups=0.09, wpb=64872, bsz=128, num_updates=10848, lr=9.99212e-05, gnorm=2.02, loss_scale=4, train_wall=11, gb_free=2.8, wall=124454 2021-06-20 05:13:10 | INFO | train_inner | epoch 004: 1908 / 3002 loss=2.596, ppl=6.04, wps=5959.5, ups=0.09, wpb=64901, bsz=128, num_updates=10849, lr=9.99212e-05, gnorm=2.173, loss_scale=4, train_wall=10, gb_free=2.8, wall=124465 2021-06-20 05:13:21 | INFO | train_inner | epoch 004: 1909 / 3002 loss=2.688, ppl=6.44, wps=5917.6, ups=0.09, wpb=64853, bsz=128, num_updates=10850, lr=9.99212e-05, gnorm=2.396, loss_scale=4, train_wall=10, gb_free=2.8, wall=124476 2021-06-20 05:13:32 | INFO | train_inner | epoch 004: 1910 / 3002 loss=2.619, ppl=6.14, wps=5811.2, ups=0.09, wpb=64854, bsz=128, num_updates=10851, lr=9.99212e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=124487 2021-06-20 05:13:43 | INFO | train_inner | epoch 004: 1911 / 3002 loss=2.461, ppl=5.51, wps=5853, ups=0.09, wpb=64775, bsz=128, num_updates=10852, lr=9.99212e-05, gnorm=2.311, loss_scale=4, train_wall=11, gb_free=2.8, wall=124498 2021-06-20 05:13:54 | INFO | train_inner | epoch 004: 1912 / 3002 loss=2.644, ppl=6.25, wps=5872.1, ups=0.09, wpb=64781, bsz=128, num_updates=10853, lr=9.99212e-05, gnorm=2.071, loss_scale=4, train_wall=11, gb_free=2.8, wall=124509 2021-06-20 05:14:06 | INFO | train_inner | epoch 004: 1913 / 3002 loss=2.539, ppl=5.81, wps=5789.8, ups=0.09, wpb=64836, bsz=128, num_updates=10854, lr=9.99212e-05, gnorm=2.167, loss_scale=4, train_wall=11, gb_free=2.8, wall=124520 2021-06-20 05:14:17 | INFO | train_inner | epoch 004: 1914 / 3002 loss=2.742, ppl=6.69, wps=5797.9, ups=0.09, wpb=64829, bsz=128, num_updates=10855, lr=9.99212e-05, gnorm=2.109, loss_scale=4, train_wall=11, gb_free=2.8, wall=124531 2021-06-20 05:14:28 | INFO | train_inner | epoch 004: 1915 / 3002 loss=2.576, ppl=5.96, wps=5867.1, ups=0.09, wpb=64790, bsz=128, num_updates=10856, lr=9.99211e-05, gnorm=2.083, loss_scale=4, train_wall=11, gb_free=2.8, wall=124542 2021-06-20 05:14:39 | INFO | train_inner | epoch 004: 1916 / 3002 loss=2.507, ppl=5.68, wps=5925.6, ups=0.09, wpb=64863, bsz=128, num_updates=10857, lr=9.99211e-05, gnorm=1.995, loss_scale=4, train_wall=10, gb_free=2.8, wall=124553 2021-06-20 05:14:50 | INFO | train_inner | epoch 004: 1917 / 3002 loss=2.593, ppl=6.03, wps=5874.3, ups=0.09, wpb=64717, bsz=128, num_updates=10858, lr=9.99211e-05, gnorm=2.06, loss_scale=4, train_wall=11, gb_free=2.8, wall=124564 2021-06-20 05:15:01 | INFO | train_inner | epoch 004: 1918 / 3002 loss=2.551, ppl=5.86, wps=5814.4, ups=0.09, wpb=64805, bsz=128, num_updates=10859, lr=9.99211e-05, gnorm=1.986, loss_scale=4, train_wall=11, gb_free=2.8, wall=124575 2021-06-20 05:15:12 | INFO | train_inner | epoch 004: 1919 / 3002 loss=2.648, ppl=6.27, wps=5897.4, ups=0.09, wpb=64833, bsz=128, num_updates=10860, lr=9.99211e-05, gnorm=1.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=124586 2021-06-20 05:15:23 | INFO | train_inner | epoch 004: 1920 / 3002 loss=2.504, ppl=5.67, wps=5907.5, ups=0.09, wpb=64804, bsz=128, num_updates=10861, lr=9.99211e-05, gnorm=2.763, loss_scale=8, train_wall=11, gb_free=2.8, wall=124597 2021-06-20 05:15:34 | INFO | train_inner | epoch 004: 1921 / 3002 loss=2.599, ppl=6.06, wps=5871, ups=0.09, wpb=64848, bsz=128, num_updates=10862, lr=9.99211e-05, gnorm=2.063, loss_scale=8, train_wall=11, gb_free=2.8, wall=124608 2021-06-20 05:15:45 | INFO | train_inner | epoch 004: 1922 / 3002 loss=2.519, ppl=5.73, wps=5874.6, ups=0.09, wpb=64835, bsz=128, num_updates=10863, lr=9.99211e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=124619 2021-06-20 05:15:56 | INFO | train_inner | epoch 004: 1923 / 3002 loss=2.566, ppl=5.92, wps=5894.3, ups=0.09, wpb=64934, bsz=128, num_updates=10864, lr=9.99211e-05, gnorm=2.046, loss_scale=8, train_wall=11, gb_free=2.8, wall=124630 2021-06-20 05:16:07 | INFO | train_inner | epoch 004: 1924 / 3002 loss=2.546, ppl=5.84, wps=5836.2, ups=0.09, wpb=64800, bsz=128, num_updates=10865, lr=9.99211e-05, gnorm=2.037, loss_scale=8, train_wall=11, gb_free=2.8, wall=124641 2021-06-20 05:16:18 | INFO | train_inner | epoch 004: 1925 / 3002 loss=2.597, ppl=6.05, wps=5768.6, ups=0.09, wpb=64844, bsz=128, num_updates=10866, lr=9.99211e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=124653 2021-06-20 05:16:29 | INFO | train_inner | epoch 004: 1926 / 3002 loss=2.762, ppl=6.79, wps=5854.9, ups=0.09, wpb=64801, bsz=128, num_updates=10867, lr=9.99211e-05, gnorm=2.039, loss_scale=8, train_wall=11, gb_free=2.8, wall=124664 2021-06-20 05:16:41 | INFO | train_inner | epoch 004: 1927 / 3002 loss=2.544, ppl=5.83, wps=5773.4, ups=0.09, wpb=64806, bsz=128, num_updates=10868, lr=9.9921e-05, gnorm=1.922, loss_scale=8, train_wall=11, gb_free=2.8, wall=124675 2021-06-20 05:16:52 | INFO | train_inner | epoch 004: 1928 / 3002 loss=2.525, ppl=5.75, wps=5879.9, ups=0.09, wpb=64788, bsz=128, num_updates=10869, lr=9.9921e-05, gnorm=2.099, loss_scale=8, train_wall=11, gb_free=2.8, wall=124686 2021-06-20 05:17:03 | INFO | train_inner | epoch 004: 1929 / 3002 loss=2.706, ppl=6.52, wps=5855.4, ups=0.09, wpb=64839, bsz=128, num_updates=10870, lr=9.9921e-05, gnorm=2.051, loss_scale=8, train_wall=11, gb_free=2.8, wall=124697 2021-06-20 05:17:14 | INFO | train_inner | epoch 004: 1930 / 3002 loss=2.599, ppl=6.06, wps=5885.8, ups=0.09, wpb=64848, bsz=128, num_updates=10871, lr=9.9921e-05, gnorm=2.106, loss_scale=8, train_wall=11, gb_free=2.8, wall=124708 2021-06-20 05:17:25 | INFO | train_inner | epoch 004: 1931 / 3002 loss=2.522, ppl=5.74, wps=5873, ups=0.09, wpb=64833, bsz=128, num_updates=10872, lr=9.9921e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=124719 2021-06-20 05:17:36 | INFO | train_inner | epoch 004: 1932 / 3002 loss=2.588, ppl=6.01, wps=5772.5, ups=0.09, wpb=64741, bsz=128, num_updates=10873, lr=9.9921e-05, gnorm=2.074, loss_scale=8, train_wall=11, gb_free=2.8, wall=124730 2021-06-20 05:17:47 | INFO | train_inner | epoch 004: 1933 / 3002 loss=2.58, ppl=5.98, wps=5907.7, ups=0.09, wpb=64818, bsz=128, num_updates=10874, lr=9.9921e-05, gnorm=2.083, loss_scale=8, train_wall=11, gb_free=2.8, wall=124741 2021-06-20 05:17:58 | INFO | train_inner | epoch 004: 1934 / 3002 loss=2.542, ppl=5.82, wps=5782.7, ups=0.09, wpb=64805, bsz=128, num_updates=10875, lr=9.9921e-05, gnorm=2.078, loss_scale=8, train_wall=11, gb_free=2.8, wall=124753 2021-06-20 05:18:09 | INFO | train_inner | epoch 004: 1935 / 3002 loss=2.569, ppl=5.93, wps=5794.8, ups=0.09, wpb=64860, bsz=128, num_updates=10876, lr=9.9921e-05, gnorm=2.076, loss_scale=8, train_wall=11, gb_free=2.8, wall=124764 2021-06-20 05:18:21 | INFO | train_inner | epoch 004: 1936 / 3002 loss=2.432, ppl=5.4, wps=5711.8, ups=0.09, wpb=64908, bsz=128, num_updates=10877, lr=9.9921e-05, gnorm=2.034, loss_scale=8, train_wall=11, gb_free=2.8, wall=124775 2021-06-20 05:18:32 | INFO | train_inner | epoch 004: 1937 / 3002 loss=2.761, ppl=6.78, wps=5807.7, ups=0.09, wpb=64876, bsz=128, num_updates=10878, lr=9.9921e-05, gnorm=2.308, loss_scale=8, train_wall=11, gb_free=2.8, wall=124786 2021-06-20 05:18:43 | INFO | train_inner | epoch 004: 1938 / 3002 loss=2.577, ppl=5.97, wps=5926.3, ups=0.09, wpb=64858, bsz=128, num_updates=10879, lr=9.9921e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=124797 2021-06-20 05:18:54 | INFO | train_inner | epoch 004: 1939 / 3002 loss=2.564, ppl=5.91, wps=5871.3, ups=0.09, wpb=64718, bsz=128, num_updates=10880, lr=9.9921e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=124808 2021-06-20 05:19:05 | INFO | train_inner | epoch 004: 1940 / 3002 loss=2.778, ppl=6.86, wps=5799.5, ups=0.09, wpb=64791, bsz=128, num_updates=10881, lr=9.99209e-05, gnorm=2.076, loss_scale=8, train_wall=11, gb_free=2.8, wall=124819 2021-06-20 05:19:16 | INFO | train_inner | epoch 004: 1941 / 3002 loss=2.497, ppl=5.64, wps=5767.3, ups=0.09, wpb=64834, bsz=128, num_updates=10882, lr=9.99209e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=124831 2021-06-20 05:19:28 | INFO | train_inner | epoch 004: 1942 / 3002 loss=2.48, ppl=5.58, wps=5746.9, ups=0.09, wpb=64928, bsz=128, num_updates=10883, lr=9.99209e-05, gnorm=2.101, loss_scale=8, train_wall=11, gb_free=2.8, wall=124842 2021-06-20 05:19:39 | INFO | train_inner | epoch 004: 1943 / 3002 loss=2.432, ppl=5.4, wps=5885, ups=0.09, wpb=64801, bsz=128, num_updates=10884, lr=9.99209e-05, gnorm=2.058, loss_scale=8, train_wall=11, gb_free=2.8, wall=124853 2021-06-20 05:19:50 | INFO | train_inner | epoch 004: 1944 / 3002 loss=2.568, ppl=5.93, wps=5940.8, ups=0.09, wpb=64799, bsz=128, num_updates=10885, lr=9.99209e-05, gnorm=2.894, loss_scale=8, train_wall=10, gb_free=2.8, wall=124864 2021-06-20 05:20:00 | INFO | train_inner | epoch 004: 1945 / 3002 loss=2.683, ppl=6.42, wps=5965, ups=0.09, wpb=64813, bsz=128, num_updates=10886, lr=9.99209e-05, gnorm=2.145, loss_scale=8, train_wall=10, gb_free=2.8, wall=124875 2021-06-20 05:20:11 | INFO | train_inner | epoch 004: 1946 / 3002 loss=2.859, ppl=7.26, wps=5853.8, ups=0.09, wpb=64886, bsz=128, num_updates=10887, lr=9.99209e-05, gnorm=2.131, loss_scale=8, train_wall=11, gb_free=2.8, wall=124886 2021-06-20 05:20:23 | INFO | train_inner | epoch 004: 1947 / 3002 loss=2.582, ppl=5.99, wps=5815.2, ups=0.09, wpb=64771, bsz=128, num_updates=10888, lr=9.99209e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=124897 2021-06-20 05:20:34 | INFO | train_inner | epoch 004: 1948 / 3002 loss=2.562, ppl=5.9, wps=5796.5, ups=0.09, wpb=64848, bsz=128, num_updates=10889, lr=9.99209e-05, gnorm=2.025, loss_scale=8, train_wall=11, gb_free=2.8, wall=124908 2021-06-20 05:20:45 | INFO | train_inner | epoch 004: 1949 / 3002 loss=2.519, ppl=5.73, wps=5791.1, ups=0.09, wpb=64809, bsz=128, num_updates=10890, lr=9.99209e-05, gnorm=1.976, loss_scale=8, train_wall=11, gb_free=2.8, wall=124919 2021-06-20 05:20:56 | INFO | train_inner | epoch 004: 1950 / 3002 loss=2.655, ppl=6.3, wps=5863.9, ups=0.09, wpb=64832, bsz=128, num_updates=10891, lr=9.99209e-05, gnorm=2.089, loss_scale=8, train_wall=11, gb_free=2.8, wall=124930 2021-06-20 05:21:07 | INFO | train_inner | epoch 004: 1951 / 3002 loss=2.462, ppl=5.51, wps=5740.9, ups=0.09, wpb=64858, bsz=128, num_updates=10892, lr=9.99209e-05, gnorm=2.225, loss_scale=8, train_wall=11, gb_free=2.8, wall=124942 2021-06-20 05:21:18 | INFO | train_inner | epoch 004: 1952 / 3002 loss=2.652, ppl=6.29, wps=5839.2, ups=0.09, wpb=64833, bsz=128, num_updates=10893, lr=9.99208e-05, gnorm=2.051, loss_scale=8, train_wall=11, gb_free=2.8, wall=124953 2021-06-20 05:21:30 | INFO | train_inner | epoch 004: 1953 / 3002 loss=2.541, ppl=5.82, wps=5828, ups=0.09, wpb=64817, bsz=128, num_updates=10894, lr=9.99208e-05, gnorm=2.111, loss_scale=8, train_wall=11, gb_free=2.8, wall=124964 2021-06-20 05:21:41 | INFO | train_inner | epoch 004: 1954 / 3002 loss=2.405, ppl=5.3, wps=5797.8, ups=0.09, wpb=64819, bsz=128, num_updates=10895, lr=9.99208e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=124975 2021-06-20 05:21:52 | INFO | train_inner | epoch 004: 1955 / 3002 loss=2.562, ppl=5.91, wps=5838.4, ups=0.09, wpb=64773, bsz=128, num_updates=10896, lr=9.99208e-05, gnorm=2.169, loss_scale=8, train_wall=11, gb_free=2.8, wall=124986 2021-06-20 05:22:03 | INFO | train_inner | epoch 004: 1956 / 3002 loss=2.464, ppl=5.52, wps=5945.8, ups=0.09, wpb=64772, bsz=128, num_updates=10897, lr=9.99208e-05, gnorm=3.661, loss_scale=8, train_wall=10, gb_free=2.8, wall=124997 2021-06-20 05:22:14 | INFO | train_inner | epoch 004: 1957 / 3002 loss=2.683, ppl=6.42, wps=5757.8, ups=0.09, wpb=64830, bsz=128, num_updates=10898, lr=9.99208e-05, gnorm=2.056, loss_scale=8, train_wall=11, gb_free=2.8, wall=125008 2021-06-20 05:22:25 | INFO | train_inner | epoch 004: 1958 / 3002 loss=2.682, ppl=6.42, wps=5864.4, ups=0.09, wpb=64813, bsz=128, num_updates=10899, lr=9.99208e-05, gnorm=2.075, loss_scale=8, train_wall=11, gb_free=2.8, wall=125019 2021-06-20 05:22:36 | INFO | train_inner | epoch 004: 1959 / 3002 loss=2.495, ppl=5.64, wps=5821.9, ups=0.09, wpb=64835, bsz=128, num_updates=10900, lr=9.99208e-05, gnorm=2.005, loss_scale=8, train_wall=11, gb_free=2.8, wall=125031 2021-06-20 05:22:47 | INFO | train_inner | epoch 004: 1960 / 3002 loss=2.522, ppl=5.75, wps=5901.1, ups=0.09, wpb=64891, bsz=128, num_updates=10901, lr=9.99208e-05, gnorm=2.087, loss_scale=8, train_wall=11, gb_free=2.8, wall=125042 2021-06-20 05:22:58 | INFO | train_inner | epoch 004: 1961 / 3002 loss=2.519, ppl=5.73, wps=5830.6, ups=0.09, wpb=64853, bsz=128, num_updates=10902, lr=9.99208e-05, gnorm=2.266, loss_scale=8, train_wall=11, gb_free=2.8, wall=125053 2021-06-20 05:23:09 | INFO | train_inner | epoch 004: 1962 / 3002 loss=2.68, ppl=6.41, wps=5937.9, ups=0.09, wpb=64835, bsz=128, num_updates=10903, lr=9.99208e-05, gnorm=2.038, loss_scale=8, train_wall=10, gb_free=2.8, wall=125064 2021-06-20 05:23:20 | INFO | train_inner | epoch 004: 1963 / 3002 loss=2.81, ppl=7.01, wps=5886, ups=0.09, wpb=64868, bsz=128, num_updates=10904, lr=9.99208e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=125075 2021-06-20 05:23:32 | INFO | train_inner | epoch 004: 1964 / 3002 loss=2.419, ppl=5.35, wps=5766, ups=0.09, wpb=64865, bsz=128, num_updates=10905, lr=9.99208e-05, gnorm=3.184, loss_scale=8, train_wall=11, gb_free=2.8, wall=125086 2021-06-20 05:23:43 | INFO | train_inner | epoch 004: 1965 / 3002 loss=2.534, ppl=5.79, wps=5788.1, ups=0.09, wpb=64813, bsz=128, num_updates=10906, lr=9.99207e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=125097 2021-06-20 05:23:54 | INFO | train_inner | epoch 004: 1966 / 3002 loss=2.515, ppl=5.72, wps=5856.2, ups=0.09, wpb=64817, bsz=128, num_updates=10907, lr=9.99207e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=125108 2021-06-20 05:24:05 | INFO | train_inner | epoch 004: 1967 / 3002 loss=2.563, ppl=5.91, wps=5875.4, ups=0.09, wpb=64788, bsz=128, num_updates=10908, lr=9.99207e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=125119 2021-06-20 05:24:16 | INFO | train_inner | epoch 004: 1968 / 3002 loss=2.481, ppl=5.58, wps=5821, ups=0.09, wpb=64941, bsz=128, num_updates=10909, lr=9.99207e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=125130 2021-06-20 05:24:27 | INFO | train_inner | epoch 004: 1969 / 3002 loss=2.602, ppl=6.07, wps=5893.7, ups=0.09, wpb=64771, bsz=128, num_updates=10910, lr=9.99207e-05, gnorm=2.89, loss_scale=8, train_wall=11, gb_free=2.8, wall=125141 2021-06-20 05:24:38 | INFO | train_inner | epoch 004: 1970 / 3002 loss=2.452, ppl=5.47, wps=5745.7, ups=0.09, wpb=64877, bsz=128, num_updates=10911, lr=9.99207e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=125153 2021-06-20 05:24:49 | INFO | train_inner | epoch 004: 1971 / 3002 loss=2.614, ppl=6.12, wps=5886.9, ups=0.09, wpb=64870, bsz=128, num_updates=10912, lr=9.99207e-05, gnorm=2.053, loss_scale=8, train_wall=11, gb_free=2.8, wall=125164 2021-06-20 05:25:00 | INFO | train_inner | epoch 004: 1972 / 3002 loss=2.599, ppl=6.06, wps=5785.2, ups=0.09, wpb=64814, bsz=128, num_updates=10913, lr=9.99207e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=125175 2021-06-20 05:25:11 | INFO | train_inner | epoch 004: 1973 / 3002 loss=2.55, ppl=5.86, wps=5881.7, ups=0.09, wpb=64857, bsz=128, num_updates=10914, lr=9.99207e-05, gnorm=2.017, loss_scale=8, train_wall=11, gb_free=2.8, wall=125186 2021-06-20 05:25:23 | INFO | train_inner | epoch 004: 1974 / 3002 loss=2.522, ppl=5.74, wps=5817.1, ups=0.09, wpb=64727, bsz=128, num_updates=10915, lr=9.99207e-05, gnorm=2.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=125197 2021-06-20 05:25:34 | INFO | train_inner | epoch 004: 1975 / 3002 loss=2.56, ppl=5.9, wps=5797.1, ups=0.09, wpb=64768, bsz=128, num_updates=10916, lr=9.99207e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=125208 2021-06-20 05:25:45 | INFO | train_inner | epoch 004: 1976 / 3002 loss=2.628, ppl=6.18, wps=5886.3, ups=0.09, wpb=64816, bsz=128, num_updates=10917, lr=9.99207e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=125219 2021-06-20 05:25:56 | INFO | train_inner | epoch 004: 1977 / 3002 loss=2.551, ppl=5.86, wps=5926.6, ups=0.09, wpb=64863, bsz=128, num_updates=10918, lr=9.99206e-05, gnorm=2.125, loss_scale=8, train_wall=10, gb_free=2.8, wall=125230 2021-06-20 05:26:07 | INFO | train_inner | epoch 004: 1978 / 3002 loss=2.587, ppl=6.01, wps=5786.5, ups=0.09, wpb=64826, bsz=128, num_updates=10919, lr=9.99206e-05, gnorm=2.2, loss_scale=8, train_wall=11, gb_free=2.8, wall=125241 2021-06-20 05:26:18 | INFO | train_inner | epoch 004: 1979 / 3002 loss=2.662, ppl=6.33, wps=5895.2, ups=0.09, wpb=64830, bsz=128, num_updates=10920, lr=9.99206e-05, gnorm=2.011, loss_scale=8, train_wall=11, gb_free=2.8, wall=125252 2021-06-20 05:26:29 | INFO | train_inner | epoch 004: 1980 / 3002 loss=2.614, ppl=6.12, wps=5764.9, ups=0.09, wpb=64864, bsz=128, num_updates=10921, lr=9.99206e-05, gnorm=2.087, loss_scale=8, train_wall=11, gb_free=2.8, wall=125264 2021-06-20 05:26:40 | INFO | train_inner | epoch 004: 1981 / 3002 loss=2.691, ppl=6.46, wps=5971.5, ups=0.09, wpb=64932, bsz=128, num_updates=10922, lr=9.99206e-05, gnorm=2.045, loss_scale=8, train_wall=10, gb_free=2.8, wall=125274 2021-06-20 05:26:51 | INFO | train_inner | epoch 004: 1982 / 3002 loss=2.482, ppl=5.59, wps=5862.5, ups=0.09, wpb=64819, bsz=128, num_updates=10923, lr=9.99206e-05, gnorm=2.017, loss_scale=8, train_wall=11, gb_free=2.8, wall=125285 2021-06-20 05:27:02 | INFO | train_inner | epoch 004: 1983 / 3002 loss=2.549, ppl=5.85, wps=5865.1, ups=0.09, wpb=64796, bsz=128, num_updates=10924, lr=9.99206e-05, gnorm=2.085, loss_scale=8, train_wall=11, gb_free=2.8, wall=125297 2021-06-20 05:27:13 | INFO | train_inner | epoch 004: 1984 / 3002 loss=2.654, ppl=6.29, wps=5877, ups=0.09, wpb=64856, bsz=128, num_updates=10925, lr=9.99206e-05, gnorm=2.047, loss_scale=8, train_wall=11, gb_free=2.8, wall=125308 2021-06-20 05:27:24 | INFO | train_inner | epoch 004: 1985 / 3002 loss=2.528, ppl=5.77, wps=5876.1, ups=0.09, wpb=64895, bsz=128, num_updates=10926, lr=9.99206e-05, gnorm=2.432, loss_scale=8, train_wall=11, gb_free=2.8, wall=125319 2021-06-20 05:27:35 | INFO | train_inner | epoch 004: 1986 / 3002 loss=2.536, ppl=5.8, wps=5914.4, ups=0.09, wpb=64930, bsz=128, num_updates=10927, lr=9.99206e-05, gnorm=2.138, loss_scale=8, train_wall=10, gb_free=2.8, wall=125330 2021-06-20 05:27:46 | INFO | train_inner | epoch 004: 1987 / 3002 loss=2.658, ppl=6.31, wps=5905.3, ups=0.09, wpb=64796, bsz=128, num_updates=10928, lr=9.99206e-05, gnorm=2.047, loss_scale=8, train_wall=11, gb_free=2.8, wall=125341 2021-06-20 05:27:57 | INFO | train_inner | epoch 004: 1988 / 3002 loss=2.713, ppl=6.56, wps=5804.1, ups=0.09, wpb=64841, bsz=128, num_updates=10929, lr=9.99206e-05, gnorm=2.081, loss_scale=8, train_wall=11, gb_free=2.8, wall=125352 2021-06-20 05:28:09 | INFO | train_inner | epoch 004: 1989 / 3002 loss=2.638, ppl=6.23, wps=5809.6, ups=0.09, wpb=64870, bsz=128, num_updates=10930, lr=9.99206e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=125363 2021-06-20 05:28:20 | INFO | train_inner | epoch 004: 1990 / 3002 loss=2.566, ppl=5.92, wps=5718, ups=0.09, wpb=64771, bsz=128, num_updates=10931, lr=9.99205e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=125374 2021-06-20 05:28:31 | INFO | train_inner | epoch 004: 1991 / 3002 loss=2.528, ppl=5.77, wps=5832.7, ups=0.09, wpb=64844, bsz=128, num_updates=10932, lr=9.99205e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=125385 2021-06-20 05:28:42 | INFO | train_inner | epoch 004: 1992 / 3002 loss=2.56, ppl=5.9, wps=5760, ups=0.09, wpb=64808, bsz=128, num_updates=10933, lr=9.99205e-05, gnorm=2.065, loss_scale=8, train_wall=11, gb_free=2.8, wall=125397 2021-06-20 05:28:53 | INFO | train_inner | epoch 004: 1993 / 3002 loss=2.668, ppl=6.36, wps=5923.1, ups=0.09, wpb=64796, bsz=128, num_updates=10934, lr=9.99205e-05, gnorm=2.006, loss_scale=8, train_wall=10, gb_free=2.8, wall=125408 2021-06-20 05:29:04 | INFO | train_inner | epoch 004: 1994 / 3002 loss=2.708, ppl=6.53, wps=5839.7, ups=0.09, wpb=64771, bsz=128, num_updates=10935, lr=9.99205e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=125419 2021-06-20 05:29:16 | INFO | train_inner | epoch 004: 1995 / 3002 loss=2.602, ppl=6.07, wps=5766.8, ups=0.09, wpb=64852, bsz=128, num_updates=10936, lr=9.99205e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=125430 2021-06-20 05:29:27 | INFO | train_inner | epoch 004: 1996 / 3002 loss=2.553, ppl=5.87, wps=5849.2, ups=0.09, wpb=64906, bsz=128, num_updates=10937, lr=9.99205e-05, gnorm=2.044, loss_scale=8, train_wall=11, gb_free=2.8, wall=125441 2021-06-20 05:29:38 | INFO | train_inner | epoch 004: 1997 / 3002 loss=2.566, ppl=5.92, wps=5861.5, ups=0.09, wpb=64784, bsz=128, num_updates=10938, lr=9.99205e-05, gnorm=2.029, loss_scale=8, train_wall=11, gb_free=2.8, wall=125452 2021-06-20 05:29:49 | INFO | train_inner | epoch 004: 1998 / 3002 loss=2.554, ppl=5.87, wps=5873.3, ups=0.09, wpb=64852, bsz=128, num_updates=10939, lr=9.99205e-05, gnorm=2.103, loss_scale=8, train_wall=11, gb_free=2.8, wall=125463 2021-06-20 05:30:00 | INFO | train_inner | epoch 004: 1999 / 3002 loss=2.532, ppl=5.78, wps=5867.3, ups=0.09, wpb=64761, bsz=128, num_updates=10940, lr=9.99205e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=125474 2021-06-20 05:30:11 | INFO | train_inner | epoch 004: 2000 / 3002 loss=2.688, ppl=6.44, wps=5859.6, ups=0.09, wpb=64774, bsz=128, num_updates=10941, lr=9.99205e-05, gnorm=2.061, loss_scale=8, train_wall=11, gb_free=2.8, wall=125485 2021-06-20 05:30:22 | INFO | train_inner | epoch 004: 2001 / 3002 loss=2.517, ppl=5.73, wps=5903.7, ups=0.09, wpb=64787, bsz=128, num_updates=10942, lr=9.99205e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=125496 2021-06-20 05:30:33 | INFO | train_inner | epoch 004: 2002 / 3002 loss=2.599, ppl=6.06, wps=6012.2, ups=0.09, wpb=64761, bsz=128, num_updates=10943, lr=9.99204e-05, gnorm=2.041, loss_scale=8, train_wall=10, gb_free=2.8, wall=125507 2021-06-20 05:30:44 | INFO | train_inner | epoch 004: 2003 / 3002 loss=2.614, ppl=6.12, wps=5867.4, ups=0.09, wpb=64853, bsz=128, num_updates=10944, lr=9.99204e-05, gnorm=2.036, loss_scale=8, train_wall=11, gb_free=2.8, wall=125518 2021-06-20 05:30:54 | INFO | train_inner | epoch 004: 2004 / 3002 loss=2.651, ppl=6.28, wps=6009.8, ups=0.09, wpb=64840, bsz=128, num_updates=10945, lr=9.99204e-05, gnorm=2.07, loss_scale=8, train_wall=10, gb_free=2.8, wall=125529 2021-06-20 05:31:06 | INFO | train_inner | epoch 004: 2005 / 3002 loss=2.481, ppl=5.58, wps=5731.7, ups=0.09, wpb=64782, bsz=128, num_updates=10946, lr=9.99204e-05, gnorm=2.005, loss_scale=8, train_wall=11, gb_free=2.8, wall=125540 2021-06-20 05:31:17 | INFO | train_inner | epoch 004: 2006 / 3002 loss=2.626, ppl=6.17, wps=5782.6, ups=0.09, wpb=64822, bsz=128, num_updates=10947, lr=9.99204e-05, gnorm=2.078, loss_scale=8, train_wall=11, gb_free=2.8, wall=125551 2021-06-20 05:31:28 | INFO | train_inner | epoch 004: 2007 / 3002 loss=2.544, ppl=5.83, wps=5791.5, ups=0.09, wpb=64830, bsz=128, num_updates=10948, lr=9.99204e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=125562 2021-06-20 05:31:39 | INFO | train_inner | epoch 004: 2008 / 3002 loss=2.523, ppl=5.75, wps=5930.3, ups=0.09, wpb=64843, bsz=128, num_updates=10949, lr=9.99204e-05, gnorm=2.039, loss_scale=8, train_wall=11, gb_free=2.8, wall=125573 2021-06-20 05:31:50 | INFO | train_inner | epoch 004: 2009 / 3002 loss=2.57, ppl=5.94, wps=5895.7, ups=0.09, wpb=64801, bsz=128, num_updates=10950, lr=9.99204e-05, gnorm=2.138, loss_scale=8, train_wall=11, gb_free=2.8, wall=125584 2021-06-20 05:32:01 | INFO | train_inner | epoch 004: 2010 / 3002 loss=2.642, ppl=6.24, wps=5899.2, ups=0.09, wpb=64823, bsz=128, num_updates=10951, lr=9.99204e-05, gnorm=2.097, loss_scale=8, train_wall=11, gb_free=2.8, wall=125595 2021-06-20 05:32:12 | INFO | train_inner | epoch 004: 2011 / 3002 loss=2.517, ppl=5.72, wps=5831.6, ups=0.09, wpb=64756, bsz=128, num_updates=10952, lr=9.99204e-05, gnorm=1.989, loss_scale=8, train_wall=11, gb_free=2.8, wall=125606 2021-06-20 05:32:23 | INFO | train_inner | epoch 004: 2012 / 3002 loss=2.493, ppl=5.63, wps=5868.3, ups=0.09, wpb=64816, bsz=128, num_updates=10953, lr=9.99204e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=125618 2021-06-20 05:32:34 | INFO | train_inner | epoch 004: 2013 / 3002 loss=2.641, ppl=6.24, wps=5903.1, ups=0.09, wpb=64840, bsz=128, num_updates=10954, lr=9.99204e-05, gnorm=2.029, loss_scale=8, train_wall=11, gb_free=2.8, wall=125628 2021-06-20 05:32:45 | INFO | train_inner | epoch 004: 2014 / 3002 loss=2.519, ppl=5.73, wps=5927.9, ups=0.09, wpb=64855, bsz=128, num_updates=10955, lr=9.99204e-05, gnorm=2.238, loss_scale=8, train_wall=10, gb_free=2.8, wall=125639 2021-06-20 05:32:56 | INFO | train_inner | epoch 004: 2015 / 3002 loss=2.53, ppl=5.78, wps=5845.4, ups=0.09, wpb=64854, bsz=128, num_updates=10956, lr=9.99203e-05, gnorm=2.051, loss_scale=8, train_wall=11, gb_free=2.8, wall=125651 2021-06-20 05:33:07 | INFO | train_inner | epoch 004: 2016 / 3002 loss=2.599, ppl=6.06, wps=5919.6, ups=0.09, wpb=64926, bsz=128, num_updates=10957, lr=9.99203e-05, gnorm=2.105, loss_scale=8, train_wall=11, gb_free=2.8, wall=125662 2021-06-20 05:33:18 | INFO | train_inner | epoch 004: 2017 / 3002 loss=2.574, ppl=5.95, wps=5797.6, ups=0.09, wpb=64809, bsz=128, num_updates=10958, lr=9.99203e-05, gnorm=2.012, loss_scale=8, train_wall=11, gb_free=2.8, wall=125673 2021-06-20 05:33:29 | INFO | train_inner | epoch 004: 2018 / 3002 loss=2.521, ppl=5.74, wps=5858.3, ups=0.09, wpb=64832, bsz=128, num_updates=10959, lr=9.99203e-05, gnorm=2.077, loss_scale=8, train_wall=11, gb_free=2.8, wall=125684 2021-06-20 05:33:41 | INFO | train_inner | epoch 004: 2019 / 3002 loss=2.395, ppl=5.26, wps=5758.6, ups=0.09, wpb=64974, bsz=128, num_updates=10960, lr=9.99203e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=125695 2021-06-20 05:33:52 | INFO | train_inner | epoch 004: 2020 / 3002 loss=2.578, ppl=5.97, wps=5922.6, ups=0.09, wpb=64857, bsz=128, num_updates=10961, lr=9.99203e-05, gnorm=2.104, loss_scale=8, train_wall=10, gb_free=2.8, wall=125706 2021-06-20 05:34:03 | INFO | train_inner | epoch 004: 2021 / 3002 loss=2.534, ppl=5.79, wps=5808.9, ups=0.09, wpb=64872, bsz=128, num_updates=10962, lr=9.99203e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=125717 2021-06-20 05:34:14 | INFO | train_inner | epoch 004: 2022 / 3002 loss=2.608, ppl=6.1, wps=5928, ups=0.09, wpb=64926, bsz=128, num_updates=10963, lr=9.99203e-05, gnorm=2.086, loss_scale=8, train_wall=11, gb_free=2.8, wall=125728 2021-06-20 05:34:25 | INFO | train_inner | epoch 004: 2023 / 3002 loss=2.555, ppl=5.88, wps=5851.4, ups=0.09, wpb=64799, bsz=128, num_updates=10964, lr=9.99203e-05, gnorm=2.032, loss_scale=8, train_wall=11, gb_free=2.8, wall=125739 2021-06-20 05:34:36 | INFO | train_inner | epoch 004: 2024 / 3002 loss=2.544, ppl=5.83, wps=5923.9, ups=0.09, wpb=64909, bsz=128, num_updates=10965, lr=9.99203e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=125750 2021-06-20 05:34:47 | INFO | train_inner | epoch 004: 2025 / 3002 loss=2.451, ppl=5.47, wps=5871.5, ups=0.09, wpb=64859, bsz=128, num_updates=10966, lr=9.99203e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=125761 2021-06-20 05:34:58 | INFO | train_inner | epoch 004: 2026 / 3002 loss=2.801, ppl=6.97, wps=5881.9, ups=0.09, wpb=64869, bsz=128, num_updates=10967, lr=9.99203e-05, gnorm=2.119, loss_scale=8, train_wall=11, gb_free=2.8, wall=125772 2021-06-20 05:35:09 | INFO | train_inner | epoch 004: 2027 / 3002 loss=2.485, ppl=5.6, wps=5876.1, ups=0.09, wpb=64791, bsz=128, num_updates=10968, lr=9.99202e-05, gnorm=2.434, loss_scale=8, train_wall=11, gb_free=2.8, wall=125783 2021-06-20 05:35:20 | INFO | train_inner | epoch 004: 2028 / 3002 loss=2.678, ppl=6.4, wps=5771.8, ups=0.09, wpb=64748, bsz=128, num_updates=10969, lr=9.99202e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=125794 2021-06-20 05:35:31 | INFO | train_inner | epoch 004: 2029 / 3002 loss=2.627, ppl=6.18, wps=5865.4, ups=0.09, wpb=64863, bsz=128, num_updates=10970, lr=9.99202e-05, gnorm=2.01, loss_scale=8, train_wall=11, gb_free=2.8, wall=125806 2021-06-20 05:35:42 | INFO | train_inner | epoch 004: 2030 / 3002 loss=2.448, ppl=5.46, wps=5851.7, ups=0.09, wpb=64801, bsz=128, num_updates=10971, lr=9.99202e-05, gnorm=2.023, loss_scale=8, train_wall=11, gb_free=2.8, wall=125817 2021-06-20 05:35:54 | INFO | train_inner | epoch 004: 2031 / 3002 loss=2.606, ppl=6.09, wps=5758.5, ups=0.09, wpb=64791, bsz=128, num_updates=10972, lr=9.99202e-05, gnorm=2.034, loss_scale=8, train_wall=11, gb_free=2.8, wall=125828 2021-06-20 05:36:04 | INFO | train_inner | epoch 004: 2032 / 3002 loss=2.381, ppl=5.21, wps=5921.3, ups=0.09, wpb=64916, bsz=128, num_updates=10973, lr=9.99202e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=125839 2021-06-20 05:36:15 | INFO | train_inner | epoch 004: 2033 / 3002 loss=2.49, ppl=5.62, wps=5898.9, ups=0.09, wpb=64876, bsz=128, num_updates=10974, lr=9.99202e-05, gnorm=2.071, loss_scale=8, train_wall=11, gb_free=2.8, wall=125850 2021-06-20 05:36:27 | INFO | train_inner | epoch 004: 2034 / 3002 loss=2.491, ppl=5.62, wps=5801.9, ups=0.09, wpb=64827, bsz=128, num_updates=10975, lr=9.99202e-05, gnorm=2.166, loss_scale=8, train_wall=11, gb_free=2.8, wall=125861 2021-06-20 05:36:38 | INFO | train_inner | epoch 004: 2035 / 3002 loss=2.497, ppl=5.65, wps=5886.1, ups=0.09, wpb=64843, bsz=128, num_updates=10976, lr=9.99202e-05, gnorm=2.005, loss_scale=8, train_wall=11, gb_free=2.8, wall=125872 2021-06-20 05:36:49 | INFO | train_inner | epoch 004: 2036 / 3002 loss=2.671, ppl=6.37, wps=5910.4, ups=0.09, wpb=64864, bsz=128, num_updates=10977, lr=9.99202e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=125883 2021-06-20 05:37:00 | INFO | train_inner | epoch 004: 2037 / 3002 loss=2.506, ppl=5.68, wps=5789.9, ups=0.09, wpb=64886, bsz=128, num_updates=10978, lr=9.99202e-05, gnorm=2.096, loss_scale=8, train_wall=11, gb_free=2.8, wall=125894 2021-06-20 05:37:11 | INFO | train_inner | epoch 004: 2038 / 3002 loss=2.542, ppl=5.82, wps=5936.8, ups=0.09, wpb=64785, bsz=128, num_updates=10979, lr=9.99202e-05, gnorm=2.219, loss_scale=8, train_wall=10, gb_free=2.8, wall=125905 2021-06-20 05:37:22 | INFO | train_inner | epoch 004: 2039 / 3002 loss=2.646, ppl=6.26, wps=5814.5, ups=0.09, wpb=64820, bsz=128, num_updates=10980, lr=9.99202e-05, gnorm=2.141, loss_scale=8, train_wall=11, gb_free=2.8, wall=125916 2021-06-20 05:37:33 | INFO | train_inner | epoch 004: 2040 / 3002 loss=2.658, ppl=6.31, wps=5853.1, ups=0.09, wpb=64784, bsz=128, num_updates=10981, lr=9.99201e-05, gnorm=2.083, loss_scale=8, train_wall=11, gb_free=2.8, wall=125927 2021-06-20 05:37:44 | INFO | train_inner | epoch 004: 2041 / 3002 loss=2.524, ppl=5.75, wps=5842.8, ups=0.09, wpb=64836, bsz=128, num_updates=10982, lr=9.99201e-05, gnorm=2.106, loss_scale=8, train_wall=11, gb_free=2.8, wall=125938 2021-06-20 05:37:55 | INFO | train_inner | epoch 004: 2042 / 3002 loss=2.536, ppl=5.8, wps=5759.2, ups=0.09, wpb=64873, bsz=128, num_updates=10983, lr=9.99201e-05, gnorm=2.126, loss_scale=8, train_wall=11, gb_free=2.8, wall=125950 2021-06-20 05:38:06 | INFO | train_inner | epoch 004: 2043 / 3002 loss=2.472, ppl=5.55, wps=5812.1, ups=0.09, wpb=64793, bsz=128, num_updates=10984, lr=9.99201e-05, gnorm=2.542, loss_scale=8, train_wall=11, gb_free=2.8, wall=125961 2021-06-20 05:38:17 | INFO | train_inner | epoch 004: 2044 / 3002 loss=2.394, ppl=5.26, wps=5993.4, ups=0.09, wpb=64764, bsz=128, num_updates=10985, lr=9.99201e-05, gnorm=2.05, loss_scale=8, train_wall=10, gb_free=2.8, wall=125972 2021-06-20 05:38:28 | INFO | train_inner | epoch 004: 2045 / 3002 loss=2.554, ppl=5.87, wps=5805.5, ups=0.09, wpb=64788, bsz=128, num_updates=10986, lr=9.99201e-05, gnorm=1.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=125983 2021-06-20 05:38:39 | INFO | train_inner | epoch 004: 2046 / 3002 loss=2.592, ppl=6.03, wps=5895.7, ups=0.09, wpb=64870, bsz=128, num_updates=10987, lr=9.99201e-05, gnorm=2.024, loss_scale=8, train_wall=11, gb_free=2.8, wall=125994 2021-06-20 05:38:51 | INFO | train_inner | epoch 004: 2047 / 3002 loss=2.61, ppl=6.11, wps=5799.3, ups=0.09, wpb=64871, bsz=128, num_updates=10988, lr=9.99201e-05, gnorm=2.342, loss_scale=8, train_wall=11, gb_free=2.8, wall=126005 2021-06-20 05:39:02 | INFO | train_inner | epoch 004: 2048 / 3002 loss=2.405, ppl=5.3, wps=5881.2, ups=0.09, wpb=64791, bsz=128, num_updates=10989, lr=9.99201e-05, gnorm=2.087, loss_scale=16, train_wall=11, gb_free=2.8, wall=126016 2021-06-20 05:39:13 | INFO | train_inner | epoch 004: 2049 / 3002 loss=2.664, ppl=6.34, wps=5856.2, ups=0.09, wpb=64816, bsz=128, num_updates=10990, lr=9.99201e-05, gnorm=2.007, loss_scale=16, train_wall=11, gb_free=2.8, wall=126027 2021-06-20 05:39:24 | INFO | train_inner | epoch 004: 2050 / 3002 loss=2.561, ppl=5.9, wps=5794.8, ups=0.09, wpb=64850, bsz=128, num_updates=10991, lr=9.99201e-05, gnorm=2.019, loss_scale=16, train_wall=11, gb_free=2.8, wall=126038 2021-06-20 05:39:35 | INFO | train_inner | epoch 004: 2051 / 3002 loss=2.555, ppl=5.88, wps=5923, ups=0.09, wpb=64793, bsz=128, num_updates=10992, lr=9.99201e-05, gnorm=2.063, loss_scale=16, train_wall=10, gb_free=2.8, wall=126049 2021-06-20 05:39:46 | INFO | train_inner | epoch 004: 2052 / 3002 loss=2.612, ppl=6.11, wps=5820.4, ups=0.09, wpb=64819, bsz=128, num_updates=10993, lr=9.992e-05, gnorm=2.004, loss_scale=16, train_wall=11, gb_free=2.8, wall=126060 2021-06-20 05:39:57 | INFO | train_inner | epoch 004: 2053 / 3002 loss=2.542, ppl=5.83, wps=5910.7, ups=0.09, wpb=64896, bsz=128, num_updates=10994, lr=9.992e-05, gnorm=2.06, loss_scale=16, train_wall=11, gb_free=2.8, wall=126071 2021-06-20 05:40:08 | INFO | train_inner | epoch 004: 2054 / 3002 loss=2.438, ppl=5.42, wps=5814.6, ups=0.09, wpb=64835, bsz=128, num_updates=10995, lr=9.992e-05, gnorm=2.064, loss_scale=16, train_wall=11, gb_free=2.8, wall=126082 2021-06-20 05:40:19 | INFO | train_inner | epoch 004: 2055 / 3002 loss=2.546, ppl=5.84, wps=5792.7, ups=0.09, wpb=64772, bsz=128, num_updates=10996, lr=9.992e-05, gnorm=2.032, loss_scale=16, train_wall=11, gb_free=2.8, wall=126094 2021-06-20 05:40:30 | INFO | train_inner | epoch 004: 2056 / 3002 loss=2.456, ppl=5.49, wps=5834.1, ups=0.09, wpb=64896, bsz=128, num_updates=10997, lr=9.992e-05, gnorm=2.096, loss_scale=16, train_wall=11, gb_free=2.8, wall=126105 2021-06-20 05:40:42 | INFO | train_inner | epoch 004: 2057 / 3002 loss=2.635, ppl=6.21, wps=5823.9, ups=0.09, wpb=64828, bsz=128, num_updates=10998, lr=9.992e-05, gnorm=2.086, loss_scale=16, train_wall=11, gb_free=2.8, wall=126116 2021-06-20 05:40:53 | INFO | train_inner | epoch 004: 2058 / 3002 loss=2.484, ppl=5.6, wps=5905.6, ups=0.09, wpb=64913, bsz=128, num_updates=10999, lr=9.992e-05, gnorm=2.21, loss_scale=16, train_wall=11, gb_free=2.8, wall=126127 2021-06-20 05:41:04 | INFO | train_inner | epoch 004: 2059 / 3002 loss=2.629, ppl=6.18, wps=5909.3, ups=0.09, wpb=64871, bsz=128, num_updates=11000, lr=9.992e-05, gnorm=2.144, loss_scale=16, train_wall=11, gb_free=2.8, wall=126138 2021-06-20 05:41:15 | INFO | train_inner | epoch 004: 2060 / 3002 loss=2.547, ppl=5.84, wps=5853.6, ups=0.09, wpb=64908, bsz=128, num_updates=11001, lr=9.992e-05, gnorm=2.069, loss_scale=16, train_wall=11, gb_free=2.8, wall=126149 2021-06-20 05:41:26 | INFO | train_inner | epoch 004: 2061 / 3002 loss=2.565, ppl=5.92, wps=5849.6, ups=0.09, wpb=64855, bsz=128, num_updates=11002, lr=9.992e-05, gnorm=2.043, loss_scale=16, train_wall=11, gb_free=2.8, wall=126160 2021-06-20 05:41:37 | INFO | train_inner | epoch 004: 2062 / 3002 loss=2.576, ppl=5.96, wps=5728.6, ups=0.09, wpb=64824, bsz=128, num_updates=11003, lr=9.992e-05, gnorm=1.91, loss_scale=16, train_wall=11, gb_free=2.8, wall=126171 2021-06-20 05:41:48 | INFO | train_inner | epoch 004: 2063 / 3002 loss=2.647, ppl=6.26, wps=5867.2, ups=0.09, wpb=64883, bsz=128, num_updates=11004, lr=9.992e-05, gnorm=2.005, loss_scale=16, train_wall=11, gb_free=2.8, wall=126182 2021-06-20 05:41:59 | INFO | train_inner | epoch 004: 2064 / 3002 loss=2.533, ppl=5.79, wps=5844.5, ups=0.09, wpb=64851, bsz=128, num_updates=11005, lr=9.992e-05, gnorm=2.055, loss_scale=16, train_wall=11, gb_free=2.8, wall=126194 2021-06-20 05:42:10 | INFO | train_inner | epoch 004: 2065 / 3002 loss=2.472, ppl=5.55, wps=5989.2, ups=0.09, wpb=64829, bsz=128, num_updates=11006, lr=9.99199e-05, gnorm=2.555, loss_scale=16, train_wall=10, gb_free=2.8, wall=126204 2021-06-20 05:42:21 | INFO | train_inner | epoch 004: 2066 / 3002 loss=2.458, ppl=5.5, wps=5854.1, ups=0.09, wpb=64755, bsz=128, num_updates=11007, lr=9.99199e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=126215 2021-06-20 05:42:32 | INFO | train_inner | epoch 004: 2067 / 3002 loss=2.415, ppl=5.33, wps=5933.7, ups=0.09, wpb=64864, bsz=128, num_updates=11008, lr=9.99199e-05, gnorm=2.061, loss_scale=16, train_wall=10, gb_free=2.8, wall=126226 2021-06-20 05:42:43 | INFO | train_inner | epoch 004: 2068 / 3002 loss=2.547, ppl=5.84, wps=5846.9, ups=0.09, wpb=64836, bsz=128, num_updates=11009, lr=9.99199e-05, gnorm=2.048, loss_scale=16, train_wall=11, gb_free=2.8, wall=126237 2021-06-20 05:42:54 | INFO | train_inner | epoch 004: 2069 / 3002 loss=2.51, ppl=5.7, wps=5843.3, ups=0.09, wpb=64857, bsz=128, num_updates=11010, lr=9.99199e-05, gnorm=2.527, loss_scale=16, train_wall=11, gb_free=2.8, wall=126249 2021-06-20 05:43:05 | INFO | train_inner | epoch 004: 2070 / 3002 loss=2.497, ppl=5.65, wps=5959, ups=0.09, wpb=64874, bsz=128, num_updates=11011, lr=9.99199e-05, gnorm=2.075, loss_scale=16, train_wall=10, gb_free=2.8, wall=126259 2021-06-20 05:43:16 | INFO | train_inner | epoch 004: 2071 / 3002 loss=2.476, ppl=5.56, wps=5759, ups=0.09, wpb=64818, bsz=128, num_updates=11012, lr=9.99199e-05, gnorm=2.041, loss_scale=16, train_wall=11, gb_free=2.8, wall=126271 2021-06-20 05:43:27 | INFO | train_inner | epoch 004: 2072 / 3002 loss=2.649, ppl=6.27, wps=5805.6, ups=0.09, wpb=64771, bsz=128, num_updates=11013, lr=9.99199e-05, gnorm=2.053, loss_scale=16, train_wall=11, gb_free=2.8, wall=126282 2021-06-20 05:43:39 | INFO | train_inner | epoch 004: 2073 / 3002 loss=2.479, ppl=5.58, wps=5823.8, ups=0.09, wpb=64826, bsz=128, num_updates=11014, lr=9.99199e-05, gnorm=2.158, loss_scale=16, train_wall=11, gb_free=2.8, wall=126293 2021-06-20 05:43:50 | INFO | train_inner | epoch 004: 2074 / 3002 loss=2.647, ppl=6.26, wps=5741, ups=0.09, wpb=64803, bsz=128, num_updates=11015, lr=9.99199e-05, gnorm=2.09, loss_scale=16, train_wall=11, gb_free=2.8, wall=126304 2021-06-20 05:44:01 | INFO | train_inner | epoch 004: 2075 / 3002 loss=2.664, ppl=6.34, wps=5984.6, ups=0.09, wpb=64807, bsz=128, num_updates=11016, lr=9.99199e-05, gnorm=2.125, loss_scale=16, train_wall=10, gb_free=2.8, wall=126315 2021-06-20 05:44:12 | INFO | train_inner | epoch 004: 2076 / 3002 loss=2.588, ppl=6.01, wps=5976.9, ups=0.09, wpb=64918, bsz=128, num_updates=11017, lr=9.99199e-05, gnorm=2.02, loss_scale=16, train_wall=10, gb_free=2.8, wall=126326 2021-06-20 05:44:23 | INFO | train_inner | epoch 004: 2077 / 3002 loss=2.336, ppl=5.05, wps=5908.3, ups=0.09, wpb=64872, bsz=128, num_updates=11018, lr=9.99198e-05, gnorm=2.056, loss_scale=16, train_wall=10, gb_free=2.8, wall=126337 2021-06-20 05:44:34 | INFO | train_inner | epoch 004: 2078 / 3002 loss=2.567, ppl=5.92, wps=5790.6, ups=0.09, wpb=64856, bsz=128, num_updates=11019, lr=9.99198e-05, gnorm=2.015, loss_scale=16, train_wall=11, gb_free=2.8, wall=126348 2021-06-20 05:44:45 | INFO | train_inner | epoch 004: 2079 / 3002 loss=2.652, ppl=6.28, wps=5762.9, ups=0.09, wpb=64808, bsz=128, num_updates=11020, lr=9.99198e-05, gnorm=2.113, loss_scale=16, train_wall=11, gb_free=2.8, wall=126359 2021-06-20 05:44:56 | INFO | train_inner | epoch 004: 2080 / 3002 loss=2.481, ppl=5.58, wps=5817.4, ups=0.09, wpb=64811, bsz=128, num_updates=11021, lr=9.99198e-05, gnorm=2.024, loss_scale=16, train_wall=11, gb_free=2.8, wall=126370 2021-06-20 05:45:07 | INFO | train_inner | epoch 004: 2081 / 3002 loss=2.481, ppl=5.58, wps=5875, ups=0.09, wpb=64833, bsz=128, num_updates=11022, lr=9.99198e-05, gnorm=2.357, loss_scale=16, train_wall=11, gb_free=2.8, wall=126382 2021-06-20 05:45:18 | INFO | train_inner | epoch 004: 2082 / 3002 loss=2.645, ppl=6.26, wps=5789.9, ups=0.09, wpb=64822, bsz=128, num_updates=11023, lr=9.99198e-05, gnorm=2.157, loss_scale=16, train_wall=11, gb_free=2.8, wall=126393 2021-06-20 05:45:29 | INFO | train_inner | epoch 004: 2083 / 3002 loss=2.472, ppl=5.55, wps=5939.8, ups=0.09, wpb=64874, bsz=128, num_updates=11024, lr=9.99198e-05, gnorm=1.977, loss_scale=16, train_wall=10, gb_free=2.8, wall=126404 2021-06-20 05:45:41 | INFO | train_inner | epoch 004: 2084 / 3002 loss=2.544, ppl=5.83, wps=5777.8, ups=0.09, wpb=64756, bsz=128, num_updates=11025, lr=9.99198e-05, gnorm=2.048, loss_scale=16, train_wall=11, gb_free=2.8, wall=126415 2021-06-20 05:45:52 | INFO | train_inner | epoch 004: 2085 / 3002 loss=2.508, ppl=5.69, wps=5864.4, ups=0.09, wpb=64783, bsz=128, num_updates=11026, lr=9.99198e-05, gnorm=2.142, loss_scale=16, train_wall=11, gb_free=2.8, wall=126426 2021-06-20 05:46:02 | INFO | train_inner | epoch 004: 2086 / 3002 loss=2.626, ppl=6.18, wps=6035.1, ups=0.09, wpb=64900, bsz=128, num_updates=11027, lr=9.99198e-05, gnorm=2.079, loss_scale=16, train_wall=10, gb_free=2.8, wall=126437 2021-06-20 05:46:14 | INFO | train_inner | epoch 004: 2087 / 3002 loss=2.453, ppl=5.48, wps=5784.1, ups=0.09, wpb=64834, bsz=128, num_updates=11028, lr=9.99198e-05, gnorm=1.987, loss_scale=16, train_wall=11, gb_free=2.8, wall=126448 2021-06-20 05:46:25 | INFO | train_inner | epoch 004: 2088 / 3002 loss=2.611, ppl=6.11, wps=5892.7, ups=0.09, wpb=64825, bsz=128, num_updates=11029, lr=9.99198e-05, gnorm=2.029, loss_scale=16, train_wall=11, gb_free=2.8, wall=126459 2021-06-20 05:46:36 | INFO | train_inner | epoch 004: 2089 / 3002 loss=2.728, ppl=6.63, wps=5803.5, ups=0.09, wpb=64783, bsz=128, num_updates=11030, lr=9.99198e-05, gnorm=2.019, loss_scale=16, train_wall=11, gb_free=2.8, wall=126470 2021-06-20 05:46:47 | INFO | train_inner | epoch 004: 2090 / 3002 loss=2.623, ppl=6.16, wps=5821.7, ups=0.09, wpb=64851, bsz=128, num_updates=11031, lr=9.99197e-05, gnorm=2.07, loss_scale=16, train_wall=11, gb_free=2.8, wall=126481 2021-06-20 05:46:58 | INFO | train_inner | epoch 004: 2091 / 3002 loss=2.554, ppl=5.87, wps=5839.6, ups=0.09, wpb=64806, bsz=128, num_updates=11032, lr=9.99197e-05, gnorm=1.987, loss_scale=16, train_wall=11, gb_free=2.8, wall=126492 2021-06-20 05:47:09 | INFO | train_inner | epoch 004: 2092 / 3002 loss=2.691, ppl=6.46, wps=5877.2, ups=0.09, wpb=64778, bsz=128, num_updates=11033, lr=9.99197e-05, gnorm=2.071, loss_scale=16, train_wall=11, gb_free=2.8, wall=126503 2021-06-20 05:47:20 | INFO | train_inner | epoch 004: 2093 / 3002 loss=2.46, ppl=5.5, wps=5818.7, ups=0.09, wpb=64920, bsz=128, num_updates=11034, lr=9.99197e-05, gnorm=1.98, loss_scale=16, train_wall=11, gb_free=2.8, wall=126514 2021-06-20 05:47:31 | INFO | train_inner | epoch 004: 2094 / 3002 loss=2.698, ppl=6.49, wps=5780.7, ups=0.09, wpb=64856, bsz=128, num_updates=11035, lr=9.99197e-05, gnorm=2.025, loss_scale=16, train_wall=11, gb_free=2.8, wall=126526 2021-06-20 05:47:42 | INFO | train_inner | epoch 004: 2095 / 3002 loss=2.509, ppl=5.69, wps=5844.3, ups=0.09, wpb=64814, bsz=128, num_updates=11036, lr=9.99197e-05, gnorm=1.973, loss_scale=16, train_wall=11, gb_free=2.8, wall=126537 2021-06-20 05:47:54 | INFO | train_inner | epoch 004: 2096 / 3002 loss=2.599, ppl=6.06, wps=5836, ups=0.09, wpb=64825, bsz=128, num_updates=11037, lr=9.99197e-05, gnorm=2.074, loss_scale=16, train_wall=11, gb_free=2.8, wall=126548 2021-06-20 05:48:05 | INFO | train_inner | epoch 004: 2097 / 3002 loss=2.552, ppl=5.86, wps=5760.5, ups=0.09, wpb=64887, bsz=128, num_updates=11038, lr=9.99197e-05, gnorm=2.395, loss_scale=16, train_wall=11, gb_free=2.8, wall=126559 2021-06-20 05:48:16 | INFO | train_inner | epoch 004: 2098 / 3002 loss=2.391, ppl=5.24, wps=5801.9, ups=0.09, wpb=64732, bsz=128, num_updates=11039, lr=9.99197e-05, gnorm=2.013, loss_scale=16, train_wall=11, gb_free=2.8, wall=126570 2021-06-20 05:48:27 | INFO | train_inner | epoch 004: 2099 / 3002 loss=2.579, ppl=5.98, wps=5948.2, ups=0.09, wpb=64874, bsz=128, num_updates=11040, lr=9.99197e-05, gnorm=1.958, loss_scale=16, train_wall=10, gb_free=2.8, wall=126581 2021-06-20 05:48:38 | INFO | train_inner | epoch 004: 2100 / 3002 loss=2.583, ppl=5.99, wps=5696, ups=0.09, wpb=64800, bsz=128, num_updates=11041, lr=9.99197e-05, gnorm=2.195, loss_scale=16, train_wall=11, gb_free=2.8, wall=126593 2021-06-20 05:48:49 | INFO | train_inner | epoch 004: 2101 / 3002 loss=2.69, ppl=6.45, wps=5764.6, ups=0.09, wpb=64756, bsz=128, num_updates=11042, lr=9.99197e-05, gnorm=2.073, loss_scale=16, train_wall=11, gb_free=2.8, wall=126604 2021-06-20 05:49:01 | INFO | train_inner | epoch 004: 2102 / 3002 loss=2.697, ppl=6.49, wps=5815.6, ups=0.09, wpb=64815, bsz=128, num_updates=11043, lr=9.99196e-05, gnorm=2.058, loss_scale=16, train_wall=11, gb_free=2.8, wall=126615 2021-06-20 05:49:12 | INFO | train_inner | epoch 004: 2103 / 3002 loss=2.439, ppl=5.42, wps=5898.5, ups=0.09, wpb=64781, bsz=128, num_updates=11044, lr=9.99196e-05, gnorm=2.067, loss_scale=16, train_wall=11, gb_free=2.8, wall=126626 2021-06-20 05:49:23 | INFO | train_inner | epoch 004: 2104 / 3002 loss=2.534, ppl=5.79, wps=5781.6, ups=0.09, wpb=64797, bsz=128, num_updates=11045, lr=9.99196e-05, gnorm=2.036, loss_scale=16, train_wall=11, gb_free=2.8, wall=126637 2021-06-20 05:49:34 | INFO | train_inner | epoch 004: 2105 / 3002 loss=2.745, ppl=6.7, wps=5700, ups=0.09, wpb=64804, bsz=128, num_updates=11046, lr=9.99196e-05, gnorm=2.118, loss_scale=16, train_wall=11, gb_free=2.8, wall=126649 2021-06-20 05:49:45 | INFO | train_inner | epoch 004: 2106 / 3002 loss=2.549, ppl=5.85, wps=5876.2, ups=0.09, wpb=64834, bsz=128, num_updates=11047, lr=9.99196e-05, gnorm=2.084, loss_scale=16, train_wall=11, gb_free=2.8, wall=126660 2021-06-20 05:49:56 | INFO | train_inner | epoch 004: 2107 / 3002 loss=2.511, ppl=5.7, wps=5871.3, ups=0.09, wpb=64898, bsz=128, num_updates=11048, lr=9.99196e-05, gnorm=2.03, loss_scale=16, train_wall=11, gb_free=2.8, wall=126671 2021-06-20 05:50:07 | INFO | train_inner | epoch 004: 2108 / 3002 loss=2.422, ppl=5.36, wps=5896.6, ups=0.09, wpb=64802, bsz=128, num_updates=11049, lr=9.99196e-05, gnorm=1.936, loss_scale=16, train_wall=11, gb_free=2.8, wall=126682 2021-06-20 05:50:18 | INFO | train_inner | epoch 004: 2109 / 3002 loss=2.561, ppl=5.9, wps=5862.2, ups=0.09, wpb=64792, bsz=128, num_updates=11050, lr=9.99196e-05, gnorm=2.098, loss_scale=16, train_wall=11, gb_free=2.8, wall=126693 2021-06-20 05:50:29 | INFO | train_inner | epoch 004: 2110 / 3002 loss=2.492, ppl=5.62, wps=5854.3, ups=0.09, wpb=64788, bsz=128, num_updates=11051, lr=9.99196e-05, gnorm=2.119, loss_scale=16, train_wall=11, gb_free=2.8, wall=126704 2021-06-20 05:50:40 | INFO | train_inner | epoch 004: 2111 / 3002 loss=2.565, ppl=5.92, wps=5909.8, ups=0.09, wpb=64935, bsz=128, num_updates=11052, lr=9.99196e-05, gnorm=2.059, loss_scale=16, train_wall=11, gb_free=2.8, wall=126715 2021-06-20 05:50:51 | INFO | train_inner | epoch 004: 2112 / 3002 loss=2.719, ppl=6.58, wps=5899.1, ups=0.09, wpb=64898, bsz=128, num_updates=11053, lr=9.99196e-05, gnorm=2.074, loss_scale=16, train_wall=11, gb_free=2.8, wall=126726 2021-06-20 05:51:03 | INFO | train_inner | epoch 004: 2113 / 3002 loss=2.64, ppl=6.23, wps=5817.6, ups=0.09, wpb=64820, bsz=128, num_updates=11054, lr=9.99196e-05, gnorm=2.121, loss_scale=16, train_wall=11, gb_free=2.8, wall=126737 2021-06-20 05:51:14 | INFO | train_inner | epoch 004: 2114 / 3002 loss=2.585, ppl=6, wps=5814.2, ups=0.09, wpb=64837, bsz=128, num_updates=11055, lr=9.99196e-05, gnorm=2.011, loss_scale=16, train_wall=11, gb_free=2.8, wall=126748 2021-06-20 05:51:25 | INFO | train_inner | epoch 004: 2115 / 3002 loss=2.507, ppl=5.68, wps=5781.8, ups=0.09, wpb=64797, bsz=128, num_updates=11056, lr=9.99195e-05, gnorm=2.079, loss_scale=16, train_wall=11, gb_free=2.8, wall=126759 2021-06-20 05:51:36 | INFO | train_inner | epoch 004: 2116 / 3002 loss=2.488, ppl=5.61, wps=5831.3, ups=0.09, wpb=64711, bsz=128, num_updates=11057, lr=9.99195e-05, gnorm=2.168, loss_scale=16, train_wall=11, gb_free=2.8, wall=126770 2021-06-20 05:51:47 | INFO | train_inner | epoch 004: 2117 / 3002 loss=2.416, ppl=5.34, wps=5789.9, ups=0.09, wpb=64851, bsz=128, num_updates=11058, lr=9.99195e-05, gnorm=2.266, loss_scale=16, train_wall=11, gb_free=2.8, wall=126781 2021-06-20 05:51:58 | INFO | train_inner | epoch 004: 2118 / 3002 loss=2.552, ppl=5.87, wps=5983.7, ups=0.09, wpb=64929, bsz=128, num_updates=11059, lr=9.99195e-05, gnorm=1.979, loss_scale=16, train_wall=10, gb_free=2.8, wall=126792 2021-06-20 05:52:09 | INFO | train_inner | epoch 004: 2119 / 3002 loss=2.679, ppl=6.41, wps=5788.2, ups=0.09, wpb=64886, bsz=128, num_updates=11060, lr=9.99195e-05, gnorm=2.027, loss_scale=16, train_wall=11, gb_free=2.8, wall=126804 2021-06-20 05:52:20 | INFO | train_inner | epoch 004: 2120 / 3002 loss=2.512, ppl=5.7, wps=5949.8, ups=0.09, wpb=64771, bsz=128, num_updates=11061, lr=9.99195e-05, gnorm=2.228, loss_scale=16, train_wall=10, gb_free=2.8, wall=126814 2021-06-20 05:52:31 | INFO | train_inner | epoch 004: 2121 / 3002 loss=2.534, ppl=5.79, wps=5781.5, ups=0.09, wpb=64822, bsz=128, num_updates=11062, lr=9.99195e-05, gnorm=2.064, loss_scale=16, train_wall=11, gb_free=2.8, wall=126826 2021-06-20 05:52:42 | INFO | train_inner | epoch 004: 2122 / 3002 loss=2.675, ppl=6.39, wps=5807.6, ups=0.09, wpb=64832, bsz=128, num_updates=11063, lr=9.99195e-05, gnorm=2.218, loss_scale=16, train_wall=11, gb_free=2.8, wall=126837 2021-06-20 05:52:54 | INFO | train_inner | epoch 004: 2123 / 3002 loss=2.496, ppl=5.64, wps=5731.6, ups=0.09, wpb=64788, bsz=128, num_updates=11064, lr=9.99195e-05, gnorm=2.04, loss_scale=16, train_wall=11, gb_free=2.8, wall=126848 2021-06-20 05:53:05 | INFO | train_inner | epoch 004: 2124 / 3002 loss=2.862, ppl=7.27, wps=5995.2, ups=0.09, wpb=64894, bsz=128, num_updates=11065, lr=9.99195e-05, gnorm=2.042, loss_scale=16, train_wall=10, gb_free=2.8, wall=126859 2021-06-20 05:53:16 | INFO | train_inner | epoch 004: 2125 / 3002 loss=2.548, ppl=5.85, wps=5797.7, ups=0.09, wpb=64819, bsz=128, num_updates=11066, lr=9.99195e-05, gnorm=2.006, loss_scale=16, train_wall=11, gb_free=2.8, wall=126870 2021-06-20 05:53:27 | INFO | train_inner | epoch 004: 2126 / 3002 loss=2.67, ppl=6.37, wps=6020.6, ups=0.09, wpb=64869, bsz=128, num_updates=11067, lr=9.99195e-05, gnorm=2.048, loss_scale=16, train_wall=10, gb_free=2.8, wall=126881 2021-06-20 05:53:38 | INFO | train_inner | epoch 004: 2127 / 3002 loss=2.595, ppl=6.04, wps=5832.2, ups=0.09, wpb=64815, bsz=128, num_updates=11068, lr=9.99194e-05, gnorm=2.05, loss_scale=16, train_wall=11, gb_free=2.8, wall=126892 2021-06-20 05:53:49 | INFO | train_inner | epoch 004: 2128 / 3002 loss=2.542, ppl=5.82, wps=5814.8, ups=0.09, wpb=64775, bsz=128, num_updates=11069, lr=9.99194e-05, gnorm=2.005, loss_scale=16, train_wall=11, gb_free=2.8, wall=126903 2021-06-20 05:54:00 | INFO | train_inner | epoch 004: 2129 / 3002 loss=2.518, ppl=5.73, wps=5809.8, ups=0.09, wpb=64817, bsz=128, num_updates=11070, lr=9.99194e-05, gnorm=2.015, loss_scale=16, train_wall=11, gb_free=2.8, wall=126914 2021-06-20 05:54:11 | INFO | train_inner | epoch 004: 2130 / 3002 loss=2.474, ppl=5.55, wps=5855.2, ups=0.09, wpb=64902, bsz=128, num_updates=11071, lr=9.99194e-05, gnorm=1.989, loss_scale=16, train_wall=11, gb_free=2.8, wall=126925 2021-06-20 05:54:22 | INFO | train_inner | epoch 004: 2131 / 3002 loss=2.577, ppl=5.97, wps=5902.7, ups=0.09, wpb=64898, bsz=128, num_updates=11072, lr=9.99194e-05, gnorm=2.077, loss_scale=16, train_wall=11, gb_free=2.8, wall=126936 2021-06-20 05:54:33 | INFO | train_inner | epoch 004: 2132 / 3002 loss=2.419, ppl=5.35, wps=5926.8, ups=0.09, wpb=64760, bsz=128, num_updates=11073, lr=9.99194e-05, gnorm=1.934, loss_scale=16, train_wall=10, gb_free=2.8, wall=126947 2021-06-20 05:54:44 | INFO | train_inner | epoch 004: 2133 / 3002 loss=2.434, ppl=5.4, wps=5871.3, ups=0.09, wpb=64956, bsz=128, num_updates=11074, lr=9.99194e-05, gnorm=2.025, loss_scale=16, train_wall=11, gb_free=2.8, wall=126958 2021-06-20 05:54:55 | INFO | train_inner | epoch 004: 2134 / 3002 loss=2.629, ppl=6.18, wps=5849.2, ups=0.09, wpb=64846, bsz=128, num_updates=11075, lr=9.99194e-05, gnorm=2.059, loss_scale=16, train_wall=11, gb_free=2.8, wall=126969 2021-06-20 05:55:06 | INFO | train_inner | epoch 004: 2135 / 3002 loss=2.6, ppl=6.06, wps=5817.3, ups=0.09, wpb=64858, bsz=128, num_updates=11076, lr=9.99194e-05, gnorm=1.972, loss_scale=16, train_wall=11, gb_free=2.8, wall=126981 2021-06-20 05:55:17 | INFO | train_inner | epoch 004: 2136 / 3002 loss=2.531, ppl=5.78, wps=5849.9, ups=0.09, wpb=64888, bsz=128, num_updates=11077, lr=9.99194e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=126992 2021-06-20 05:55:28 | INFO | train_inner | epoch 004: 2137 / 3002 loss=2.57, ppl=5.94, wps=5854.1, ups=0.09, wpb=64821, bsz=128, num_updates=11078, lr=9.99194e-05, gnorm=2.053, loss_scale=16, train_wall=11, gb_free=2.8, wall=127003 2021-06-20 05:55:40 | INFO | train_inner | epoch 004: 2138 / 3002 loss=2.631, ppl=6.19, wps=5818.5, ups=0.09, wpb=64770, bsz=128, num_updates=11079, lr=9.99194e-05, gnorm=2.413, loss_scale=16, train_wall=11, gb_free=2.8, wall=127014 2021-06-20 05:55:51 | INFO | train_inner | epoch 004: 2139 / 3002 loss=2.652, ppl=6.28, wps=5912.9, ups=0.09, wpb=64861, bsz=128, num_updates=11080, lr=9.99194e-05, gnorm=2.011, loss_scale=16, train_wall=10, gb_free=2.8, wall=127025 2021-06-20 05:56:02 | INFO | train_inner | epoch 004: 2140 / 3002 loss=2.485, ppl=5.6, wps=5842.8, ups=0.09, wpb=64891, bsz=128, num_updates=11081, lr=9.99193e-05, gnorm=1.908, loss_scale=16, train_wall=11, gb_free=2.8, wall=127036 2021-06-20 05:56:13 | INFO | train_inner | epoch 004: 2141 / 3002 loss=2.475, ppl=5.56, wps=5785.6, ups=0.09, wpb=64882, bsz=128, num_updates=11082, lr=9.99193e-05, gnorm=1.882, loss_scale=16, train_wall=11, gb_free=2.8, wall=127047 2021-06-20 05:56:24 | INFO | train_inner | epoch 004: 2142 / 3002 loss=2.648, ppl=6.27, wps=5812.8, ups=0.09, wpb=64898, bsz=128, num_updates=11083, lr=9.99193e-05, gnorm=2.015, loss_scale=16, train_wall=11, gb_free=2.8, wall=127058 2021-06-20 05:56:35 | INFO | train_inner | epoch 004: 2143 / 3002 loss=2.57, ppl=5.94, wps=5865.8, ups=0.09, wpb=64767, bsz=128, num_updates=11084, lr=9.99193e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=127069 2021-06-20 05:56:46 | INFO | train_inner | epoch 004: 2144 / 3002 loss=2.503, ppl=5.67, wps=5830.1, ups=0.09, wpb=64756, bsz=128, num_updates=11085, lr=9.99193e-05, gnorm=2.013, loss_scale=16, train_wall=11, gb_free=2.8, wall=127081 2021-06-20 05:56:57 | INFO | train_inner | epoch 004: 2145 / 3002 loss=2.618, ppl=6.14, wps=5750.3, ups=0.09, wpb=64749, bsz=128, num_updates=11086, lr=9.99193e-05, gnorm=2.059, loss_scale=16, train_wall=11, gb_free=2.8, wall=127092 2021-06-20 05:57:09 | INFO | train_inner | epoch 004: 2146 / 3002 loss=2.537, ppl=5.8, wps=5765.4, ups=0.09, wpb=64769, bsz=128, num_updates=11087, lr=9.99193e-05, gnorm=1.972, loss_scale=16, train_wall=11, gb_free=2.8, wall=127103 2021-06-20 05:57:20 | INFO | train_inner | epoch 004: 2147 / 3002 loss=2.63, ppl=6.19, wps=5807.6, ups=0.09, wpb=64850, bsz=128, num_updates=11088, lr=9.99193e-05, gnorm=2.029, loss_scale=16, train_wall=11, gb_free=2.8, wall=127114 2021-06-20 05:57:31 | INFO | train_inner | epoch 004: 2148 / 3002 loss=2.554, ppl=5.87, wps=5872, ups=0.09, wpb=64887, bsz=128, num_updates=11089, lr=9.99193e-05, gnorm=2.015, loss_scale=16, train_wall=11, gb_free=2.8, wall=127125 2021-06-20 05:57:42 | INFO | train_inner | epoch 004: 2149 / 3002 loss=2.436, ppl=5.41, wps=5898.1, ups=0.09, wpb=64902, bsz=128, num_updates=11090, lr=9.99193e-05, gnorm=1.999, loss_scale=16, train_wall=11, gb_free=2.8, wall=127136 2021-06-20 05:57:53 | INFO | train_inner | epoch 004: 2150 / 3002 loss=2.621, ppl=6.15, wps=5908.4, ups=0.09, wpb=64833, bsz=128, num_updates=11091, lr=9.99193e-05, gnorm=2.04, loss_scale=16, train_wall=11, gb_free=2.8, wall=127147 2021-06-20 05:58:04 | INFO | train_inner | epoch 004: 2151 / 3002 loss=2.609, ppl=6.1, wps=5957.5, ups=0.09, wpb=64846, bsz=128, num_updates=11092, lr=9.99193e-05, gnorm=2.107, loss_scale=16, train_wall=10, gb_free=2.8, wall=127158 2021-06-20 05:58:15 | INFO | train_inner | epoch 004: 2152 / 3002 loss=2.543, ppl=5.83, wps=5861.4, ups=0.09, wpb=64836, bsz=128, num_updates=11093, lr=9.99192e-05, gnorm=1.958, loss_scale=16, train_wall=11, gb_free=2.8, wall=127169 2021-06-20 05:58:26 | INFO | train_inner | epoch 004: 2153 / 3002 loss=2.543, ppl=5.83, wps=5798.9, ups=0.09, wpb=64841, bsz=128, num_updates=11094, lr=9.99192e-05, gnorm=2.001, loss_scale=16, train_wall=11, gb_free=2.8, wall=127180 2021-06-20 05:58:37 | INFO | train_inner | epoch 004: 2154 / 3002 loss=2.511, ppl=5.7, wps=5890.7, ups=0.09, wpb=64852, bsz=128, num_updates=11095, lr=9.99192e-05, gnorm=2.137, loss_scale=16, train_wall=11, gb_free=2.8, wall=127191 2021-06-20 05:58:48 | INFO | train_inner | epoch 004: 2155 / 3002 loss=2.537, ppl=5.8, wps=5844.8, ups=0.09, wpb=64826, bsz=128, num_updates=11096, lr=9.99192e-05, gnorm=1.964, loss_scale=16, train_wall=11, gb_free=2.8, wall=127202 2021-06-20 05:58:59 | INFO | train_inner | epoch 004: 2156 / 3002 loss=2.625, ppl=6.17, wps=5839.8, ups=0.09, wpb=64743, bsz=128, num_updates=11097, lr=9.99192e-05, gnorm=2.067, loss_scale=16, train_wall=11, gb_free=2.8, wall=127214 2021-06-20 05:59:10 | INFO | train_inner | epoch 004: 2157 / 3002 loss=2.551, ppl=5.86, wps=5885.6, ups=0.09, wpb=64786, bsz=128, num_updates=11098, lr=9.99192e-05, gnorm=1.991, loss_scale=16, train_wall=11, gb_free=2.8, wall=127225 2021-06-20 05:59:21 | INFO | train_inner | epoch 004: 2158 / 3002 loss=2.594, ppl=6.04, wps=5882.5, ups=0.09, wpb=64897, bsz=128, num_updates=11099, lr=9.99192e-05, gnorm=2.089, loss_scale=16, train_wall=11, gb_free=2.8, wall=127236 2021-06-20 05:59:32 | INFO | train_inner | epoch 004: 2159 / 3002 loss=2.423, ppl=5.36, wps=5918, ups=0.09, wpb=64825, bsz=128, num_updates=11100, lr=9.99192e-05, gnorm=1.987, loss_scale=16, train_wall=11, gb_free=2.8, wall=127247 2021-06-20 05:59:43 | INFO | train_inner | epoch 004: 2160 / 3002 loss=2.597, ppl=6.05, wps=5886, ups=0.09, wpb=64850, bsz=128, num_updates=11101, lr=9.99192e-05, gnorm=2.145, loss_scale=16, train_wall=11, gb_free=2.8, wall=127258 2021-06-20 05:59:54 | INFO | train_inner | epoch 004: 2161 / 3002 loss=2.659, ppl=6.32, wps=5837.2, ups=0.09, wpb=64868, bsz=128, num_updates=11102, lr=9.99192e-05, gnorm=2.304, loss_scale=16, train_wall=11, gb_free=2.8, wall=127269 2021-06-20 06:00:05 | INFO | train_inner | epoch 004: 2162 / 3002 loss=2.486, ppl=5.6, wps=5869.4, ups=0.09, wpb=64776, bsz=128, num_updates=11103, lr=9.99192e-05, gnorm=2.022, loss_scale=16, train_wall=11, gb_free=2.8, wall=127280 2021-06-20 06:00:16 | INFO | train_inner | epoch 004: 2163 / 3002 loss=2.782, ppl=6.88, wps=5822.5, ups=0.09, wpb=64780, bsz=128, num_updates=11104, lr=9.99192e-05, gnorm=2.075, loss_scale=16, train_wall=11, gb_free=2.8, wall=127291 2021-06-20 06:00:28 | INFO | train_inner | epoch 004: 2164 / 3002 loss=2.603, ppl=6.08, wps=5752.8, ups=0.09, wpb=64828, bsz=128, num_updates=11105, lr=9.99192e-05, gnorm=2.155, loss_scale=16, train_wall=11, gb_free=2.8, wall=127302 2021-06-20 06:00:39 | INFO | train_inner | epoch 004: 2165 / 3002 loss=2.416, ppl=5.34, wps=5959.6, ups=0.09, wpb=64898, bsz=128, num_updates=11106, lr=9.99191e-05, gnorm=1.97, loss_scale=16, train_wall=10, gb_free=2.8, wall=127313 2021-06-20 06:00:50 | INFO | train_inner | epoch 004: 2166 / 3002 loss=2.66, ppl=6.32, wps=5723.3, ups=0.09, wpb=64820, bsz=128, num_updates=11107, lr=9.99191e-05, gnorm=2.001, loss_scale=16, train_wall=11, gb_free=2.8, wall=127324 2021-06-20 06:01:01 | INFO | train_inner | epoch 004: 2167 / 3002 loss=2.697, ppl=6.49, wps=5819.1, ups=0.09, wpb=64830, bsz=128, num_updates=11108, lr=9.99191e-05, gnorm=2.056, loss_scale=16, train_wall=11, gb_free=2.8, wall=127335 2021-06-20 06:01:12 | INFO | train_inner | epoch 004: 2168 / 3002 loss=2.542, ppl=5.83, wps=5820.1, ups=0.09, wpb=64731, bsz=128, num_updates=11109, lr=9.99191e-05, gnorm=2.108, loss_scale=16, train_wall=11, gb_free=2.8, wall=127347 2021-06-20 06:01:23 | INFO | train_inner | epoch 004: 2169 / 3002 loss=2.482, ppl=5.59, wps=5892.7, ups=0.09, wpb=64912, bsz=128, num_updates=11110, lr=9.99191e-05, gnorm=1.928, loss_scale=16, train_wall=11, gb_free=2.8, wall=127358 2021-06-20 06:01:34 | INFO | train_inner | epoch 004: 2170 / 3002 loss=2.497, ppl=5.65, wps=5793.8, ups=0.09, wpb=64875, bsz=128, num_updates=11111, lr=9.99191e-05, gnorm=1.968, loss_scale=16, train_wall=11, gb_free=2.8, wall=127369 2021-06-20 06:01:46 | INFO | train_inner | epoch 004: 2171 / 3002 loss=2.709, ppl=6.54, wps=5854.9, ups=0.09, wpb=64791, bsz=128, num_updates=11112, lr=9.99191e-05, gnorm=2.048, loss_scale=16, train_wall=11, gb_free=2.8, wall=127380 2021-06-20 06:01:57 | INFO | train_inner | epoch 004: 2172 / 3002 loss=2.628, ppl=6.18, wps=5892.9, ups=0.09, wpb=64802, bsz=128, num_updates=11113, lr=9.99191e-05, gnorm=1.955, loss_scale=16, train_wall=11, gb_free=2.8, wall=127391 2021-06-20 06:02:07 | INFO | train_inner | epoch 004: 2173 / 3002 loss=2.588, ppl=6.01, wps=5987.6, ups=0.09, wpb=64812, bsz=128, num_updates=11114, lr=9.99191e-05, gnorm=2.189, loss_scale=16, train_wall=10, gb_free=2.8, wall=127402 2021-06-20 06:02:19 | INFO | train_inner | epoch 004: 2174 / 3002 loss=2.567, ppl=5.93, wps=5776.3, ups=0.09, wpb=64762, bsz=128, num_updates=11115, lr=9.99191e-05, gnorm=1.982, loss_scale=16, train_wall=11, gb_free=2.8, wall=127413 2021-06-20 06:02:30 | INFO | train_inner | epoch 004: 2175 / 3002 loss=2.629, ppl=6.18, wps=5801.6, ups=0.09, wpb=64873, bsz=128, num_updates=11116, lr=9.99191e-05, gnorm=1.985, loss_scale=16, train_wall=11, gb_free=2.8, wall=127424 2021-06-20 06:02:41 | INFO | train_inner | epoch 004: 2176 / 3002 loss=2.496, ppl=5.64, wps=5779, ups=0.09, wpb=64906, bsz=128, num_updates=11117, lr=9.99191e-05, gnorm=2.02, loss_scale=32, train_wall=11, gb_free=2.8, wall=127435 2021-06-20 06:02:52 | INFO | train_inner | epoch 004: 2177 / 3002 loss=2.736, ppl=6.66, wps=5909.8, ups=0.09, wpb=64759, bsz=128, num_updates=11118, lr=9.9919e-05, gnorm=2.014, loss_scale=32, train_wall=11, gb_free=2.8, wall=127446 2021-06-20 06:03:03 | INFO | train_inner | epoch 004: 2178 / 3002 loss=2.561, ppl=5.9, wps=5905, ups=0.09, wpb=64851, bsz=128, num_updates=11119, lr=9.9919e-05, gnorm=2.007, loss_scale=32, train_wall=11, gb_free=2.8, wall=127457 2021-06-20 06:03:14 | INFO | train_inner | epoch 004: 2179 / 3002 loss=2.431, ppl=5.39, wps=5783.9, ups=0.09, wpb=64844, bsz=128, num_updates=11120, lr=9.9919e-05, gnorm=2.162, loss_scale=32, train_wall=11, gb_free=2.8, wall=127468 2021-06-20 06:03:25 | INFO | train_inner | epoch 004: 2180 / 3002 loss=2.643, ppl=6.25, wps=5867.6, ups=0.09, wpb=64847, bsz=128, num_updates=11121, lr=9.9919e-05, gnorm=2.119, loss_scale=32, train_wall=11, gb_free=2.8, wall=127479 2021-06-20 06:03:36 | INFO | train_inner | epoch 004: 2181 / 3002 loss=2.653, ppl=6.29, wps=5768, ups=0.09, wpb=64794, bsz=128, num_updates=11122, lr=9.9919e-05, gnorm=2.078, loss_scale=32, train_wall=11, gb_free=2.8, wall=127491 2021-06-20 06:03:48 | INFO | train_inner | epoch 004: 2182 / 3002 loss=2.496, ppl=5.64, wps=5776, ups=0.09, wpb=64897, bsz=128, num_updates=11123, lr=9.9919e-05, gnorm=2.047, loss_scale=32, train_wall=11, gb_free=2.8, wall=127502 2021-06-20 06:03:59 | INFO | train_inner | epoch 004: 2183 / 3002 loss=2.665, ppl=6.34, wps=5884.9, ups=0.09, wpb=64856, bsz=128, num_updates=11124, lr=9.9919e-05, gnorm=2.075, loss_scale=32, train_wall=11, gb_free=2.8, wall=127513 2021-06-20 06:04:10 | INFO | train_inner | epoch 004: 2184 / 3002 loss=2.588, ppl=6.01, wps=5918.2, ups=0.09, wpb=64784, bsz=128, num_updates=11125, lr=9.9919e-05, gnorm=1.994, loss_scale=32, train_wall=10, gb_free=2.8, wall=127524 2021-06-20 06:04:21 | INFO | train_inner | epoch 004: 2185 / 3002 loss=2.568, ppl=5.93, wps=5880.2, ups=0.09, wpb=64855, bsz=128, num_updates=11126, lr=9.9919e-05, gnorm=2, loss_scale=32, train_wall=11, gb_free=2.8, wall=127535 2021-06-20 06:04:32 | INFO | train_inner | epoch 004: 2186 / 3002 loss=2.633, ppl=6.2, wps=5727.4, ups=0.09, wpb=64830, bsz=128, num_updates=11127, lr=9.9919e-05, gnorm=2.014, loss_scale=32, train_wall=11, gb_free=2.8, wall=127546 2021-06-20 06:04:43 | INFO | train_inner | epoch 004: 2187 / 3002 loss=2.591, ppl=6.03, wps=5865.1, ups=0.09, wpb=64851, bsz=128, num_updates=11128, lr=9.9919e-05, gnorm=2.096, loss_scale=32, train_wall=11, gb_free=2.8, wall=127557 2021-06-20 06:04:54 | INFO | train_inner | epoch 004: 2188 / 3002 loss=2.51, ppl=5.7, wps=5951, ups=0.09, wpb=64846, bsz=128, num_updates=11129, lr=9.9919e-05, gnorm=1.974, loss_scale=32, train_wall=10, gb_free=2.8, wall=127568 2021-06-20 06:05:05 | INFO | train_inner | epoch 004: 2189 / 3002 loss=2.576, ppl=5.96, wps=5793.2, ups=0.09, wpb=64828, bsz=128, num_updates=11130, lr=9.9919e-05, gnorm=2.11, loss_scale=32, train_wall=11, gb_free=2.8, wall=127579 2021-06-20 06:05:16 | INFO | train_inner | epoch 004: 2190 / 3002 loss=2.514, ppl=5.71, wps=5802.4, ups=0.09, wpb=64834, bsz=128, num_updates=11131, lr=9.99189e-05, gnorm=2.256, loss_scale=32, train_wall=11, gb_free=2.8, wall=127591 2021-06-20 06:05:27 | INFO | train_inner | epoch 004: 2191 / 3002 loss=2.618, ppl=6.14, wps=5806, ups=0.09, wpb=64798, bsz=128, num_updates=11132, lr=9.99189e-05, gnorm=2.75, loss_scale=32, train_wall=11, gb_free=2.8, wall=127602 2021-06-20 06:05:38 | INFO | train_inner | epoch 004: 2192 / 3002 loss=2.543, ppl=5.83, wps=5863.5, ups=0.09, wpb=64819, bsz=128, num_updates=11133, lr=9.99189e-05, gnorm=2.11, loss_scale=32, train_wall=11, gb_free=2.8, wall=127613 2021-06-20 06:05:49 | INFO | train_inner | epoch 004: 2193 / 3002 loss=2.469, ppl=5.54, wps=5917.1, ups=0.09, wpb=64848, bsz=128, num_updates=11134, lr=9.99189e-05, gnorm=2.01, loss_scale=32, train_wall=11, gb_free=2.8, wall=127624 2021-06-20 06:06:01 | INFO | train_inner | epoch 004: 2194 / 3002 loss=2.53, ppl=5.78, wps=5826.5, ups=0.09, wpb=64779, bsz=128, num_updates=11135, lr=9.99189e-05, gnorm=2.14, loss_scale=32, train_wall=11, gb_free=2.8, wall=127635 2021-06-20 06:06:12 | INFO | train_inner | epoch 004: 2195 / 3002 loss=2.62, ppl=6.15, wps=5839.6, ups=0.09, wpb=64747, bsz=128, num_updates=11136, lr=9.99189e-05, gnorm=1.991, loss_scale=32, train_wall=11, gb_free=2.8, wall=127646 2021-06-20 06:06:23 | INFO | train_inner | epoch 004: 2196 / 3002 loss=2.543, ppl=5.83, wps=5835.5, ups=0.09, wpb=64799, bsz=128, num_updates=11137, lr=9.99189e-05, gnorm=1.998, loss_scale=32, train_wall=11, gb_free=2.8, wall=127657 2021-06-20 06:06:34 | INFO | train_inner | epoch 004: 2197 / 3002 loss=2.594, ppl=6.04, wps=5898.7, ups=0.09, wpb=64799, bsz=128, num_updates=11138, lr=9.99189e-05, gnorm=1.976, loss_scale=32, train_wall=11, gb_free=2.8, wall=127668 2021-06-20 06:06:45 | INFO | train_inner | epoch 004: 2198 / 3002 loss=2.604, ppl=6.08, wps=5862.5, ups=0.09, wpb=64736, bsz=128, num_updates=11139, lr=9.99189e-05, gnorm=2.013, loss_scale=32, train_wall=11, gb_free=2.8, wall=127679 2021-06-20 06:06:56 | INFO | train_inner | epoch 004: 2199 / 3002 loss=2.543, ppl=5.83, wps=5866.5, ups=0.09, wpb=64822, bsz=128, num_updates=11140, lr=9.99189e-05, gnorm=1.972, loss_scale=32, train_wall=11, gb_free=2.8, wall=127690 2021-06-20 06:07:07 | INFO | train_inner | epoch 004: 2200 / 3002 loss=2.525, ppl=5.76, wps=5847.9, ups=0.09, wpb=64827, bsz=128, num_updates=11141, lr=9.99189e-05, gnorm=2.019, loss_scale=32, train_wall=11, gb_free=2.8, wall=127701 2021-06-20 06:07:18 | INFO | train_inner | epoch 004: 2201 / 3002 loss=2.507, ppl=5.69, wps=5915, ups=0.09, wpb=64829, bsz=128, num_updates=11142, lr=9.99189e-05, gnorm=2.069, loss_scale=32, train_wall=10, gb_free=2.8, wall=127712 2021-06-20 06:07:29 | INFO | train_inner | epoch 004: 2202 / 3002 loss=2.623, ppl=6.16, wps=5914.7, ups=0.09, wpb=64794, bsz=128, num_updates=11143, lr=9.99188e-05, gnorm=2.142, loss_scale=32, train_wall=10, gb_free=2.8, wall=127723 2021-06-20 06:07:40 | INFO | train_inner | epoch 004: 2203 / 3002 loss=2.38, ppl=5.2, wps=5895, ups=0.09, wpb=64818, bsz=128, num_updates=11144, lr=9.99188e-05, gnorm=2.356, loss_scale=32, train_wall=11, gb_free=2.8, wall=127734 2021-06-20 06:07:51 | INFO | train_inner | epoch 004: 2204 / 3002 loss=2.48, ppl=5.58, wps=5978.2, ups=0.09, wpb=64777, bsz=128, num_updates=11145, lr=9.99188e-05, gnorm=2.069, loss_scale=32, train_wall=10, gb_free=2.8, wall=127745 2021-06-20 06:08:02 | INFO | train_inner | epoch 004: 2205 / 3002 loss=2.534, ppl=5.79, wps=5806.4, ups=0.09, wpb=64828, bsz=128, num_updates=11146, lr=9.99188e-05, gnorm=2.243, loss_scale=32, train_wall=11, gb_free=2.8, wall=127756 2021-06-20 06:08:13 | INFO | train_inner | epoch 004: 2206 / 3002 loss=2.535, ppl=5.79, wps=5794.6, ups=0.09, wpb=64719, bsz=128, num_updates=11147, lr=9.99188e-05, gnorm=2.008, loss_scale=32, train_wall=11, gb_free=2.8, wall=127767 2021-06-20 06:08:24 | INFO | train_inner | epoch 004: 2207 / 3002 loss=2.579, ppl=5.98, wps=5800, ups=0.09, wpb=64837, bsz=128, num_updates=11148, lr=9.99188e-05, gnorm=2.115, loss_scale=32, train_wall=11, gb_free=2.8, wall=127779 2021-06-20 06:08:35 | INFO | train_inner | epoch 004: 2208 / 3002 loss=2.509, ppl=5.69, wps=5761, ups=0.09, wpb=64908, bsz=128, num_updates=11149, lr=9.99188e-05, gnorm=2.017, loss_scale=32, train_wall=11, gb_free=2.8, wall=127790 2021-06-20 06:08:47 | INFO | train_inner | epoch 004: 2209 / 3002 loss=2.608, ppl=6.1, wps=5779.3, ups=0.09, wpb=64854, bsz=128, num_updates=11150, lr=9.99188e-05, gnorm=2.073, loss_scale=32, train_wall=11, gb_free=2.8, wall=127801 2021-06-20 06:08:58 | INFO | train_inner | epoch 004: 2210 / 3002 loss=2.734, ppl=6.65, wps=5910.3, ups=0.09, wpb=64858, bsz=128, num_updates=11151, lr=9.99188e-05, gnorm=1.996, loss_scale=32, train_wall=11, gb_free=2.8, wall=127812 2021-06-20 06:09:09 | INFO | train_inner | epoch 004: 2211 / 3002 loss=2.543, ppl=5.83, wps=5759.5, ups=0.09, wpb=64793, bsz=128, num_updates=11152, lr=9.99188e-05, gnorm=1.999, loss_scale=32, train_wall=11, gb_free=2.8, wall=127823 2021-06-20 06:09:20 | INFO | train_inner | epoch 004: 2212 / 3002 loss=2.592, ppl=6.03, wps=5941.2, ups=0.09, wpb=64765, bsz=128, num_updates=11153, lr=9.99188e-05, gnorm=2.123, loss_scale=32, train_wall=10, gb_free=2.8, wall=127834 2021-06-20 06:09:31 | INFO | train_inner | epoch 004: 2213 / 3002 loss=2.524, ppl=5.75, wps=5884, ups=0.09, wpb=64839, bsz=128, num_updates=11154, lr=9.99188e-05, gnorm=2.185, loss_scale=32, train_wall=11, gb_free=2.8, wall=127845 2021-06-20 06:09:42 | INFO | train_inner | epoch 004: 2214 / 3002 loss=2.606, ppl=6.09, wps=5839, ups=0.09, wpb=64867, bsz=128, num_updates=11155, lr=9.99188e-05, gnorm=2.095, loss_scale=32, train_wall=11, gb_free=2.8, wall=127856 2021-06-20 06:09:53 | INFO | train_inner | epoch 004: 2215 / 3002 loss=2.513, ppl=5.71, wps=5767, ups=0.09, wpb=64881, bsz=128, num_updates=11156, lr=9.99187e-05, gnorm=2.101, loss_scale=32, train_wall=11, gb_free=2.8, wall=127868 2021-06-20 06:10:04 | INFO | train_inner | epoch 004: 2216 / 3002 loss=2.57, ppl=5.94, wps=5856.6, ups=0.09, wpb=64817, bsz=128, num_updates=11157, lr=9.99187e-05, gnorm=2.017, loss_scale=32, train_wall=11, gb_free=2.8, wall=127879 2021-06-20 06:10:15 | INFO | train_inner | epoch 004: 2217 / 3002 loss=2.611, ppl=6.11, wps=5819.8, ups=0.09, wpb=64852, bsz=128, num_updates=11158, lr=9.99187e-05, gnorm=2.152, loss_scale=32, train_wall=11, gb_free=2.8, wall=127890 2021-06-20 06:10:27 | INFO | train_inner | epoch 004: 2218 / 3002 loss=2.549, ppl=5.85, wps=5785.1, ups=0.09, wpb=64863, bsz=128, num_updates=11159, lr=9.99187e-05, gnorm=2.051, loss_scale=32, train_wall=11, gb_free=2.8, wall=127901 2021-06-20 06:10:38 | INFO | train_inner | epoch 004: 2219 / 3002 loss=2.729, ppl=6.63, wps=5800.9, ups=0.09, wpb=64782, bsz=128, num_updates=11160, lr=9.99187e-05, gnorm=2.007, loss_scale=32, train_wall=11, gb_free=2.8, wall=127912 2021-06-20 06:10:49 | INFO | train_inner | epoch 004: 2220 / 3002 loss=2.527, ppl=5.76, wps=5847.6, ups=0.09, wpb=64852, bsz=128, num_updates=11161, lr=9.99187e-05, gnorm=1.965, loss_scale=32, train_wall=11, gb_free=2.8, wall=127923 2021-06-20 06:11:00 | INFO | train_inner | epoch 004: 2221 / 3002 loss=2.7, ppl=6.5, wps=5811.6, ups=0.09, wpb=64849, bsz=128, num_updates=11162, lr=9.99187e-05, gnorm=2.032, loss_scale=32, train_wall=11, gb_free=2.8, wall=127934 2021-06-20 06:11:11 | INFO | train_inner | epoch 004: 2222 / 3002 loss=2.65, ppl=6.28, wps=5828.2, ups=0.09, wpb=64730, bsz=128, num_updates=11163, lr=9.99187e-05, gnorm=2.039, loss_scale=32, train_wall=11, gb_free=2.8, wall=127945 2021-06-20 06:11:22 | INFO | train_inner | epoch 004: 2223 / 3002 loss=2.504, ppl=5.67, wps=5804.8, ups=0.09, wpb=64801, bsz=128, num_updates=11164, lr=9.99187e-05, gnorm=1.934, loss_scale=32, train_wall=11, gb_free=2.8, wall=127957 2021-06-20 06:11:33 | INFO | train_inner | epoch 004: 2224 / 3002 loss=2.467, ppl=5.53, wps=5901.7, ups=0.09, wpb=64893, bsz=128, num_updates=11165, lr=9.99187e-05, gnorm=2.12, loss_scale=32, train_wall=11, gb_free=2.8, wall=127968 2021-06-20 06:11:44 | INFO | train_inner | epoch 004: 2225 / 3002 loss=2.588, ppl=6.01, wps=5817.4, ups=0.09, wpb=64819, bsz=128, num_updates=11166, lr=9.99187e-05, gnorm=2.037, loss_scale=32, train_wall=11, gb_free=2.8, wall=127979 2021-06-20 06:11:56 | INFO | train_inner | epoch 004: 2226 / 3002 loss=2.52, ppl=5.73, wps=5783.1, ups=0.09, wpb=64833, bsz=128, num_updates=11167, lr=9.99187e-05, gnorm=1.977, loss_scale=32, train_wall=11, gb_free=2.8, wall=127990 2021-06-20 06:12:07 | INFO | train_inner | epoch 004: 2227 / 3002 loss=2.527, ppl=5.76, wps=5897.3, ups=0.09, wpb=64907, bsz=128, num_updates=11168, lr=9.99186e-05, gnorm=2.054, loss_scale=32, train_wall=11, gb_free=2.8, wall=128001 2021-06-20 06:12:18 | INFO | train_inner | epoch 004: 2228 / 3002 loss=2.57, ppl=5.94, wps=5737.9, ups=0.09, wpb=64861, bsz=128, num_updates=11169, lr=9.99186e-05, gnorm=2.132, loss_scale=32, train_wall=11, gb_free=2.8, wall=128012 2021-06-20 06:12:29 | INFO | train_inner | epoch 004: 2229 / 3002 loss=2.493, ppl=5.63, wps=5820.7, ups=0.09, wpb=64834, bsz=128, num_updates=11170, lr=9.99186e-05, gnorm=2.122, loss_scale=32, train_wall=11, gb_free=2.8, wall=128023 2021-06-20 06:12:40 | INFO | train_inner | epoch 004: 2230 / 3002 loss=2.5, ppl=5.66, wps=5921.5, ups=0.09, wpb=64768, bsz=128, num_updates=11171, lr=9.99186e-05, gnorm=1.937, loss_scale=32, train_wall=10, gb_free=2.8, wall=128034 2021-06-20 06:12:51 | INFO | train_inner | epoch 004: 2231 / 3002 loss=2.472, ppl=5.55, wps=5784.7, ups=0.09, wpb=64794, bsz=128, num_updates=11172, lr=9.99186e-05, gnorm=1.944, loss_scale=32, train_wall=11, gb_free=2.8, wall=128046 2021-06-20 06:13:02 | INFO | train_inner | epoch 004: 2232 / 3002 loss=2.587, ppl=6.01, wps=5888.5, ups=0.09, wpb=64884, bsz=128, num_updates=11173, lr=9.99186e-05, gnorm=1.921, loss_scale=32, train_wall=11, gb_free=2.8, wall=128057 2021-06-20 06:13:13 | INFO | train_inner | epoch 004: 2233 / 3002 loss=2.641, ppl=6.24, wps=5791.7, ups=0.09, wpb=64825, bsz=128, num_updates=11174, lr=9.99186e-05, gnorm=2.068, loss_scale=32, train_wall=11, gb_free=2.8, wall=128068 2021-06-20 06:13:24 | INFO | train_inner | epoch 004: 2234 / 3002 loss=2.679, ppl=6.4, wps=5882.9, ups=0.09, wpb=64818, bsz=128, num_updates=11175, lr=9.99186e-05, gnorm=2.054, loss_scale=32, train_wall=11, gb_free=2.8, wall=128079 2021-06-20 06:13:35 | INFO | train_inner | epoch 004: 2235 / 3002 loss=2.526, ppl=5.76, wps=5946, ups=0.09, wpb=64911, bsz=128, num_updates=11176, lr=9.99186e-05, gnorm=2.06, loss_scale=32, train_wall=10, gb_free=2.8, wall=128090 2021-06-20 06:13:46 | INFO | train_inner | epoch 004: 2236 / 3002 loss=2.7, ppl=6.5, wps=5955, ups=0.09, wpb=64865, bsz=128, num_updates=11177, lr=9.99186e-05, gnorm=1.984, loss_scale=32, train_wall=10, gb_free=2.8, wall=128101 2021-06-20 06:13:57 | INFO | train_inner | epoch 004: 2237 / 3002 loss=2.601, ppl=6.07, wps=5815.7, ups=0.09, wpb=64815, bsz=128, num_updates=11178, lr=9.99186e-05, gnorm=2.047, loss_scale=32, train_wall=11, gb_free=2.8, wall=128112 2021-06-20 06:14:09 | INFO | train_inner | epoch 004: 2238 / 3002 loss=2.434, ppl=5.4, wps=5835.8, ups=0.09, wpb=64834, bsz=128, num_updates=11179, lr=9.99186e-05, gnorm=1.956, loss_scale=32, train_wall=11, gb_free=2.8, wall=128123 2021-06-20 06:14:19 | INFO | train_inner | epoch 004: 2239 / 3002 loss=2.669, ppl=6.36, wps=5909.6, ups=0.09, wpb=64844, bsz=128, num_updates=11180, lr=9.99186e-05, gnorm=2.013, loss_scale=32, train_wall=11, gb_free=2.8, wall=128134 2021-06-20 06:14:30 | INFO | train_inner | epoch 004: 2240 / 3002 loss=2.542, ppl=5.83, wps=5996.9, ups=0.09, wpb=64826, bsz=128, num_updates=11181, lr=9.99185e-05, gnorm=2.024, loss_scale=32, train_wall=10, gb_free=2.8, wall=128145 2021-06-20 06:14:41 | INFO | train_inner | epoch 004: 2241 / 3002 loss=2.604, ppl=6.08, wps=5871.3, ups=0.09, wpb=64840, bsz=128, num_updates=11182, lr=9.99185e-05, gnorm=2.044, loss_scale=32, train_wall=11, gb_free=2.8, wall=128156 2021-06-20 06:14:53 | INFO | train_inner | epoch 004: 2242 / 3002 loss=2.584, ppl=6, wps=5804.1, ups=0.09, wpb=64820, bsz=128, num_updates=11183, lr=9.99185e-05, gnorm=2.038, loss_scale=32, train_wall=11, gb_free=2.8, wall=128167 2021-06-20 06:15:04 | INFO | train_inner | epoch 004: 2243 / 3002 loss=2.487, ppl=5.6, wps=5834.6, ups=0.09, wpb=64828, bsz=128, num_updates=11184, lr=9.99185e-05, gnorm=2, loss_scale=32, train_wall=11, gb_free=2.8, wall=128178 2021-06-20 06:15:15 | INFO | train_inner | epoch 004: 2244 / 3002 loss=2.667, ppl=6.35, wps=5879.3, ups=0.09, wpb=64961, bsz=128, num_updates=11185, lr=9.99185e-05, gnorm=2.064, loss_scale=32, train_wall=11, gb_free=2.8, wall=128189 2021-06-20 06:15:26 | INFO | train_inner | epoch 004: 2245 / 3002 loss=2.461, ppl=5.51, wps=5889.3, ups=0.09, wpb=64639, bsz=128, num_updates=11186, lr=9.99185e-05, gnorm=2.084, loss_scale=32, train_wall=10, gb_free=2.8, wall=128200 2021-06-20 06:15:37 | INFO | train_inner | epoch 004: 2246 / 3002 loss=2.638, ppl=6.23, wps=5865.4, ups=0.09, wpb=64843, bsz=128, num_updates=11187, lr=9.99185e-05, gnorm=2.12, loss_scale=32, train_wall=11, gb_free=2.8, wall=128211 2021-06-20 06:15:48 | INFO | train_inner | epoch 004: 2247 / 3002 loss=2.618, ppl=6.14, wps=5941.9, ups=0.09, wpb=64840, bsz=128, num_updates=11188, lr=9.99185e-05, gnorm=2, loss_scale=32, train_wall=10, gb_free=2.8, wall=128222 2021-06-20 06:15:59 | INFO | train_inner | epoch 004: 2248 / 3002 loss=2.604, ppl=6.08, wps=5911, ups=0.09, wpb=64776, bsz=128, num_updates=11189, lr=9.99185e-05, gnorm=1.971, loss_scale=32, train_wall=10, gb_free=2.8, wall=128233 2021-06-20 06:16:10 | INFO | train_inner | epoch 004: 2249 / 3002 loss=2.585, ppl=6, wps=5752.3, ups=0.09, wpb=64774, bsz=128, num_updates=11190, lr=9.99185e-05, gnorm=1.997, loss_scale=32, train_wall=11, gb_free=2.8, wall=128244 2021-06-20 06:16:21 | INFO | train_inner | epoch 004: 2250 / 3002 loss=2.686, ppl=6.44, wps=5828.1, ups=0.09, wpb=64795, bsz=128, num_updates=11191, lr=9.99185e-05, gnorm=2.064, loss_scale=32, train_wall=11, gb_free=2.8, wall=128255 2021-06-20 06:16:32 | INFO | train_inner | epoch 004: 2251 / 3002 loss=2.506, ppl=5.68, wps=5864.3, ups=0.09, wpb=64825, bsz=128, num_updates=11192, lr=9.99185e-05, gnorm=2.053, loss_scale=32, train_wall=11, gb_free=2.8, wall=128266 2021-06-20 06:16:43 | INFO | train_inner | epoch 004: 2252 / 3002 loss=2.471, ppl=5.54, wps=5881.9, ups=0.09, wpb=64824, bsz=128, num_updates=11193, lr=9.99184e-05, gnorm=1.956, loss_scale=32, train_wall=11, gb_free=2.8, wall=128277 2021-06-20 06:16:54 | INFO | train_inner | epoch 004: 2253 / 3002 loss=2.598, ppl=6.06, wps=5827.3, ups=0.09, wpb=64840, bsz=128, num_updates=11194, lr=9.99184e-05, gnorm=1.892, loss_scale=32, train_wall=11, gb_free=2.8, wall=128288 2021-06-20 06:17:05 | INFO | train_inner | epoch 004: 2254 / 3002 loss=2.43, ppl=5.39, wps=5848.7, ups=0.09, wpb=64858, bsz=128, num_updates=11195, lr=9.99184e-05, gnorm=2.039, loss_scale=32, train_wall=11, gb_free=2.8, wall=128300 2021-06-20 06:17:16 | INFO | train_inner | epoch 004: 2255 / 3002 loss=2.552, ppl=5.87, wps=5796, ups=0.09, wpb=64828, bsz=128, num_updates=11196, lr=9.99184e-05, gnorm=2.075, loss_scale=32, train_wall=11, gb_free=2.8, wall=128311 2021-06-20 06:17:28 | INFO | train_inner | epoch 004: 2256 / 3002 loss=2.515, ppl=5.72, wps=5856.9, ups=0.09, wpb=64862, bsz=128, num_updates=11197, lr=9.99184e-05, gnorm=2.053, loss_scale=32, train_wall=11, gb_free=2.8, wall=128322 2021-06-20 06:17:39 | INFO | train_inner | epoch 004: 2257 / 3002 loss=2.612, ppl=6.12, wps=5777.9, ups=0.09, wpb=64764, bsz=128, num_updates=11198, lr=9.99184e-05, gnorm=2.045, loss_scale=32, train_wall=11, gb_free=2.8, wall=128333 2021-06-20 06:17:50 | INFO | train_inner | epoch 004: 2258 / 3002 loss=2.506, ppl=5.68, wps=5928, ups=0.09, wpb=64928, bsz=128, num_updates=11199, lr=9.99184e-05, gnorm=1.989, loss_scale=32, train_wall=10, gb_free=2.8, wall=128344 2021-06-20 06:18:01 | INFO | train_inner | epoch 004: 2259 / 3002 loss=2.495, ppl=5.64, wps=5812.5, ups=0.09, wpb=64848, bsz=128, num_updates=11200, lr=9.99184e-05, gnorm=2.084, loss_scale=32, train_wall=11, gb_free=2.8, wall=128355 2021-06-20 06:18:12 | INFO | train_inner | epoch 004: 2260 / 3002 loss=2.58, ppl=5.98, wps=5877.7, ups=0.09, wpb=64808, bsz=128, num_updates=11201, lr=9.99184e-05, gnorm=2.011, loss_scale=32, train_wall=11, gb_free=2.8, wall=128366 2021-06-20 06:18:23 | INFO | train_inner | epoch 004: 2261 / 3002 loss=2.576, ppl=5.96, wps=5859.2, ups=0.09, wpb=64771, bsz=128, num_updates=11202, lr=9.99184e-05, gnorm=1.948, loss_scale=32, train_wall=11, gb_free=2.8, wall=128377 2021-06-20 06:18:34 | INFO | train_inner | epoch 004: 2262 / 3002 loss=2.842, ppl=7.17, wps=5788.2, ups=0.09, wpb=64852, bsz=128, num_updates=11203, lr=9.99184e-05, gnorm=2.102, loss_scale=32, train_wall=11, gb_free=2.8, wall=128388 2021-06-20 06:18:45 | INFO | train_inner | epoch 004: 2263 / 3002 loss=2.609, ppl=6.1, wps=5877.5, ups=0.09, wpb=64881, bsz=128, num_updates=11204, lr=9.99184e-05, gnorm=2.013, loss_scale=32, train_wall=11, gb_free=2.8, wall=128399 2021-06-20 06:18:56 | INFO | train_inner | epoch 004: 2264 / 3002 loss=2.464, ppl=5.52, wps=5745.1, ups=0.09, wpb=64766, bsz=128, num_updates=11205, lr=9.99184e-05, gnorm=2.019, loss_scale=32, train_wall=11, gb_free=2.8, wall=128411 2021-06-20 06:19:08 | INFO | train_inner | epoch 004: 2265 / 3002 loss=2.522, ppl=5.74, wps=5757.6, ups=0.09, wpb=64700, bsz=128, num_updates=11206, lr=9.99183e-05, gnorm=1.953, loss_scale=32, train_wall=11, gb_free=2.8, wall=128422 2021-06-20 06:19:19 | INFO | train_inner | epoch 004: 2266 / 3002 loss=2.579, ppl=5.97, wps=5789.2, ups=0.09, wpb=64783, bsz=128, num_updates=11207, lr=9.99183e-05, gnorm=2.018, loss_scale=32, train_wall=11, gb_free=2.8, wall=128433 2021-06-20 06:19:30 | INFO | train_inner | epoch 004: 2267 / 3002 loss=2.653, ppl=6.29, wps=5867.1, ups=0.09, wpb=64821, bsz=128, num_updates=11208, lr=9.99183e-05, gnorm=2.086, loss_scale=32, train_wall=11, gb_free=2.8, wall=128444 2021-06-20 06:19:41 | INFO | train_inner | epoch 004: 2268 / 3002 loss=2.513, ppl=5.71, wps=5811.7, ups=0.09, wpb=64854, bsz=128, num_updates=11209, lr=9.99183e-05, gnorm=2.493, loss_scale=32, train_wall=11, gb_free=2.8, wall=128455 2021-06-20 06:19:52 | INFO | train_inner | epoch 004: 2269 / 3002 loss=2.529, ppl=5.77, wps=5876.6, ups=0.09, wpb=64763, bsz=128, num_updates=11210, lr=9.99183e-05, gnorm=1.972, loss_scale=32, train_wall=11, gb_free=2.8, wall=128466 2021-06-20 06:20:03 | INFO | train_inner | epoch 004: 2270 / 3002 loss=2.615, ppl=6.13, wps=5800.6, ups=0.09, wpb=64858, bsz=128, num_updates=11211, lr=9.99183e-05, gnorm=1.932, loss_scale=32, train_wall=11, gb_free=2.8, wall=128478 2021-06-20 06:20:14 | INFO | train_inner | epoch 004: 2271 / 3002 loss=2.608, ppl=6.1, wps=5848.5, ups=0.09, wpb=64868, bsz=128, num_updates=11212, lr=9.99183e-05, gnorm=2.023, loss_scale=32, train_wall=11, gb_free=2.8, wall=128489 2021-06-20 06:20:25 | INFO | train_inner | epoch 004: 2272 / 3002 loss=2.6, ppl=6.06, wps=5934.5, ups=0.09, wpb=64915, bsz=128, num_updates=11213, lr=9.99183e-05, gnorm=2.025, loss_scale=32, train_wall=10, gb_free=2.8, wall=128500 2021-06-20 06:20:37 | INFO | train_inner | epoch 004: 2273 / 3002 loss=2.609, ppl=6.1, wps=5706.2, ups=0.09, wpb=64785, bsz=128, num_updates=11214, lr=9.99183e-05, gnorm=1.996, loss_scale=32, train_wall=11, gb_free=2.8, wall=128511 2021-06-20 06:20:48 | INFO | train_inner | epoch 004: 2274 / 3002 loss=2.651, ppl=6.28, wps=5842, ups=0.09, wpb=64798, bsz=128, num_updates=11215, lr=9.99183e-05, gnorm=2.023, loss_scale=32, train_wall=11, gb_free=2.8, wall=128522 2021-06-20 06:20:59 | INFO | train_inner | epoch 004: 2275 / 3002 loss=2.544, ppl=5.83, wps=5969.8, ups=0.09, wpb=64841, bsz=128, num_updates=11216, lr=9.99183e-05, gnorm=1.979, loss_scale=32, train_wall=10, gb_free=2.8, wall=128533 2021-06-20 06:21:10 | INFO | train_inner | epoch 004: 2276 / 3002 loss=2.488, ppl=5.61, wps=5786.2, ups=0.09, wpb=64896, bsz=128, num_updates=11217, lr=9.99183e-05, gnorm=2.181, loss_scale=32, train_wall=11, gb_free=2.8, wall=128544 2021-06-20 06:21:21 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 2021-06-20 06:21:32 | INFO | train_inner | epoch 004: 2278 / 3002 loss=2.547, ppl=5.84, wps=2932.2, ups=0.05, wpb=64832, bsz=128, num_updates=11218, lr=9.99182e-05, gnorm=2.006, loss_scale=16, train_wall=21, gb_free=2.8, wall=128566 2021-06-20 06:21:43 | INFO | train_inner | epoch 004: 2279 / 3002 loss=2.597, ppl=6.05, wps=5823.2, ups=0.09, wpb=64778, bsz=128, num_updates=11219, lr=9.99182e-05, gnorm=2.102, loss_scale=16, train_wall=11, gb_free=2.8, wall=128577 2021-06-20 06:21:54 | INFO | train_inner | epoch 004: 2280 / 3002 loss=2.672, ppl=6.37, wps=5845.8, ups=0.09, wpb=64820, bsz=128, num_updates=11220, lr=9.99182e-05, gnorm=2.041, loss_scale=16, train_wall=11, gb_free=2.8, wall=128588 2021-06-20 06:22:05 | INFO | train_inner | epoch 004: 2281 / 3002 loss=2.465, ppl=5.52, wps=5974, ups=0.09, wpb=64782, bsz=128, num_updates=11221, lr=9.99182e-05, gnorm=1.931, loss_scale=16, train_wall=10, gb_free=2.8, wall=128599 2021-06-20 06:22:16 | INFO | train_inner | epoch 004: 2282 / 3002 loss=2.553, ppl=5.87, wps=5858.5, ups=0.09, wpb=64869, bsz=128, num_updates=11222, lr=9.99182e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=128610 2021-06-20 06:22:27 | INFO | train_inner | epoch 004: 2283 / 3002 loss=2.466, ppl=5.52, wps=5739.7, ups=0.09, wpb=64892, bsz=128, num_updates=11223, lr=9.99182e-05, gnorm=2.012, loss_scale=16, train_wall=11, gb_free=2.8, wall=128622 2021-06-20 06:22:38 | INFO | train_inner | epoch 004: 2284 / 3002 loss=2.532, ppl=5.78, wps=5855.8, ups=0.09, wpb=64917, bsz=128, num_updates=11224, lr=9.99182e-05, gnorm=1.979, loss_scale=16, train_wall=11, gb_free=2.8, wall=128633 2021-06-20 06:22:50 | INFO | train_inner | epoch 004: 2285 / 3002 loss=2.653, ppl=6.29, wps=5762.1, ups=0.09, wpb=64893, bsz=128, num_updates=11225, lr=9.99182e-05, gnorm=1.984, loss_scale=16, train_wall=11, gb_free=2.8, wall=128644 2021-06-20 06:23:01 | INFO | train_inner | epoch 004: 2286 / 3002 loss=2.401, ppl=5.28, wps=5773.7, ups=0.09, wpb=64808, bsz=128, num_updates=11226, lr=9.99182e-05, gnorm=2.083, loss_scale=16, train_wall=11, gb_free=2.8, wall=128655 2021-06-20 06:23:12 | INFO | train_inner | epoch 004: 2287 / 3002 loss=2.521, ppl=5.74, wps=5886.9, ups=0.09, wpb=64868, bsz=128, num_updates=11227, lr=9.99182e-05, gnorm=2.081, loss_scale=16, train_wall=11, gb_free=2.8, wall=128666 2021-06-20 06:23:23 | INFO | train_inner | epoch 004: 2288 / 3002 loss=2.616, ppl=6.13, wps=5795.1, ups=0.09, wpb=64834, bsz=128, num_updates=11228, lr=9.99182e-05, gnorm=2.063, loss_scale=16, train_wall=11, gb_free=2.8, wall=128677 2021-06-20 06:23:34 | INFO | train_inner | epoch 004: 2289 / 3002 loss=2.556, ppl=5.88, wps=5851.8, ups=0.09, wpb=64790, bsz=128, num_updates=11229, lr=9.99182e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=128689 2021-06-20 06:23:45 | INFO | train_inner | epoch 004: 2290 / 3002 loss=2.451, ppl=5.47, wps=5811.5, ups=0.09, wpb=64882, bsz=128, num_updates=11230, lr=9.99182e-05, gnorm=2.035, loss_scale=16, train_wall=11, gb_free=2.8, wall=128700 2021-06-20 06:23:56 | INFO | train_inner | epoch 004: 2291 / 3002 loss=2.455, ppl=5.48, wps=5876, ups=0.09, wpb=64877, bsz=128, num_updates=11231, lr=9.99181e-05, gnorm=2.028, loss_scale=16, train_wall=11, gb_free=2.8, wall=128711 2021-06-20 06:24:08 | INFO | train_inner | epoch 004: 2292 / 3002 loss=2.508, ppl=5.69, wps=5837.2, ups=0.09, wpb=64731, bsz=128, num_updates=11232, lr=9.99181e-05, gnorm=2.004, loss_scale=16, train_wall=11, gb_free=2.8, wall=128722 2021-06-20 06:24:18 | INFO | train_inner | epoch 004: 2293 / 3002 loss=2.657, ppl=6.31, wps=5942.7, ups=0.09, wpb=64803, bsz=128, num_updates=11233, lr=9.99181e-05, gnorm=2.103, loss_scale=16, train_wall=10, gb_free=2.8, wall=128733 2021-06-20 06:24:29 | INFO | train_inner | epoch 004: 2294 / 3002 loss=2.512, ppl=5.71, wps=5858.1, ups=0.09, wpb=64873, bsz=128, num_updates=11234, lr=9.99181e-05, gnorm=2.061, loss_scale=16, train_wall=11, gb_free=2.8, wall=128744 2021-06-20 06:24:41 | INFO | train_inner | epoch 004: 2295 / 3002 loss=2.654, ppl=6.3, wps=5811.2, ups=0.09, wpb=64797, bsz=128, num_updates=11235, lr=9.99181e-05, gnorm=2.125, loss_scale=16, train_wall=11, gb_free=2.8, wall=128755 2021-06-20 06:24:52 | INFO | train_inner | epoch 004: 2296 / 3002 loss=2.513, ppl=5.71, wps=5711, ups=0.09, wpb=64877, bsz=128, num_updates=11236, lr=9.99181e-05, gnorm=2.016, loss_scale=16, train_wall=11, gb_free=2.8, wall=128766 2021-06-20 06:25:03 | INFO | train_inner | epoch 004: 2297 / 3002 loss=2.575, ppl=5.96, wps=5770.4, ups=0.09, wpb=64822, bsz=128, num_updates=11237, lr=9.99181e-05, gnorm=2.017, loss_scale=16, train_wall=11, gb_free=2.8, wall=128778 2021-06-20 06:25:14 | INFO | train_inner | epoch 004: 2298 / 3002 loss=2.504, ppl=5.67, wps=5768.9, ups=0.09, wpb=64814, bsz=128, num_updates=11238, lr=9.99181e-05, gnorm=2.046, loss_scale=16, train_wall=11, gb_free=2.8, wall=128789 2021-06-20 06:25:26 | INFO | train_inner | epoch 004: 2299 / 3002 loss=2.677, ppl=6.4, wps=5873.6, ups=0.09, wpb=64785, bsz=128, num_updates=11239, lr=9.99181e-05, gnorm=2.121, loss_scale=16, train_wall=11, gb_free=2.8, wall=128800 2021-06-20 06:25:37 | INFO | train_inner | epoch 004: 2300 / 3002 loss=2.376, ppl=5.19, wps=5879.1, ups=0.09, wpb=64828, bsz=128, num_updates=11240, lr=9.99181e-05, gnorm=1.903, loss_scale=16, train_wall=11, gb_free=2.8, wall=128811 2021-06-20 06:25:48 | INFO | train_inner | epoch 004: 2301 / 3002 loss=2.544, ppl=5.83, wps=5871, ups=0.09, wpb=64740, bsz=128, num_updates=11241, lr=9.99181e-05, gnorm=1.984, loss_scale=16, train_wall=11, gb_free=2.8, wall=128822 2021-06-20 06:25:58 | INFO | train_inner | epoch 004: 2302 / 3002 loss=2.618, ppl=6.14, wps=5958.4, ups=0.09, wpb=64836, bsz=128, num_updates=11242, lr=9.99181e-05, gnorm=2.101, loss_scale=16, train_wall=10, gb_free=2.8, wall=128833 2021-06-20 06:26:10 | INFO | train_inner | epoch 004: 2303 / 3002 loss=2.514, ppl=5.71, wps=5793.1, ups=0.09, wpb=64765, bsz=128, num_updates=11243, lr=9.9918e-05, gnorm=2.168, loss_scale=16, train_wall=11, gb_free=2.8, wall=128844 2021-06-20 06:26:21 | INFO | train_inner | epoch 004: 2304 / 3002 loss=2.532, ppl=5.78, wps=5883.6, ups=0.09, wpb=64803, bsz=128, num_updates=11244, lr=9.9918e-05, gnorm=1.955, loss_scale=16, train_wall=11, gb_free=2.8, wall=128855 2021-06-20 06:26:32 | INFO | train_inner | epoch 004: 2305 / 3002 loss=2.464, ppl=5.52, wps=5830.1, ups=0.09, wpb=64804, bsz=128, num_updates=11245, lr=9.9918e-05, gnorm=2, loss_scale=16, train_wall=11, gb_free=2.8, wall=128866 2021-06-20 06:26:43 | INFO | train_inner | epoch 004: 2306 / 3002 loss=2.591, ppl=6.03, wps=5831.8, ups=0.09, wpb=64796, bsz=128, num_updates=11246, lr=9.9918e-05, gnorm=1.904, loss_scale=16, train_wall=11, gb_free=2.8, wall=128877 2021-06-20 06:26:54 | INFO | train_inner | epoch 004: 2307 / 3002 loss=2.344, ppl=5.08, wps=5869.7, ups=0.09, wpb=64780, bsz=128, num_updates=11247, lr=9.9918e-05, gnorm=1.982, loss_scale=16, train_wall=11, gb_free=2.8, wall=128888 2021-06-20 06:27:05 | INFO | train_inner | epoch 004: 2308 / 3002 loss=2.51, ppl=5.7, wps=5886.8, ups=0.09, wpb=64821, bsz=128, num_updates=11248, lr=9.9918e-05, gnorm=1.896, loss_scale=16, train_wall=11, gb_free=2.8, wall=128899 2021-06-20 06:27:16 | INFO | train_inner | epoch 004: 2309 / 3002 loss=2.495, ppl=5.64, wps=5899.3, ups=0.09, wpb=64850, bsz=128, num_updates=11249, lr=9.9918e-05, gnorm=1.976, loss_scale=16, train_wall=11, gb_free=2.8, wall=128910 2021-06-20 06:27:27 | INFO | train_inner | epoch 004: 2310 / 3002 loss=2.688, ppl=6.44, wps=5888.3, ups=0.09, wpb=64815, bsz=128, num_updates=11250, lr=9.9918e-05, gnorm=2.009, loss_scale=16, train_wall=11, gb_free=2.8, wall=128921 2021-06-20 06:27:38 | INFO | train_inner | epoch 004: 2311 / 3002 loss=2.54, ppl=5.82, wps=5830.6, ups=0.09, wpb=64712, bsz=128, num_updates=11251, lr=9.9918e-05, gnorm=2.095, loss_scale=16, train_wall=11, gb_free=2.8, wall=128932 2021-06-20 06:27:49 | INFO | train_inner | epoch 004: 2312 / 3002 loss=2.522, ppl=5.74, wps=5845.8, ups=0.09, wpb=64832, bsz=128, num_updates=11252, lr=9.9918e-05, gnorm=1.962, loss_scale=16, train_wall=11, gb_free=2.8, wall=128943 2021-06-20 06:28:00 | INFO | train_inner | epoch 004: 2313 / 3002 loss=2.509, ppl=5.69, wps=5858.6, ups=0.09, wpb=64838, bsz=128, num_updates=11253, lr=9.9918e-05, gnorm=2.031, loss_scale=16, train_wall=11, gb_free=2.8, wall=128955 2021-06-20 06:28:11 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-20 06:28:22 | INFO | train_inner | epoch 004: 2315 / 3002 loss=2.56, ppl=5.9, wps=2943.8, ups=0.05, wpb=64822, bsz=128, num_updates=11254, lr=9.9918e-05, gnorm=2.036, loss_scale=8, train_wall=21, gb_free=2.8, wall=128977 2021-06-20 06:28:34 | INFO | train_inner | epoch 004: 2316 / 3002 loss=2.504, ppl=5.67, wps=5735.6, ups=0.09, wpb=64913, bsz=128, num_updates=11255, lr=9.9918e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=128988 2021-06-20 06:28:45 | INFO | train_inner | epoch 004: 2317 / 3002 loss=2.575, ppl=5.96, wps=5795.3, ups=0.09, wpb=64793, bsz=128, num_updates=11256, lr=9.99179e-05, gnorm=2.023, loss_scale=8, train_wall=11, gb_free=2.8, wall=128999 2021-06-20 06:28:56 | INFO | train_inner | epoch 004: 2318 / 3002 loss=2.473, ppl=5.55, wps=5808.2, ups=0.09, wpb=64765, bsz=128, num_updates=11257, lr=9.99179e-05, gnorm=2.031, loss_scale=8, train_wall=11, gb_free=2.8, wall=129010 2021-06-20 06:29:07 | INFO | train_inner | epoch 004: 2319 / 3002 loss=2.543, ppl=5.83, wps=5822.2, ups=0.09, wpb=64855, bsz=128, num_updates=11258, lr=9.99179e-05, gnorm=2.028, loss_scale=8, train_wall=11, gb_free=2.8, wall=129021 2021-06-20 06:29:18 | INFO | train_inner | epoch 004: 2320 / 3002 loss=2.611, ppl=6.11, wps=5915.4, ups=0.09, wpb=64850, bsz=128, num_updates=11259, lr=9.99179e-05, gnorm=2.175, loss_scale=8, train_wall=11, gb_free=2.8, wall=129032 2021-06-20 06:29:29 | INFO | train_inner | epoch 004: 2321 / 3002 loss=2.499, ppl=5.65, wps=5830.7, ups=0.09, wpb=64846, bsz=128, num_updates=11260, lr=9.99179e-05, gnorm=2.046, loss_scale=8, train_wall=11, gb_free=2.8, wall=129043 2021-06-20 06:29:40 | INFO | train_inner | epoch 004: 2322 / 3002 loss=2.505, ppl=5.67, wps=5906.8, ups=0.09, wpb=64806, bsz=128, num_updates=11261, lr=9.99179e-05, gnorm=2.124, loss_scale=8, train_wall=11, gb_free=2.8, wall=129054 2021-06-20 06:29:51 | INFO | train_inner | epoch 004: 2323 / 3002 loss=2.543, ppl=5.83, wps=5878.8, ups=0.09, wpb=64879, bsz=128, num_updates=11262, lr=9.99179e-05, gnorm=2.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=129065 2021-06-20 06:30:02 | INFO | train_inner | epoch 004: 2324 / 3002 loss=2.763, ppl=6.79, wps=5830.8, ups=0.09, wpb=64771, bsz=128, num_updates=11263, lr=9.99179e-05, gnorm=2.226, loss_scale=8, train_wall=11, gb_free=2.8, wall=129077 2021-06-20 06:30:13 | INFO | train_inner | epoch 004: 2325 / 3002 loss=2.668, ppl=6.35, wps=5794.6, ups=0.09, wpb=64847, bsz=128, num_updates=11264, lr=9.99179e-05, gnorm=2.699, loss_scale=8, train_wall=11, gb_free=2.8, wall=129088 2021-06-20 06:30:25 | INFO | train_inner | epoch 004: 2326 / 3002 loss=2.603, ppl=6.08, wps=5763.2, ups=0.09, wpb=64854, bsz=128, num_updates=11265, lr=9.99179e-05, gnorm=2.072, loss_scale=8, train_wall=11, gb_free=2.8, wall=129099 2021-06-20 06:30:36 | INFO | train_inner | epoch 004: 2327 / 3002 loss=2.627, ppl=6.18, wps=5850.6, ups=0.09, wpb=64765, bsz=128, num_updates=11266, lr=9.99179e-05, gnorm=7.071, loss_scale=8, train_wall=11, gb_free=2.8, wall=129110 2021-06-20 06:30:47 | INFO | train_inner | epoch 004: 2328 / 3002 loss=2.641, ppl=6.24, wps=5905.4, ups=0.09, wpb=64831, bsz=128, num_updates=11267, lr=9.99179e-05, gnorm=2.088, loss_scale=8, train_wall=11, gb_free=2.8, wall=129121 2021-06-20 06:30:58 | INFO | train_inner | epoch 004: 2329 / 3002 loss=2.549, ppl=5.85, wps=5812.9, ups=0.09, wpb=64781, bsz=128, num_updates=11268, lr=9.99178e-05, gnorm=2.269, loss_scale=8, train_wall=11, gb_free=2.8, wall=129132 2021-06-20 06:31:09 | INFO | train_inner | epoch 004: 2330 / 3002 loss=2.598, ppl=6.05, wps=5906, ups=0.09, wpb=64795, bsz=128, num_updates=11269, lr=9.99178e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=129143 2021-06-20 06:31:20 | INFO | train_inner | epoch 004: 2331 / 3002 loss=2.424, ppl=5.37, wps=5761.5, ups=0.09, wpb=64827, bsz=128, num_updates=11270, lr=9.99178e-05, gnorm=2.127, loss_scale=8, train_wall=11, gb_free=2.8, wall=129154 2021-06-20 06:31:31 | INFO | train_inner | epoch 004: 2332 / 3002 loss=2.438, ppl=5.42, wps=5898.6, ups=0.09, wpb=64786, bsz=128, num_updates=11271, lr=9.99178e-05, gnorm=1.956, loss_scale=8, train_wall=11, gb_free=2.8, wall=129165 2021-06-20 06:31:42 | INFO | train_inner | epoch 004: 2333 / 3002 loss=2.406, ppl=5.3, wps=5836.9, ups=0.09, wpb=64866, bsz=128, num_updates=11272, lr=9.99178e-05, gnorm=2.038, loss_scale=8, train_wall=11, gb_free=2.8, wall=129176 2021-06-20 06:31:53 | INFO | train_inner | epoch 004: 2334 / 3002 loss=2.626, ppl=6.17, wps=5825.8, ups=0.09, wpb=64834, bsz=128, num_updates=11273, lr=9.99178e-05, gnorm=2.067, loss_scale=8, train_wall=11, gb_free=2.8, wall=129188 2021-06-20 06:32:04 | INFO | train_inner | epoch 004: 2335 / 3002 loss=2.547, ppl=5.84, wps=5965.1, ups=0.09, wpb=64897, bsz=128, num_updates=11274, lr=9.99178e-05, gnorm=2.029, loss_scale=8, train_wall=10, gb_free=2.8, wall=129198 2021-06-20 06:32:15 | INFO | train_inner | epoch 004: 2336 / 3002 loss=2.516, ppl=5.72, wps=5899.1, ups=0.09, wpb=64734, bsz=128, num_updates=11275, lr=9.99178e-05, gnorm=2.061, loss_scale=8, train_wall=11, gb_free=2.8, wall=129209 2021-06-20 06:32:26 | INFO | train_inner | epoch 004: 2337 / 3002 loss=2.602, ppl=6.07, wps=5872.6, ups=0.09, wpb=64826, bsz=128, num_updates=11276, lr=9.99178e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=129220 2021-06-20 06:32:37 | INFO | train_inner | epoch 004: 2338 / 3002 loss=2.454, ppl=5.48, wps=5867.1, ups=0.09, wpb=64803, bsz=128, num_updates=11277, lr=9.99178e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=129232 2021-06-20 06:32:48 | INFO | train_inner | epoch 004: 2339 / 3002 loss=2.527, ppl=5.76, wps=5824.4, ups=0.09, wpb=64866, bsz=128, num_updates=11278, lr=9.99178e-05, gnorm=2.087, loss_scale=8, train_wall=11, gb_free=2.8, wall=129243 2021-06-20 06:33:00 | INFO | train_inner | epoch 004: 2340 / 3002 loss=2.658, ppl=6.31, wps=5738.1, ups=0.09, wpb=64713, bsz=128, num_updates=11279, lr=9.99178e-05, gnorm=2.098, loss_scale=8, train_wall=11, gb_free=2.8, wall=129254 2021-06-20 06:33:10 | INFO | train_inner | epoch 004: 2341 / 3002 loss=2.701, ppl=6.5, wps=6004.5, ups=0.09, wpb=64854, bsz=128, num_updates=11280, lr=9.99178e-05, gnorm=2.09, loss_scale=8, train_wall=10, gb_free=2.8, wall=129265 2021-06-20 06:33:21 | INFO | train_inner | epoch 004: 2342 / 3002 loss=2.526, ppl=5.76, wps=5857.7, ups=0.09, wpb=64820, bsz=128, num_updates=11281, lr=9.99177e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=129276 2021-06-20 06:33:33 | INFO | train_inner | epoch 004: 2343 / 3002 loss=2.466, ppl=5.52, wps=5848.5, ups=0.09, wpb=64792, bsz=128, num_updates=11282, lr=9.99177e-05, gnorm=2.026, loss_scale=8, train_wall=11, gb_free=2.8, wall=129287 2021-06-20 06:33:43 | INFO | train_inner | epoch 004: 2344 / 3002 loss=2.483, ppl=5.59, wps=5938.1, ups=0.09, wpb=64908, bsz=128, num_updates=11283, lr=9.99177e-05, gnorm=2.121, loss_scale=8, train_wall=10, gb_free=2.8, wall=129298 2021-06-20 06:33:54 | INFO | train_inner | epoch 004: 2345 / 3002 loss=2.595, ppl=6.04, wps=5911.6, ups=0.09, wpb=64792, bsz=128, num_updates=11284, lr=9.99177e-05, gnorm=2.071, loss_scale=8, train_wall=11, gb_free=2.8, wall=129309 2021-06-20 06:34:05 | INFO | train_inner | epoch 004: 2346 / 3002 loss=2.647, ppl=6.26, wps=5907.9, ups=0.09, wpb=64875, bsz=128, num_updates=11285, lr=9.99177e-05, gnorm=2.196, loss_scale=8, train_wall=10, gb_free=2.8, wall=129320 2021-06-20 06:34:16 | INFO | train_inner | epoch 004: 2347 / 3002 loss=2.596, ppl=6.05, wps=5978.6, ups=0.09, wpb=64808, bsz=128, num_updates=11286, lr=9.99177e-05, gnorm=1.984, loss_scale=8, train_wall=10, gb_free=2.8, wall=129331 2021-06-20 06:34:27 | INFO | train_inner | epoch 004: 2348 / 3002 loss=2.497, ppl=5.65, wps=5862.4, ups=0.09, wpb=64821, bsz=128, num_updates=11287, lr=9.99177e-05, gnorm=2.284, loss_scale=8, train_wall=11, gb_free=2.8, wall=129342 2021-06-20 06:34:39 | INFO | train_inner | epoch 004: 2349 / 3002 loss=2.67, ppl=6.36, wps=5796.6, ups=0.09, wpb=64876, bsz=128, num_updates=11288, lr=9.99177e-05, gnorm=2.026, loss_scale=8, train_wall=11, gb_free=2.8, wall=129353 2021-06-20 06:34:50 | INFO | train_inner | epoch 004: 2350 / 3002 loss=2.837, ppl=7.15, wps=5859, ups=0.09, wpb=64847, bsz=128, num_updates=11289, lr=9.99177e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=129364 2021-06-20 06:35:01 | INFO | train_inner | epoch 004: 2351 / 3002 loss=2.672, ppl=6.37, wps=5894.7, ups=0.09, wpb=64814, bsz=128, num_updates=11290, lr=9.99177e-05, gnorm=2.075, loss_scale=8, train_wall=11, gb_free=2.8, wall=129375 2021-06-20 06:35:12 | INFO | train_inner | epoch 004: 2352 / 3002 loss=2.543, ppl=5.83, wps=5940.2, ups=0.09, wpb=64818, bsz=128, num_updates=11291, lr=9.99177e-05, gnorm=2.11, loss_scale=8, train_wall=10, gb_free=2.8, wall=129386 2021-06-20 06:35:23 | INFO | train_inner | epoch 004: 2353 / 3002 loss=2.502, ppl=5.66, wps=5728.9, ups=0.09, wpb=64849, bsz=128, num_updates=11292, lr=9.99177e-05, gnorm=1.91, loss_scale=8, train_wall=11, gb_free=2.8, wall=129397 2021-06-20 06:35:34 | INFO | train_inner | epoch 004: 2354 / 3002 loss=2.596, ppl=6.05, wps=5776.6, ups=0.09, wpb=64799, bsz=128, num_updates=11293, lr=9.99176e-05, gnorm=2.084, loss_scale=8, train_wall=11, gb_free=2.8, wall=129408 2021-06-20 06:35:45 | INFO | train_inner | epoch 004: 2355 / 3002 loss=2.49, ppl=5.62, wps=5717.2, ups=0.09, wpb=64709, bsz=128, num_updates=11294, lr=9.99176e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=129420 2021-06-20 06:35:56 | INFO | train_inner | epoch 004: 2356 / 3002 loss=2.412, ppl=5.32, wps=5900.9, ups=0.09, wpb=64901, bsz=128, num_updates=11295, lr=9.99176e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=129431 2021-06-20 06:36:08 | INFO | train_inner | epoch 004: 2357 / 3002 loss=2.458, ppl=5.49, wps=5765.8, ups=0.09, wpb=64856, bsz=128, num_updates=11296, lr=9.99176e-05, gnorm=2.033, loss_scale=8, train_wall=11, gb_free=2.8, wall=129442 2021-06-20 06:36:19 | INFO | train_inner | epoch 004: 2358 / 3002 loss=2.475, ppl=5.56, wps=5770.5, ups=0.09, wpb=64775, bsz=128, num_updates=11297, lr=9.99176e-05, gnorm=3.277, loss_scale=8, train_wall=11, gb_free=2.8, wall=129453 2021-06-20 06:36:30 | INFO | train_inner | epoch 004: 2359 / 3002 loss=2.643, ppl=6.25, wps=5886.3, ups=0.09, wpb=64849, bsz=128, num_updates=11298, lr=9.99176e-05, gnorm=2.154, loss_scale=8, train_wall=11, gb_free=2.8, wall=129464 2021-06-20 06:36:41 | INFO | train_inner | epoch 004: 2360 / 3002 loss=2.499, ppl=5.65, wps=5805.2, ups=0.09, wpb=64806, bsz=128, num_updates=11299, lr=9.99176e-05, gnorm=2.033, loss_scale=8, train_wall=11, gb_free=2.8, wall=129475 2021-06-20 06:36:52 | INFO | train_inner | epoch 004: 2361 / 3002 loss=2.504, ppl=5.67, wps=5793.2, ups=0.09, wpb=64861, bsz=128, num_updates=11300, lr=9.99176e-05, gnorm=2.269, loss_scale=8, train_wall=11, gb_free=2.8, wall=129487 2021-06-20 06:37:03 | INFO | train_inner | epoch 004: 2362 / 3002 loss=2.518, ppl=5.73, wps=5767.3, ups=0.09, wpb=64811, bsz=128, num_updates=11301, lr=9.99176e-05, gnorm=2.052, loss_scale=8, train_wall=11, gb_free=2.8, wall=129498 2021-06-20 06:37:15 | INFO | train_inner | epoch 004: 2363 / 3002 loss=2.623, ppl=6.16, wps=5822.6, ups=0.09, wpb=64821, bsz=128, num_updates=11302, lr=9.99176e-05, gnorm=2.05, loss_scale=8, train_wall=11, gb_free=2.8, wall=129509 2021-06-20 06:37:26 | INFO | train_inner | epoch 004: 2364 / 3002 loss=2.42, ppl=5.35, wps=5828.1, ups=0.09, wpb=64859, bsz=128, num_updates=11303, lr=9.99176e-05, gnorm=2.704, loss_scale=8, train_wall=11, gb_free=2.8, wall=129520 2021-06-20 06:37:37 | INFO | train_inner | epoch 004: 2365 / 3002 loss=2.634, ppl=6.21, wps=5955.4, ups=0.09, wpb=64873, bsz=128, num_updates=11304, lr=9.99176e-05, gnorm=2.025, loss_scale=8, train_wall=10, gb_free=2.8, wall=129531 2021-06-20 06:37:48 | INFO | train_inner | epoch 004: 2366 / 3002 loss=2.807, ppl=7, wps=5707.9, ups=0.09, wpb=64819, bsz=128, num_updates=11305, lr=9.99176e-05, gnorm=2.033, loss_scale=8, train_wall=11, gb_free=2.8, wall=129542 2021-06-20 06:37:59 | INFO | train_inner | epoch 004: 2367 / 3002 loss=2.71, ppl=6.54, wps=5821.8, ups=0.09, wpb=64844, bsz=128, num_updates=11306, lr=9.99175e-05, gnorm=2.089, loss_scale=8, train_wall=11, gb_free=2.8, wall=129553 2021-06-20 06:38:10 | INFO | train_inner | epoch 004: 2368 / 3002 loss=2.683, ppl=6.42, wps=5801.7, ups=0.09, wpb=64811, bsz=128, num_updates=11307, lr=9.99175e-05, gnorm=2.102, loss_scale=8, train_wall=11, gb_free=2.8, wall=129565 2021-06-20 06:38:21 | INFO | train_inner | epoch 004: 2369 / 3002 loss=2.478, ppl=5.57, wps=5859, ups=0.09, wpb=64828, bsz=128, num_updates=11308, lr=9.99175e-05, gnorm=2.054, loss_scale=8, train_wall=11, gb_free=2.8, wall=129576 2021-06-20 06:38:33 | INFO | train_inner | epoch 004: 2370 / 3002 loss=2.529, ppl=5.77, wps=5760.9, ups=0.09, wpb=64764, bsz=128, num_updates=11309, lr=9.99175e-05, gnorm=2.141, loss_scale=8, train_wall=11, gb_free=2.8, wall=129587 2021-06-20 06:38:44 | INFO | train_inner | epoch 004: 2371 / 3002 loss=2.456, ppl=5.49, wps=5796.1, ups=0.09, wpb=64815, bsz=128, num_updates=11310, lr=9.99175e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=129598 2021-06-20 06:38:55 | INFO | train_inner | epoch 004: 2372 / 3002 loss=2.62, ppl=6.15, wps=5701.4, ups=0.09, wpb=64809, bsz=128, num_updates=11311, lr=9.99175e-05, gnorm=2.112, loss_scale=8, train_wall=11, gb_free=2.8, wall=129609 2021-06-20 06:39:06 | INFO | train_inner | epoch 004: 2373 / 3002 loss=2.443, ppl=5.44, wps=5879.3, ups=0.09, wpb=64826, bsz=128, num_updates=11312, lr=9.99175e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=129620 2021-06-20 06:39:17 | INFO | train_inner | epoch 004: 2374 / 3002 loss=2.469, ppl=5.54, wps=5875.1, ups=0.09, wpb=64788, bsz=128, num_updates=11313, lr=9.99175e-05, gnorm=4.194, loss_scale=8, train_wall=11, gb_free=2.8, wall=129632 2021-06-20 06:39:28 | INFO | train_inner | epoch 004: 2375 / 3002 loss=2.612, ppl=6.11, wps=5827.5, ups=0.09, wpb=64750, bsz=128, num_updates=11314, lr=9.99175e-05, gnorm=2.077, loss_scale=8, train_wall=11, gb_free=2.8, wall=129643 2021-06-20 06:39:39 | INFO | train_inner | epoch 004: 2376 / 3002 loss=2.641, ppl=6.24, wps=5850.9, ups=0.09, wpb=64764, bsz=128, num_updates=11315, lr=9.99175e-05, gnorm=2.101, loss_scale=8, train_wall=11, gb_free=2.8, wall=129654 2021-06-20 06:39:50 | INFO | train_inner | epoch 004: 2377 / 3002 loss=2.559, ppl=5.89, wps=5879.2, ups=0.09, wpb=64836, bsz=128, num_updates=11316, lr=9.99175e-05, gnorm=2.016, loss_scale=8, train_wall=11, gb_free=2.8, wall=129665 2021-06-20 06:40:02 | INFO | train_inner | epoch 004: 2378 / 3002 loss=2.753, ppl=6.74, wps=5818.4, ups=0.09, wpb=64903, bsz=128, num_updates=11317, lr=9.99175e-05, gnorm=2.139, loss_scale=8, train_wall=11, gb_free=2.8, wall=129676 2021-06-20 06:40:13 | INFO | train_inner | epoch 004: 2379 / 3002 loss=2.671, ppl=6.37, wps=5871.8, ups=0.09, wpb=64901, bsz=128, num_updates=11318, lr=9.99174e-05, gnorm=2.096, loss_scale=8, train_wall=11, gb_free=2.8, wall=129687 2021-06-20 06:40:24 | INFO | train_inner | epoch 004: 2380 / 3002 loss=2.65, ppl=6.28, wps=5809.6, ups=0.09, wpb=64814, bsz=128, num_updates=11319, lr=9.99174e-05, gnorm=2.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=129698 2021-06-20 06:40:35 | INFO | train_inner | epoch 004: 2381 / 3002 loss=2.438, ppl=5.42, wps=5919, ups=0.09, wpb=64888, bsz=128, num_updates=11320, lr=9.99174e-05, gnorm=4.221, loss_scale=8, train_wall=11, gb_free=2.8, wall=129709 2021-06-20 06:40:46 | INFO | train_inner | epoch 004: 2382 / 3002 loss=2.638, ppl=6.22, wps=5857.4, ups=0.09, wpb=64867, bsz=128, num_updates=11321, lr=9.99174e-05, gnorm=2.57, loss_scale=8, train_wall=11, gb_free=2.8, wall=129720 2021-06-20 06:40:57 | INFO | train_inner | epoch 004: 2383 / 3002 loss=2.73, ppl=6.63, wps=5817.7, ups=0.09, wpb=64885, bsz=128, num_updates=11322, lr=9.99174e-05, gnorm=2.162, loss_scale=8, train_wall=11, gb_free=2.8, wall=129731 2021-06-20 06:41:08 | INFO | train_inner | epoch 004: 2384 / 3002 loss=2.398, ppl=5.27, wps=5824, ups=0.09, wpb=64820, bsz=128, num_updates=11323, lr=9.99174e-05, gnorm=2.025, loss_scale=8, train_wall=11, gb_free=2.8, wall=129742 2021-06-20 06:41:19 | INFO | train_inner | epoch 004: 2385 / 3002 loss=2.377, ppl=5.2, wps=5868.7, ups=0.09, wpb=64793, bsz=128, num_updates=11324, lr=9.99174e-05, gnorm=1.909, loss_scale=8, train_wall=11, gb_free=2.8, wall=129753 2021-06-20 06:41:30 | INFO | train_inner | epoch 004: 2386 / 3002 loss=2.723, ppl=6.6, wps=5841.9, ups=0.09, wpb=64803, bsz=128, num_updates=11325, lr=9.99174e-05, gnorm=2.167, loss_scale=8, train_wall=11, gb_free=2.8, wall=129765 2021-06-20 06:41:41 | INFO | train_inner | epoch 004: 2387 / 3002 loss=2.405, ppl=5.3, wps=5861.2, ups=0.09, wpb=64773, bsz=128, num_updates=11326, lr=9.99174e-05, gnorm=4.348, loss_scale=8, train_wall=11, gb_free=2.8, wall=129776 2021-06-20 06:41:52 | INFO | train_inner | epoch 004: 2388 / 3002 loss=2.684, ppl=6.43, wps=5787.3, ups=0.09, wpb=64765, bsz=128, num_updates=11327, lr=9.99174e-05, gnorm=2.102, loss_scale=8, train_wall=11, gb_free=2.8, wall=129787 2021-06-20 06:42:03 | INFO | train_inner | epoch 004: 2389 / 3002 loss=2.617, ppl=6.13, wps=5891.4, ups=0.09, wpb=64806, bsz=128, num_updates=11328, lr=9.99174e-05, gnorm=2.189, loss_scale=8, train_wall=11, gb_free=2.8, wall=129798 2021-06-20 06:42:14 | INFO | train_inner | epoch 004: 2390 / 3002 loss=2.531, ppl=5.78, wps=5910.1, ups=0.09, wpb=64881, bsz=128, num_updates=11329, lr=9.99174e-05, gnorm=2.14, loss_scale=8, train_wall=11, gb_free=2.8, wall=129809 2021-06-20 06:42:25 | INFO | train_inner | epoch 004: 2391 / 3002 loss=2.563, ppl=5.91, wps=5961.7, ups=0.09, wpb=64905, bsz=128, num_updates=11330, lr=9.99174e-05, gnorm=2.313, loss_scale=8, train_wall=10, gb_free=2.8, wall=129820 2021-06-20 06:42:37 | INFO | train_inner | epoch 004: 2392 / 3002 loss=2.68, ppl=6.41, wps=5775.5, ups=0.09, wpb=64919, bsz=128, num_updates=11331, lr=9.99173e-05, gnorm=2.175, loss_scale=8, train_wall=11, gb_free=2.8, wall=129831 2021-06-20 06:42:48 | INFO | train_inner | epoch 004: 2393 / 3002 loss=2.562, ppl=5.9, wps=5822.7, ups=0.09, wpb=64789, bsz=128, num_updates=11332, lr=9.99173e-05, gnorm=2.203, loss_scale=8, train_wall=11, gb_free=2.8, wall=129842 2021-06-20 06:42:59 | INFO | train_inner | epoch 004: 2394 / 3002 loss=2.609, ppl=6.1, wps=5742.7, ups=0.09, wpb=64859, bsz=128, num_updates=11333, lr=9.99173e-05, gnorm=2.05, loss_scale=8, train_wall=11, gb_free=2.8, wall=129853 2021-06-20 06:43:10 | INFO | train_inner | epoch 004: 2395 / 3002 loss=2.562, ppl=5.91, wps=5810.2, ups=0.09, wpb=64777, bsz=128, num_updates=11334, lr=9.99173e-05, gnorm=2.415, loss_scale=8, train_wall=11, gb_free=2.8, wall=129864 2021-06-20 06:43:21 | INFO | train_inner | epoch 004: 2396 / 3002 loss=2.596, ppl=6.05, wps=6025.2, ups=0.09, wpb=64813, bsz=128, num_updates=11335, lr=9.99173e-05, gnorm=2.185, loss_scale=8, train_wall=10, gb_free=2.8, wall=129875 2021-06-20 06:43:32 | INFO | train_inner | epoch 004: 2397 / 3002 loss=2.589, ppl=6.02, wps=6005.8, ups=0.09, wpb=64929, bsz=128, num_updates=11336, lr=9.99173e-05, gnorm=2.113, loss_scale=8, train_wall=10, gb_free=2.8, wall=129886 2021-06-20 06:43:43 | INFO | train_inner | epoch 004: 2398 / 3002 loss=2.497, ppl=5.65, wps=5827.8, ups=0.09, wpb=64842, bsz=128, num_updates=11337, lr=9.99173e-05, gnorm=1.956, loss_scale=8, train_wall=11, gb_free=2.8, wall=129897 2021-06-20 06:43:54 | INFO | train_inner | epoch 004: 2399 / 3002 loss=2.496, ppl=5.64, wps=5949, ups=0.09, wpb=64833, bsz=128, num_updates=11338, lr=9.99173e-05, gnorm=2.085, loss_scale=8, train_wall=10, gb_free=2.8, wall=129908 2021-06-20 06:44:05 | INFO | train_inner | epoch 004: 2400 / 3002 loss=2.714, ppl=6.56, wps=5800.7, ups=0.09, wpb=64806, bsz=128, num_updates=11339, lr=9.99173e-05, gnorm=2.145, loss_scale=8, train_wall=11, gb_free=2.8, wall=129919 2021-06-20 06:44:16 | INFO | train_inner | epoch 004: 2401 / 3002 loss=2.603, ppl=6.07, wps=5839.7, ups=0.09, wpb=64847, bsz=128, num_updates=11340, lr=9.99173e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=129930 2021-06-20 06:44:27 | INFO | train_inner | epoch 004: 2402 / 3002 loss=2.622, ppl=6.16, wps=5932.8, ups=0.09, wpb=64877, bsz=128, num_updates=11341, lr=9.99173e-05, gnorm=2.026, loss_scale=8, train_wall=10, gb_free=2.8, wall=129941 2021-06-20 06:44:38 | INFO | train_inner | epoch 004: 2403 / 3002 loss=2.725, ppl=6.61, wps=5946.7, ups=0.09, wpb=64724, bsz=128, num_updates=11342, lr=9.99173e-05, gnorm=2.133, loss_scale=8, train_wall=10, gb_free=2.8, wall=129952 2021-06-20 06:44:49 | INFO | train_inner | epoch 004: 2404 / 3002 loss=2.537, ppl=5.8, wps=5765.4, ups=0.09, wpb=64742, bsz=128, num_updates=11343, lr=9.99172e-05, gnorm=2.077, loss_scale=8, train_wall=11, gb_free=2.8, wall=129963 2021-06-20 06:45:00 | INFO | train_inner | epoch 004: 2405 / 3002 loss=2.628, ppl=6.18, wps=5974.5, ups=0.09, wpb=64710, bsz=128, num_updates=11344, lr=9.99172e-05, gnorm=2.036, loss_scale=8, train_wall=10, gb_free=2.8, wall=129974 2021-06-20 06:45:11 | INFO | train_inner | epoch 004: 2406 / 3002 loss=2.535, ppl=5.8, wps=5780.8, ups=0.09, wpb=64811, bsz=128, num_updates=11345, lr=9.99172e-05, gnorm=2.034, loss_scale=8, train_wall=11, gb_free=2.8, wall=129985 2021-06-20 06:45:22 | INFO | train_inner | epoch 004: 2407 / 3002 loss=2.455, ppl=5.48, wps=5995.8, ups=0.09, wpb=64899, bsz=128, num_updates=11346, lr=9.99172e-05, gnorm=2.587, loss_scale=8, train_wall=10, gb_free=2.8, wall=129996 2021-06-20 06:45:33 | INFO | train_inner | epoch 004: 2408 / 3002 loss=2.62, ppl=6.15, wps=5894.5, ups=0.09, wpb=64882, bsz=128, num_updates=11347, lr=9.99172e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=130007 2021-06-20 06:45:44 | INFO | train_inner | epoch 004: 2409 / 3002 loss=2.612, ppl=6.12, wps=5708.2, ups=0.09, wpb=64720, bsz=128, num_updates=11348, lr=9.99172e-05, gnorm=2.025, loss_scale=8, train_wall=11, gb_free=2.8, wall=130019 2021-06-20 06:45:55 | INFO | train_inner | epoch 004: 2410 / 3002 loss=2.509, ppl=5.69, wps=6037, ups=0.09, wpb=64784, bsz=128, num_updates=11349, lr=9.99172e-05, gnorm=2.047, loss_scale=8, train_wall=10, gb_free=2.8, wall=130029 2021-06-20 06:46:06 | INFO | train_inner | epoch 004: 2411 / 3002 loss=2.681, ppl=6.41, wps=5816.5, ups=0.09, wpb=64893, bsz=128, num_updates=11350, lr=9.99172e-05, gnorm=2.184, loss_scale=8, train_wall=11, gb_free=2.8, wall=130040 2021-06-20 06:46:17 | INFO | train_inner | epoch 004: 2412 / 3002 loss=2.469, ppl=5.53, wps=5788.1, ups=0.09, wpb=64819, bsz=128, num_updates=11351, lr=9.99172e-05, gnorm=2.041, loss_scale=8, train_wall=11, gb_free=2.8, wall=130052 2021-06-20 06:46:28 | INFO | train_inner | epoch 004: 2413 / 3002 loss=2.467, ppl=5.53, wps=5884.9, ups=0.09, wpb=64882, bsz=128, num_updates=11352, lr=9.99172e-05, gnorm=2.045, loss_scale=8, train_wall=11, gb_free=2.8, wall=130063 2021-06-20 06:46:40 | INFO | train_inner | epoch 004: 2414 / 3002 loss=2.689, ppl=6.45, wps=5767.1, ups=0.09, wpb=64772, bsz=128, num_updates=11353, lr=9.99172e-05, gnorm=2.167, loss_scale=8, train_wall=11, gb_free=2.8, wall=130074 2021-06-20 06:46:51 | INFO | train_inner | epoch 004: 2415 / 3002 loss=2.518, ppl=5.73, wps=5921.2, ups=0.09, wpb=64860, bsz=128, num_updates=11354, lr=9.99172e-05, gnorm=2.038, loss_scale=8, train_wall=11, gb_free=2.8, wall=130085 2021-06-20 06:47:02 | INFO | train_inner | epoch 004: 2416 / 3002 loss=2.523, ppl=5.75, wps=5846.4, ups=0.09, wpb=64879, bsz=128, num_updates=11355, lr=9.99172e-05, gnorm=2.071, loss_scale=8, train_wall=11, gb_free=2.8, wall=130096 2021-06-20 06:47:13 | INFO | train_inner | epoch 004: 2417 / 3002 loss=2.565, ppl=5.92, wps=5898.3, ups=0.09, wpb=64838, bsz=128, num_updates=11356, lr=9.99171e-05, gnorm=1.998, loss_scale=8, train_wall=11, gb_free=2.8, wall=130107 2021-06-20 06:47:24 | INFO | train_inner | epoch 004: 2418 / 3002 loss=2.571, ppl=5.94, wps=5809.7, ups=0.09, wpb=64889, bsz=128, num_updates=11357, lr=9.99171e-05, gnorm=2.345, loss_scale=8, train_wall=11, gb_free=2.8, wall=130118 2021-06-20 06:47:35 | INFO | train_inner | epoch 004: 2419 / 3002 loss=2.564, ppl=5.91, wps=5797.7, ups=0.09, wpb=64806, bsz=128, num_updates=11358, lr=9.99171e-05, gnorm=2.126, loss_scale=8, train_wall=11, gb_free=2.8, wall=130129 2021-06-20 06:47:46 | INFO | train_inner | epoch 004: 2420 / 3002 loss=2.78, ppl=6.87, wps=5835.5, ups=0.09, wpb=64811, bsz=128, num_updates=11359, lr=9.99171e-05, gnorm=2.195, loss_scale=8, train_wall=11, gb_free=2.8, wall=130140 2021-06-20 06:47:57 | INFO | train_inner | epoch 004: 2421 / 3002 loss=2.448, ppl=5.46, wps=5915.6, ups=0.09, wpb=64899, bsz=128, num_updates=11360, lr=9.99171e-05, gnorm=2.091, loss_scale=8, train_wall=11, gb_free=2.8, wall=130151 2021-06-20 06:48:08 | INFO | train_inner | epoch 004: 2422 / 3002 loss=2.643, ppl=6.25, wps=5746.4, ups=0.09, wpb=64830, bsz=128, num_updates=11361, lr=9.99171e-05, gnorm=2.127, loss_scale=8, train_wall=11, gb_free=2.8, wall=130163 2021-06-20 06:48:19 | INFO | train_inner | epoch 004: 2423 / 3002 loss=2.543, ppl=5.83, wps=5905.6, ups=0.09, wpb=64877, bsz=128, num_updates=11362, lr=9.99171e-05, gnorm=2.091, loss_scale=8, train_wall=11, gb_free=2.8, wall=130174 2021-06-20 06:48:30 | INFO | train_inner | epoch 004: 2424 / 3002 loss=2.663, ppl=6.33, wps=5887.3, ups=0.09, wpb=64875, bsz=128, num_updates=11363, lr=9.99171e-05, gnorm=2.076, loss_scale=8, train_wall=11, gb_free=2.8, wall=130185 2021-06-20 06:48:42 | INFO | train_inner | epoch 004: 2425 / 3002 loss=2.57, ppl=5.94, wps=5789.3, ups=0.09, wpb=64820, bsz=128, num_updates=11364, lr=9.99171e-05, gnorm=2.335, loss_scale=8, train_wall=11, gb_free=2.8, wall=130196 2021-06-20 06:48:53 | INFO | train_inner | epoch 004: 2426 / 3002 loss=2.514, ppl=5.71, wps=5769.9, ups=0.09, wpb=64795, bsz=128, num_updates=11365, lr=9.99171e-05, gnorm=2.11, loss_scale=8, train_wall=11, gb_free=2.8, wall=130207 2021-06-20 06:49:04 | INFO | train_inner | epoch 004: 2427 / 3002 loss=2.675, ppl=6.38, wps=5900.9, ups=0.09, wpb=64897, bsz=128, num_updates=11366, lr=9.99171e-05, gnorm=2.135, loss_scale=8, train_wall=11, gb_free=2.8, wall=130218 2021-06-20 06:49:15 | INFO | train_inner | epoch 004: 2428 / 3002 loss=2.696, ppl=6.48, wps=5779.2, ups=0.09, wpb=64793, bsz=128, num_updates=11367, lr=9.99171e-05, gnorm=2.073, loss_scale=8, train_wall=11, gb_free=2.8, wall=130229 2021-06-20 06:49:26 | INFO | train_inner | epoch 004: 2429 / 3002 loss=2.446, ppl=5.45, wps=5816.7, ups=0.09, wpb=64819, bsz=128, num_updates=11368, lr=9.9917e-05, gnorm=2.094, loss_scale=8, train_wall=11, gb_free=2.8, wall=130240 2021-06-20 06:49:37 | INFO | train_inner | epoch 004: 2430 / 3002 loss=2.485, ppl=5.6, wps=5829.4, ups=0.09, wpb=64739, bsz=128, num_updates=11369, lr=9.9917e-05, gnorm=5.49, loss_scale=8, train_wall=11, gb_free=2.8, wall=130252 2021-06-20 06:49:48 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 2021-06-20 06:49:59 | INFO | train_inner | epoch 004: 2432 / 3002 loss=2.601, ppl=6.07, wps=2948.5, ups=0.05, wpb=64859, bsz=128, num_updates=11370, lr=9.9917e-05, gnorm=9.947, loss_scale=4, train_wall=21, gb_free=2.8, wall=130274 2021-06-20 06:50:10 | INFO | train_inner | epoch 004: 2433 / 3002 loss=2.493, ppl=5.63, wps=5786.5, ups=0.09, wpb=64799, bsz=128, num_updates=11371, lr=9.9917e-05, gnorm=2.008, loss_scale=4, train_wall=11, gb_free=2.8, wall=130285 2021-06-20 06:50:22 | INFO | train_inner | epoch 004: 2434 / 3002 loss=2.469, ppl=5.54, wps=5839.9, ups=0.09, wpb=64823, bsz=128, num_updates=11372, lr=9.9917e-05, gnorm=2.148, loss_scale=4, train_wall=11, gb_free=2.8, wall=130296 2021-06-20 06:50:33 | INFO | train_inner | epoch 004: 2435 / 3002 loss=2.464, ppl=5.52, wps=5786, ups=0.09, wpb=64844, bsz=128, num_updates=11373, lr=9.9917e-05, gnorm=2.085, loss_scale=4, train_wall=11, gb_free=2.8, wall=130307 2021-06-20 06:50:44 | INFO | train_inner | epoch 004: 2436 / 3002 loss=2.577, ppl=5.97, wps=5832.5, ups=0.09, wpb=64848, bsz=128, num_updates=11374, lr=9.9917e-05, gnorm=2.249, loss_scale=4, train_wall=11, gb_free=2.8, wall=130318 2021-06-20 06:50:55 | INFO | train_inner | epoch 004: 2437 / 3002 loss=2.496, ppl=5.64, wps=5871.9, ups=0.09, wpb=64769, bsz=128, num_updates=11375, lr=9.9917e-05, gnorm=2.584, loss_scale=4, train_wall=11, gb_free=2.8, wall=130329 2021-06-20 06:51:06 | INFO | train_inner | epoch 004: 2438 / 3002 loss=2.673, ppl=6.38, wps=5831.2, ups=0.09, wpb=64845, bsz=128, num_updates=11376, lr=9.9917e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=130340 2021-06-20 06:51:17 | INFO | train_inner | epoch 004: 2439 / 3002 loss=2.583, ppl=5.99, wps=5712.1, ups=0.09, wpb=64822, bsz=128, num_updates=11377, lr=9.9917e-05, gnorm=2.192, loss_scale=4, train_wall=11, gb_free=2.8, wall=130352 2021-06-20 06:51:28 | INFO | train_inner | epoch 004: 2440 / 3002 loss=2.62, ppl=6.15, wps=5929.2, ups=0.09, wpb=64838, bsz=128, num_updates=11378, lr=9.9917e-05, gnorm=2.718, loss_scale=4, train_wall=11, gb_free=2.8, wall=130363 2021-06-20 06:51:39 | INFO | train_inner | epoch 004: 2441 / 3002 loss=2.508, ppl=5.69, wps=5867, ups=0.09, wpb=64789, bsz=128, num_updates=11379, lr=9.9917e-05, gnorm=2.08, loss_scale=4, train_wall=11, gb_free=2.8, wall=130374 2021-06-20 06:51:50 | INFO | train_inner | epoch 004: 2442 / 3002 loss=2.653, ppl=6.29, wps=5818.6, ups=0.09, wpb=64816, bsz=128, num_updates=11380, lr=9.9917e-05, gnorm=2.131, loss_scale=4, train_wall=11, gb_free=2.8, wall=130385 2021-06-20 06:52:01 | INFO | train_inner | epoch 004: 2443 / 3002 loss=2.648, ppl=6.27, wps=5920.7, ups=0.09, wpb=64769, bsz=128, num_updates=11381, lr=9.99169e-05, gnorm=2.126, loss_scale=4, train_wall=11, gb_free=2.8, wall=130396 2021-06-20 06:52:12 | INFO | train_inner | epoch 004: 2444 / 3002 loss=2.502, ppl=5.67, wps=5921.9, ups=0.09, wpb=64857, bsz=128, num_updates=11382, lr=9.99169e-05, gnorm=2.182, loss_scale=4, train_wall=10, gb_free=2.8, wall=130407 2021-06-20 06:52:23 | INFO | train_inner | epoch 004: 2445 / 3002 loss=2.651, ppl=6.28, wps=5849, ups=0.09, wpb=64865, bsz=128, num_updates=11383, lr=9.99169e-05, gnorm=1.987, loss_scale=4, train_wall=11, gb_free=2.8, wall=130418 2021-06-20 06:52:35 | INFO | train_inner | epoch 004: 2446 / 3002 loss=2.592, ppl=6.03, wps=5812.8, ups=0.09, wpb=64835, bsz=128, num_updates=11384, lr=9.99169e-05, gnorm=2.261, loss_scale=4, train_wall=11, gb_free=2.8, wall=130429 2021-06-20 06:52:45 | INFO | train_inner | epoch 004: 2447 / 3002 loss=2.398, ppl=5.27, wps=5983.7, ups=0.09, wpb=64807, bsz=128, num_updates=11385, lr=9.99169e-05, gnorm=3.553, loss_scale=4, train_wall=10, gb_free=2.8, wall=130440 2021-06-20 06:52:57 | INFO | train_inner | epoch 004: 2448 / 3002 loss=2.506, ppl=5.68, wps=5802.2, ups=0.09, wpb=64885, bsz=128, num_updates=11386, lr=9.99169e-05, gnorm=2.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=130451 2021-06-20 06:53:08 | INFO | train_inner | epoch 004: 2449 / 3002 loss=2.473, ppl=5.55, wps=5918.1, ups=0.09, wpb=64889, bsz=128, num_updates=11387, lr=9.99169e-05, gnorm=2.308, loss_scale=4, train_wall=10, gb_free=2.8, wall=130462 2021-06-20 06:53:19 | INFO | train_inner | epoch 004: 2450 / 3002 loss=2.613, ppl=6.12, wps=5820, ups=0.09, wpb=64822, bsz=128, num_updates=11388, lr=9.99169e-05, gnorm=2.132, loss_scale=4, train_wall=11, gb_free=2.8, wall=130473 2021-06-20 06:53:30 | INFO | train_inner | epoch 004: 2451 / 3002 loss=2.543, ppl=5.83, wps=5932.4, ups=0.09, wpb=64850, bsz=128, num_updates=11389, lr=9.99169e-05, gnorm=1.984, loss_scale=4, train_wall=10, gb_free=2.8, wall=130484 2021-06-20 06:53:41 | INFO | train_inner | epoch 004: 2452 / 3002 loss=2.542, ppl=5.82, wps=5827.7, ups=0.09, wpb=64761, bsz=128, num_updates=11390, lr=9.99169e-05, gnorm=2.081, loss_scale=4, train_wall=11, gb_free=2.8, wall=130495 2021-06-20 06:53:52 | INFO | train_inner | epoch 004: 2453 / 3002 loss=2.47, ppl=5.54, wps=5939, ups=0.09, wpb=64832, bsz=128, num_updates=11391, lr=9.99169e-05, gnorm=2.012, loss_scale=4, train_wall=10, gb_free=2.8, wall=130506 2021-06-20 06:54:03 | INFO | train_inner | epoch 004: 2454 / 3002 loss=2.536, ppl=5.8, wps=5821.4, ups=0.09, wpb=64729, bsz=128, num_updates=11392, lr=9.99169e-05, gnorm=2.09, loss_scale=4, train_wall=11, gb_free=2.8, wall=130517 2021-06-20 06:54:14 | INFO | train_inner | epoch 004: 2455 / 3002 loss=2.615, ppl=6.13, wps=5844.5, ups=0.09, wpb=64749, bsz=128, num_updates=11393, lr=9.99168e-05, gnorm=2.918, loss_scale=4, train_wall=11, gb_free=2.8, wall=130528 2021-06-20 06:54:25 | INFO | train_inner | epoch 004: 2456 / 3002 loss=2.524, ppl=5.75, wps=5879.1, ups=0.09, wpb=64879, bsz=128, num_updates=11394, lr=9.99168e-05, gnorm=2.18, loss_scale=4, train_wall=11, gb_free=2.8, wall=130539 2021-06-20 06:54:36 | INFO | train_inner | epoch 004: 2457 / 3002 loss=2.472, ppl=5.55, wps=5856, ups=0.09, wpb=64851, bsz=128, num_updates=11395, lr=9.99168e-05, gnorm=2.045, loss_scale=4, train_wall=11, gb_free=2.8, wall=130550 2021-06-20 06:54:47 | INFO | train_inner | epoch 004: 2458 / 3002 loss=2.536, ppl=5.8, wps=5848, ups=0.09, wpb=64788, bsz=128, num_updates=11396, lr=9.99168e-05, gnorm=2.194, loss_scale=4, train_wall=11, gb_free=2.8, wall=130561 2021-06-20 06:54:58 | INFO | train_inner | epoch 004: 2459 / 3002 loss=2.458, ppl=5.5, wps=5867.7, ups=0.09, wpb=64827, bsz=128, num_updates=11397, lr=9.99168e-05, gnorm=2.01, loss_scale=4, train_wall=11, gb_free=2.8, wall=130572 2021-06-20 06:55:09 | INFO | train_inner | epoch 004: 2460 / 3002 loss=2.577, ppl=5.97, wps=5994.7, ups=0.09, wpb=64895, bsz=128, num_updates=11398, lr=9.99168e-05, gnorm=1.986, loss_scale=4, train_wall=10, gb_free=2.8, wall=130583 2021-06-20 06:55:20 | INFO | train_inner | epoch 004: 2461 / 3002 loss=2.633, ppl=6.2, wps=5769.3, ups=0.09, wpb=64822, bsz=128, num_updates=11399, lr=9.99168e-05, gnorm=1.984, loss_scale=4, train_wall=11, gb_free=2.8, wall=130595 2021-06-20 06:55:31 | INFO | train_inner | epoch 004: 2462 / 3002 loss=2.633, ppl=6.2, wps=5920, ups=0.09, wpb=64721, bsz=128, num_updates=11400, lr=9.99168e-05, gnorm=1.986, loss_scale=4, train_wall=10, gb_free=2.8, wall=130605 2021-06-20 06:55:42 | INFO | train_inner | epoch 004: 2463 / 3002 loss=2.542, ppl=5.82, wps=5979.2, ups=0.09, wpb=64863, bsz=128, num_updates=11401, lr=9.99168e-05, gnorm=2.006, loss_scale=4, train_wall=10, gb_free=2.8, wall=130616 2021-06-20 06:55:53 | INFO | train_inner | epoch 004: 2464 / 3002 loss=2.555, ppl=5.88, wps=5814.9, ups=0.09, wpb=64793, bsz=128, num_updates=11402, lr=9.99168e-05, gnorm=2.043, loss_scale=4, train_wall=11, gb_free=2.8, wall=130627 2021-06-20 06:56:04 | INFO | train_inner | epoch 004: 2465 / 3002 loss=2.499, ppl=5.65, wps=5846, ups=0.09, wpb=64796, bsz=128, num_updates=11403, lr=9.99168e-05, gnorm=2.07, loss_scale=4, train_wall=11, gb_free=2.8, wall=130639 2021-06-20 06:56:15 | INFO | train_inner | epoch 004: 2466 / 3002 loss=2.532, ppl=5.78, wps=5881.6, ups=0.09, wpb=64681, bsz=128, num_updates=11404, lr=9.99168e-05, gnorm=2.029, loss_scale=4, train_wall=11, gb_free=2.8, wall=130650 2021-06-20 06:56:26 | INFO | train_inner | epoch 004: 2467 / 3002 loss=2.564, ppl=5.91, wps=5859.2, ups=0.09, wpb=64857, bsz=128, num_updates=11405, lr=9.99168e-05, gnorm=2.05, loss_scale=4, train_wall=11, gb_free=2.8, wall=130661 2021-06-20 06:56:38 | INFO | train_inner | epoch 004: 2468 / 3002 loss=2.527, ppl=5.76, wps=5725.2, ups=0.09, wpb=64820, bsz=128, num_updates=11406, lr=9.99167e-05, gnorm=2.067, loss_scale=4, train_wall=11, gb_free=2.8, wall=130672 2021-06-20 06:56:48 | INFO | train_inner | epoch 004: 2469 / 3002 loss=2.567, ppl=5.92, wps=5971.1, ups=0.09, wpb=64861, bsz=128, num_updates=11407, lr=9.99167e-05, gnorm=1.997, loss_scale=4, train_wall=10, gb_free=2.8, wall=130683 2021-06-20 06:56:59 | INFO | train_inner | epoch 004: 2470 / 3002 loss=2.618, ppl=6.14, wps=5906.1, ups=0.09, wpb=64858, bsz=128, num_updates=11408, lr=9.99167e-05, gnorm=1.957, loss_scale=4, train_wall=11, gb_free=2.8, wall=130694 2021-06-20 06:57:11 | INFO | train_inner | epoch 004: 2471 / 3002 loss=2.541, ppl=5.82, wps=5773.5, ups=0.09, wpb=64786, bsz=128, num_updates=11409, lr=9.99167e-05, gnorm=2.107, loss_scale=4, train_wall=11, gb_free=2.8, wall=130705 2021-06-20 06:57:22 | INFO | train_inner | epoch 004: 2472 / 3002 loss=2.344, ppl=5.08, wps=5831.7, ups=0.09, wpb=64843, bsz=128, num_updates=11410, lr=9.99167e-05, gnorm=2.031, loss_scale=4, train_wall=11, gb_free=2.8, wall=130716 2021-06-20 06:57:33 | INFO | train_inner | epoch 004: 2473 / 3002 loss=2.558, ppl=5.89, wps=5856.3, ups=0.09, wpb=64877, bsz=128, num_updates=11411, lr=9.99167e-05, gnorm=2.048, loss_scale=4, train_wall=11, gb_free=2.8, wall=130727 2021-06-20 06:57:44 | INFO | train_inner | epoch 004: 2474 / 3002 loss=2.455, ppl=5.48, wps=5776.5, ups=0.09, wpb=64848, bsz=128, num_updates=11412, lr=9.99167e-05, gnorm=2.025, loss_scale=4, train_wall=11, gb_free=2.8, wall=130738 2021-06-20 06:57:55 | INFO | train_inner | epoch 004: 2475 / 3002 loss=2.535, ppl=5.8, wps=5893.9, ups=0.09, wpb=64834, bsz=128, num_updates=11413, lr=9.99167e-05, gnorm=2.104, loss_scale=4, train_wall=11, gb_free=2.8, wall=130749 2021-06-20 06:58:06 | INFO | train_inner | epoch 004: 2476 / 3002 loss=2.564, ppl=5.91, wps=5899.3, ups=0.09, wpb=64865, bsz=128, num_updates=11414, lr=9.99167e-05, gnorm=2.542, loss_scale=4, train_wall=11, gb_free=2.8, wall=130760 2021-06-20 06:58:17 | INFO | train_inner | epoch 004: 2477 / 3002 loss=2.602, ppl=6.07, wps=5815.8, ups=0.09, wpb=64879, bsz=128, num_updates=11415, lr=9.99167e-05, gnorm=2.048, loss_scale=4, train_wall=11, gb_free=2.8, wall=130772 2021-06-20 06:58:28 | INFO | train_inner | epoch 004: 2478 / 3002 loss=2.426, ppl=5.37, wps=5773, ups=0.09, wpb=64861, bsz=128, num_updates=11416, lr=9.99167e-05, gnorm=1.967, loss_scale=4, train_wall=11, gb_free=2.8, wall=130783 2021-06-20 06:58:40 | INFO | train_inner | epoch 004: 2479 / 3002 loss=2.74, ppl=6.68, wps=5838.8, ups=0.09, wpb=64761, bsz=128, num_updates=11417, lr=9.99167e-05, gnorm=1.999, loss_scale=4, train_wall=11, gb_free=2.8, wall=130794 2021-06-20 06:58:51 | INFO | train_inner | epoch 004: 2480 / 3002 loss=2.855, ppl=7.24, wps=5912.4, ups=0.09, wpb=64866, bsz=128, num_updates=11418, lr=9.99166e-05, gnorm=2.123, loss_scale=4, train_wall=11, gb_free=2.8, wall=130805 2021-06-20 06:59:02 | INFO | train_inner | epoch 004: 2481 / 3002 loss=2.592, ppl=6.03, wps=5778.5, ups=0.09, wpb=64746, bsz=128, num_updates=11419, lr=9.99166e-05, gnorm=1.967, loss_scale=4, train_wall=11, gb_free=2.8, wall=130816 2021-06-20 06:59:13 | INFO | train_inner | epoch 004: 2482 / 3002 loss=2.404, ppl=5.29, wps=5781.9, ups=0.09, wpb=64845, bsz=128, num_updates=11420, lr=9.99166e-05, gnorm=1.974, loss_scale=4, train_wall=11, gb_free=2.8, wall=130827 2021-06-20 06:59:24 | INFO | train_inner | epoch 004: 2483 / 3002 loss=2.695, ppl=6.47, wps=5888.3, ups=0.09, wpb=64863, bsz=128, num_updates=11421, lr=9.99166e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=130838 2021-06-20 06:59:35 | INFO | train_inner | epoch 004: 2484 / 3002 loss=2.413, ppl=5.32, wps=5863.3, ups=0.09, wpb=64817, bsz=128, num_updates=11422, lr=9.99166e-05, gnorm=1.928, loss_scale=4, train_wall=11, gb_free=2.8, wall=130849 2021-06-20 06:59:46 | INFO | train_inner | epoch 004: 2485 / 3002 loss=2.62, ppl=6.15, wps=5846.6, ups=0.09, wpb=64810, bsz=128, num_updates=11423, lr=9.99166e-05, gnorm=2.024, loss_scale=4, train_wall=11, gb_free=2.8, wall=130860 2021-06-20 06:59:57 | INFO | train_inner | epoch 004: 2486 / 3002 loss=2.706, ppl=6.52, wps=5879.8, ups=0.09, wpb=64775, bsz=128, num_updates=11424, lr=9.99166e-05, gnorm=1.971, loss_scale=4, train_wall=11, gb_free=2.8, wall=130871 2021-06-20 07:00:08 | INFO | train_inner | epoch 004: 2487 / 3002 loss=2.611, ppl=6.11, wps=5875.4, ups=0.09, wpb=64908, bsz=128, num_updates=11425, lr=9.99166e-05, gnorm=2.037, loss_scale=4, train_wall=11, gb_free=2.8, wall=130883 2021-06-20 07:00:19 | INFO | train_inner | epoch 004: 2488 / 3002 loss=2.627, ppl=6.18, wps=5768.7, ups=0.09, wpb=64848, bsz=128, num_updates=11426, lr=9.99166e-05, gnorm=3.727, loss_scale=4, train_wall=11, gb_free=2.8, wall=130894 2021-06-20 07:00:30 | INFO | train_inner | epoch 004: 2489 / 3002 loss=2.61, ppl=6.1, wps=5896.2, ups=0.09, wpb=64861, bsz=128, num_updates=11427, lr=9.99166e-05, gnorm=1.983, loss_scale=4, train_wall=11, gb_free=2.8, wall=130905 2021-06-20 07:00:41 | INFO | train_inner | epoch 004: 2490 / 3002 loss=2.55, ppl=5.86, wps=5883.8, ups=0.09, wpb=64730, bsz=128, num_updates=11428, lr=9.99166e-05, gnorm=2.082, loss_scale=4, train_wall=11, gb_free=2.8, wall=130916 2021-06-20 07:00:52 | INFO | train_inner | epoch 004: 2491 / 3002 loss=2.387, ppl=5.23, wps=5934.9, ups=0.09, wpb=64850, bsz=128, num_updates=11429, lr=9.99166e-05, gnorm=1.984, loss_scale=4, train_wall=10, gb_free=2.8, wall=130927 2021-06-20 07:01:03 | INFO | train_inner | epoch 004: 2492 / 3002 loss=2.518, ppl=5.73, wps=5891.4, ups=0.09, wpb=64851, bsz=128, num_updates=11430, lr=9.99166e-05, gnorm=2.022, loss_scale=4, train_wall=11, gb_free=2.8, wall=130938 2021-06-20 07:01:14 | INFO | train_inner | epoch 004: 2493 / 3002 loss=2.597, ppl=6.05, wps=5876.5, ups=0.09, wpb=64860, bsz=128, num_updates=11431, lr=9.99165e-05, gnorm=1.985, loss_scale=4, train_wall=11, gb_free=2.8, wall=130949 2021-06-20 07:01:26 | INFO | train_inner | epoch 004: 2494 / 3002 loss=2.539, ppl=5.81, wps=5832.2, ups=0.09, wpb=64818, bsz=128, num_updates=11432, lr=9.99165e-05, gnorm=1.946, loss_scale=4, train_wall=11, gb_free=2.8, wall=130960 2021-06-20 07:01:37 | INFO | train_inner | epoch 004: 2495 / 3002 loss=2.493, ppl=5.63, wps=5708.6, ups=0.09, wpb=64883, bsz=128, num_updates=11433, lr=9.99165e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=130971 2021-06-20 07:01:48 | INFO | train_inner | epoch 004: 2496 / 3002 loss=2.545, ppl=5.84, wps=5792.1, ups=0.09, wpb=64855, bsz=128, num_updates=11434, lr=9.99165e-05, gnorm=2.211, loss_scale=4, train_wall=11, gb_free=2.8, wall=130982 2021-06-20 07:01:59 | INFO | train_inner | epoch 004: 2497 / 3002 loss=2.567, ppl=5.93, wps=5810.3, ups=0.09, wpb=64858, bsz=128, num_updates=11435, lr=9.99165e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=130994 2021-06-20 07:02:10 | INFO | train_inner | epoch 004: 2498 / 3002 loss=2.609, ppl=6.1, wps=5792.2, ups=0.09, wpb=64852, bsz=128, num_updates=11436, lr=9.99165e-05, gnorm=2.043, loss_scale=4, train_wall=11, gb_free=2.8, wall=131005 2021-06-20 07:02:22 | INFO | train_inner | epoch 004: 2499 / 3002 loss=2.506, ppl=5.68, wps=5754.3, ups=0.09, wpb=64844, bsz=128, num_updates=11437, lr=9.99165e-05, gnorm=2.106, loss_scale=4, train_wall=11, gb_free=2.8, wall=131016 2021-06-20 07:02:33 | INFO | train_inner | epoch 004: 2500 / 3002 loss=2.682, ppl=6.42, wps=5874.3, ups=0.09, wpb=64790, bsz=128, num_updates=11438, lr=9.99165e-05, gnorm=2.232, loss_scale=4, train_wall=11, gb_free=2.8, wall=131027 2021-06-20 07:02:44 | INFO | train_inner | epoch 004: 2501 / 3002 loss=2.438, ppl=5.42, wps=5847, ups=0.09, wpb=64932, bsz=128, num_updates=11439, lr=9.99165e-05, gnorm=2.1, loss_scale=4, train_wall=11, gb_free=2.8, wall=131038 2021-06-20 07:02:55 | INFO | train_inner | epoch 004: 2502 / 3002 loss=2.485, ppl=5.6, wps=5991.5, ups=0.09, wpb=64866, bsz=128, num_updates=11440, lr=9.99165e-05, gnorm=11.2, loss_scale=4, train_wall=10, gb_free=2.8, wall=131049 2021-06-20 07:03:06 | INFO | train_inner | epoch 004: 2503 / 3002 loss=2.474, ppl=5.56, wps=5872.3, ups=0.09, wpb=64774, bsz=128, num_updates=11441, lr=9.99165e-05, gnorm=1.972, loss_scale=4, train_wall=11, gb_free=2.8, wall=131060 2021-06-20 07:03:17 | INFO | train_inner | epoch 004: 2504 / 3002 loss=2.6, ppl=6.06, wps=5868, ups=0.09, wpb=64860, bsz=128, num_updates=11442, lr=9.99165e-05, gnorm=1.97, loss_scale=4, train_wall=11, gb_free=2.8, wall=131071 2021-06-20 07:03:28 | INFO | train_inner | epoch 004: 2505 / 3002 loss=2.486, ppl=5.6, wps=5874.6, ups=0.09, wpb=64827, bsz=128, num_updates=11443, lr=9.99164e-05, gnorm=4.772, loss_scale=4, train_wall=11, gb_free=2.8, wall=131082 2021-06-20 07:03:39 | INFO | train_inner | epoch 004: 2506 / 3002 loss=2.595, ppl=6.04, wps=5882, ups=0.09, wpb=64817, bsz=128, num_updates=11444, lr=9.99164e-05, gnorm=1.961, loss_scale=4, train_wall=11, gb_free=2.8, wall=131093 2021-06-20 07:03:50 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-20 07:04:01 | INFO | train_inner | epoch 004: 2508 / 3002 loss=2.41, ppl=5.31, wps=2951.1, ups=0.05, wpb=64770, bsz=128, num_updates=11445, lr=9.99164e-05, gnorm=2.001, loss_scale=2, train_wall=21, gb_free=2.8, wall=131115 2021-06-20 07:04:12 | INFO | train_inner | epoch 004: 2509 / 3002 loss=2.59, ppl=6.02, wps=5909, ups=0.09, wpb=64900, bsz=128, num_updates=11446, lr=9.99164e-05, gnorm=2.053, loss_scale=2, train_wall=11, gb_free=2.8, wall=131126 2021-06-20 07:04:23 | INFO | train_inner | epoch 004: 2510 / 3002 loss=2.695, ppl=6.47, wps=5796.1, ups=0.09, wpb=64828, bsz=128, num_updates=11447, lr=9.99164e-05, gnorm=2.216, loss_scale=2, train_wall=11, gb_free=2.8, wall=131137 2021-06-20 07:04:34 | INFO | train_inner | epoch 004: 2511 / 3002 loss=2.383, ppl=5.22, wps=5859.1, ups=0.09, wpb=64877, bsz=128, num_updates=11448, lr=9.99164e-05, gnorm=2.257, loss_scale=2, train_wall=11, gb_free=2.8, wall=131148 2021-06-20 07:04:45 | INFO | train_inner | epoch 004: 2512 / 3002 loss=2.648, ppl=6.27, wps=5854, ups=0.09, wpb=64881, bsz=128, num_updates=11449, lr=9.99164e-05, gnorm=2.364, loss_scale=2, train_wall=11, gb_free=2.8, wall=131159 2021-06-20 07:04:56 | INFO | train_inner | epoch 004: 2513 / 3002 loss=2.419, ppl=5.35, wps=5787.6, ups=0.09, wpb=64826, bsz=128, num_updates=11450, lr=9.99164e-05, gnorm=2.005, loss_scale=2, train_wall=11, gb_free=2.8, wall=131171 2021-06-20 07:05:07 | INFO | train_inner | epoch 004: 2514 / 3002 loss=2.665, ppl=6.34, wps=5795.1, ups=0.09, wpb=64753, bsz=128, num_updates=11451, lr=9.99164e-05, gnorm=3.172, loss_scale=2, train_wall=11, gb_free=2.8, wall=131182 2021-06-20 07:05:19 | INFO | train_inner | epoch 004: 2515 / 3002 loss=2.656, ppl=6.3, wps=5841.3, ups=0.09, wpb=64790, bsz=128, num_updates=11452, lr=9.99164e-05, gnorm=2.226, loss_scale=2, train_wall=11, gb_free=2.8, wall=131193 2021-06-20 07:05:30 | INFO | train_inner | epoch 004: 2516 / 3002 loss=2.563, ppl=5.91, wps=5785.4, ups=0.09, wpb=64871, bsz=128, num_updates=11453, lr=9.99164e-05, gnorm=2.023, loss_scale=2, train_wall=11, gb_free=2.8, wall=131204 2021-06-20 07:05:41 | INFO | train_inner | epoch 004: 2517 / 3002 loss=2.568, ppl=5.93, wps=5806.7, ups=0.09, wpb=64851, bsz=128, num_updates=11454, lr=9.99164e-05, gnorm=3.237, loss_scale=2, train_wall=11, gb_free=2.8, wall=131215 2021-06-20 07:05:52 | INFO | train_inner | epoch 004: 2518 / 3002 loss=2.458, ppl=5.5, wps=5975.2, ups=0.09, wpb=64823, bsz=128, num_updates=11455, lr=9.99164e-05, gnorm=2.056, loss_scale=2, train_wall=10, gb_free=2.8, wall=131226 2021-06-20 07:06:03 | INFO | train_inner | epoch 004: 2519 / 3002 loss=2.601, ppl=6.07, wps=5788.4, ups=0.09, wpb=64881, bsz=128, num_updates=11456, lr=9.99163e-05, gnorm=2.248, loss_scale=2, train_wall=11, gb_free=2.8, wall=131237 2021-06-20 07:06:14 | INFO | train_inner | epoch 004: 2520 / 3002 loss=2.539, ppl=5.81, wps=5805.9, ups=0.09, wpb=64727, bsz=128, num_updates=11457, lr=9.99163e-05, gnorm=2.403, loss_scale=2, train_wall=11, gb_free=2.8, wall=131248 2021-06-20 07:06:25 | INFO | train_inner | epoch 004: 2521 / 3002 loss=2.683, ppl=6.42, wps=5873.6, ups=0.09, wpb=64783, bsz=128, num_updates=11458, lr=9.99163e-05, gnorm=2.656, loss_scale=2, train_wall=11, gb_free=2.8, wall=131260 2021-06-20 07:06:36 | INFO | train_inner | epoch 004: 2522 / 3002 loss=2.58, ppl=5.98, wps=5901.1, ups=0.09, wpb=64802, bsz=128, num_updates=11459, lr=9.99163e-05, gnorm=2.767, loss_scale=2, train_wall=11, gb_free=2.8, wall=131270 2021-06-20 07:06:47 | INFO | train_inner | epoch 004: 2523 / 3002 loss=2.527, ppl=5.77, wps=5830.2, ups=0.09, wpb=64822, bsz=128, num_updates=11460, lr=9.99163e-05, gnorm=4.741, loss_scale=2, train_wall=11, gb_free=2.8, wall=131282 2021-06-20 07:06:58 | INFO | train_inner | epoch 004: 2524 / 3002 loss=2.475, ppl=5.56, wps=5812, ups=0.09, wpb=64867, bsz=128, num_updates=11461, lr=9.99163e-05, gnorm=5.967, loss_scale=2, train_wall=11, gb_free=2.8, wall=131293 2021-06-20 07:07:09 | INFO | train_inner | epoch 004: 2525 / 3002 loss=2.607, ppl=6.09, wps=5860.5, ups=0.09, wpb=64843, bsz=128, num_updates=11462, lr=9.99163e-05, gnorm=2.234, loss_scale=2, train_wall=11, gb_free=2.8, wall=131304 2021-06-20 07:07:20 | INFO | train_inner | epoch 004: 2526 / 3002 loss=2.559, ppl=5.89, wps=5925.1, ups=0.09, wpb=64843, bsz=128, num_updates=11463, lr=9.99163e-05, gnorm=2.067, loss_scale=2, train_wall=10, gb_free=2.8, wall=131315 2021-06-20 07:07:31 | INFO | train_inner | epoch 004: 2527 / 3002 loss=2.573, ppl=5.95, wps=5934.7, ups=0.09, wpb=64819, bsz=128, num_updates=11464, lr=9.99163e-05, gnorm=2.231, loss_scale=2, train_wall=10, gb_free=2.8, wall=131326 2021-06-20 07:07:42 | INFO | train_inner | epoch 004: 2528 / 3002 loss=2.553, ppl=5.87, wps=5823.2, ups=0.09, wpb=64775, bsz=128, num_updates=11465, lr=9.99163e-05, gnorm=3.443, loss_scale=2, train_wall=11, gb_free=2.8, wall=131337 2021-06-20 07:07:54 | INFO | train_inner | epoch 004: 2529 / 3002 loss=2.656, ppl=6.3, wps=5747.5, ups=0.09, wpb=64813, bsz=128, num_updates=11466, lr=9.99163e-05, gnorm=2.325, loss_scale=2, train_wall=11, gb_free=2.8, wall=131348 2021-06-20 07:08:05 | INFO | train_inner | epoch 004: 2530 / 3002 loss=2.536, ppl=5.8, wps=5862, ups=0.09, wpb=64879, bsz=128, num_updates=11467, lr=9.99163e-05, gnorm=2.408, loss_scale=2, train_wall=11, gb_free=2.8, wall=131359 2021-06-20 07:08:16 | INFO | train_inner | epoch 004: 2531 / 3002 loss=2.608, ppl=6.1, wps=5729.7, ups=0.09, wpb=64825, bsz=128, num_updates=11468, lr=9.99162e-05, gnorm=2.461, loss_scale=2, train_wall=11, gb_free=2.8, wall=131370 2021-06-20 07:08:27 | INFO | train_inner | epoch 004: 2532 / 3002 loss=2.67, ppl=6.36, wps=6076.8, ups=0.09, wpb=64843, bsz=128, num_updates=11469, lr=9.99162e-05, gnorm=2.91, loss_scale=2, train_wall=10, gb_free=2.8, wall=131381 2021-06-20 07:08:38 | INFO | train_inner | epoch 004: 2533 / 3002 loss=2.601, ppl=6.07, wps=5782.8, ups=0.09, wpb=64800, bsz=128, num_updates=11470, lr=9.99162e-05, gnorm=6.658, loss_scale=2, train_wall=11, gb_free=2.8, wall=131392 2021-06-20 07:08:49 | INFO | train_inner | epoch 004: 2534 / 3002 loss=2.518, ppl=5.73, wps=5990.5, ups=0.09, wpb=64970, bsz=128, num_updates=11471, lr=9.99162e-05, gnorm=2.961, loss_scale=2, train_wall=10, gb_free=2.8, wall=131403 2021-06-20 07:09:00 | INFO | train_inner | epoch 004: 2535 / 3002 loss=2.556, ppl=5.88, wps=5784.5, ups=0.09, wpb=64893, bsz=128, num_updates=11472, lr=9.99162e-05, gnorm=2.224, loss_scale=2, train_wall=11, gb_free=2.8, wall=131414 2021-06-20 07:09:11 | INFO | train_inner | epoch 004: 2536 / 3002 loss=2.609, ppl=6.1, wps=5812, ups=0.09, wpb=64790, bsz=128, num_updates=11473, lr=9.99162e-05, gnorm=6.582, loss_scale=2, train_wall=11, gb_free=2.8, wall=131426 2021-06-20 07:09:22 | INFO | train_inner | epoch 004: 2537 / 3002 loss=2.576, ppl=5.96, wps=5851, ups=0.09, wpb=64803, bsz=128, num_updates=11474, lr=9.99162e-05, gnorm=2.719, loss_scale=2, train_wall=11, gb_free=2.8, wall=131437 2021-06-20 07:09:33 | INFO | train_inner | epoch 004: 2538 / 3002 loss=2.721, ppl=6.59, wps=5910.4, ups=0.09, wpb=64824, bsz=128, num_updates=11475, lr=9.99162e-05, gnorm=11.334, loss_scale=2, train_wall=11, gb_free=2.8, wall=131448 2021-06-20 07:09:44 | INFO | train_inner | epoch 004: 2539 / 3002 loss=2.536, ppl=5.8, wps=6062.6, ups=0.09, wpb=64924, bsz=128, num_updates=11476, lr=9.99162e-05, gnorm=2.389, loss_scale=2, train_wall=10, gb_free=2.8, wall=131458 2021-06-20 07:09:55 | INFO | train_inner | epoch 004: 2540 / 3002 loss=2.516, ppl=5.72, wps=5781.1, ups=0.09, wpb=64787, bsz=128, num_updates=11477, lr=9.99162e-05, gnorm=11.505, loss_scale=2, train_wall=11, gb_free=2.8, wall=131470 2021-06-20 07:10:06 | INFO | train_inner | epoch 004: 2541 / 3002 loss=2.442, ppl=5.44, wps=5843.4, ups=0.09, wpb=64847, bsz=128, num_updates=11478, lr=9.99162e-05, gnorm=2.268, loss_scale=2, train_wall=11, gb_free=2.8, wall=131481 2021-06-20 07:10:17 | INFO | train_inner | epoch 004: 2542 / 3002 loss=2.598, ppl=6.06, wps=5945.5, ups=0.09, wpb=64798, bsz=128, num_updates=11479, lr=9.99162e-05, gnorm=2.547, loss_scale=2, train_wall=10, gb_free=2.8, wall=131492 2021-06-20 07:10:28 | INFO | train_inner | epoch 004: 2543 / 3002 loss=2.527, ppl=5.76, wps=5755.2, ups=0.09, wpb=64913, bsz=128, num_updates=11480, lr=9.99162e-05, gnorm=2.39, loss_scale=2, train_wall=11, gb_free=2.8, wall=131503 2021-06-20 07:10:40 | INFO | train_inner | epoch 004: 2544 / 3002 loss=2.657, ppl=6.31, wps=5855.4, ups=0.09, wpb=64673, bsz=128, num_updates=11481, lr=9.99161e-05, gnorm=2.395, loss_scale=2, train_wall=11, gb_free=2.8, wall=131514 2021-06-20 07:10:51 | INFO | train_inner | epoch 004: 2545 / 3002 loss=2.526, ppl=5.76, wps=5825.6, ups=0.09, wpb=64846, bsz=128, num_updates=11482, lr=9.99161e-05, gnorm=2.355, loss_scale=2, train_wall=11, gb_free=2.8, wall=131525 2021-06-20 07:11:02 | INFO | train_inner | epoch 004: 2546 / 3002 loss=2.549, ppl=5.85, wps=5838, ups=0.09, wpb=64788, bsz=128, num_updates=11483, lr=9.99161e-05, gnorm=2.181, loss_scale=2, train_wall=11, gb_free=2.8, wall=131536 2021-06-20 07:11:13 | INFO | train_inner | epoch 004: 2547 / 3002 loss=2.569, ppl=5.93, wps=5741.2, ups=0.09, wpb=64834, bsz=128, num_updates=11484, lr=9.99161e-05, gnorm=2.317, loss_scale=2, train_wall=11, gb_free=2.8, wall=131547 2021-06-20 07:11:24 | INFO | train_inner | epoch 004: 2548 / 3002 loss=2.436, ppl=5.41, wps=5886.4, ups=0.09, wpb=64790, bsz=128, num_updates=11485, lr=9.99161e-05, gnorm=2.592, loss_scale=2, train_wall=11, gb_free=2.8, wall=131558 2021-06-20 07:11:35 | INFO | train_inner | epoch 004: 2549 / 3002 loss=2.73, ppl=6.64, wps=5821.4, ups=0.09, wpb=64913, bsz=128, num_updates=11486, lr=9.99161e-05, gnorm=4.453, loss_scale=2, train_wall=11, gb_free=2.8, wall=131570 2021-06-20 07:11:46 | INFO | train_inner | epoch 004: 2550 / 3002 loss=2.409, ppl=5.31, wps=5936.6, ups=0.09, wpb=64881, bsz=128, num_updates=11487, lr=9.99161e-05, gnorm=2.346, loss_scale=2, train_wall=10, gb_free=2.8, wall=131580 2021-06-20 07:11:57 | INFO | train_inner | epoch 004: 2551 / 3002 loss=2.555, ppl=5.88, wps=5937.6, ups=0.09, wpb=64836, bsz=128, num_updates=11488, lr=9.99161e-05, gnorm=2.191, loss_scale=2, train_wall=10, gb_free=2.8, wall=131591 2021-06-20 07:12:08 | INFO | train_inner | epoch 004: 2552 / 3002 loss=2.603, ppl=6.08, wps=5779.1, ups=0.09, wpb=64855, bsz=128, num_updates=11489, lr=9.99161e-05, gnorm=3.407, loss_scale=2, train_wall=11, gb_free=2.8, wall=131603 2021-06-20 07:12:19 | INFO | train_inner | epoch 004: 2553 / 3002 loss=2.734, ppl=6.65, wps=5935, ups=0.09, wpb=64895, bsz=128, num_updates=11490, lr=9.99161e-05, gnorm=2.962, loss_scale=2, train_wall=10, gb_free=2.8, wall=131614 2021-06-20 07:12:30 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 2021-06-20 07:12:41 | INFO | train_inner | epoch 004: 2555 / 3002 loss=2.644, ppl=6.25, wps=2962.1, ups=0.05, wpb=64865, bsz=128, num_updates=11491, lr=9.99161e-05, gnorm=2.491, loss_scale=1, train_wall=21, gb_free=2.8, wall=131635 2021-06-20 07:12:52 | INFO | train_inner | epoch 004: 2556 / 3002 loss=2.599, ppl=6.06, wps=5971.7, ups=0.09, wpb=64862, bsz=128, num_updates=11492, lr=9.99161e-05, gnorm=4.582, loss_scale=1, train_wall=10, gb_free=2.8, wall=131646 2021-06-20 07:13:03 | INFO | train_inner | epoch 004: 2557 / 3002 loss=2.697, ppl=6.49, wps=5963.7, ups=0.09, wpb=64841, bsz=128, num_updates=11493, lr=9.9916e-05, gnorm=2.513, loss_scale=1, train_wall=10, gb_free=2.8, wall=131657 2021-06-20 07:13:14 | INFO | train_inner | epoch 004: 2558 / 3002 loss=2.712, ppl=6.55, wps=5874.8, ups=0.09, wpb=64828, bsz=128, num_updates=11494, lr=9.9916e-05, gnorm=3.49, loss_scale=1, train_wall=11, gb_free=2.8, wall=131668 2021-06-20 07:13:25 | INFO | train_inner | epoch 004: 2559 / 3002 loss=2.877, ppl=7.35, wps=5838.3, ups=0.09, wpb=64748, bsz=128, num_updates=11495, lr=9.9916e-05, gnorm=6.398, loss_scale=1, train_wall=11, gb_free=2.8, wall=131679 2021-06-20 07:13:36 | INFO | train_inner | epoch 004: 2560 / 3002 loss=2.867, ppl=7.3, wps=5941.4, ups=0.09, wpb=64829, bsz=128, num_updates=11496, lr=9.9916e-05, gnorm=43.956, loss_scale=1, train_wall=10, gb_free=2.8, wall=131690 2021-06-20 07:13:47 | INFO | train_inner | epoch 004: 2561 / 3002 loss=2.753, ppl=6.74, wps=5925.5, ups=0.09, wpb=64882, bsz=128, num_updates=11497, lr=9.9916e-05, gnorm=4.981, loss_scale=1, train_wall=10, gb_free=2.8, wall=131701 2021-06-20 07:13:58 | INFO | train_inner | epoch 004: 2562 / 3002 loss=2.706, ppl=6.53, wps=5865.2, ups=0.09, wpb=64797, bsz=128, num_updates=11498, lr=9.9916e-05, gnorm=5.634, loss_scale=1, train_wall=11, gb_free=2.8, wall=131712 2021-06-20 07:14:09 | INFO | train_inner | epoch 004: 2563 / 3002 loss=2.603, ppl=6.08, wps=5759.7, ups=0.09, wpb=64785, bsz=128, num_updates=11499, lr=9.9916e-05, gnorm=2.975, loss_scale=1, train_wall=11, gb_free=2.8, wall=131723 2021-06-20 07:14:20 | INFO | train_inner | epoch 004: 2564 / 3002 loss=2.713, ppl=6.56, wps=5737.4, ups=0.09, wpb=64836, bsz=128, num_updates=11500, lr=9.9916e-05, gnorm=3.527, loss_scale=1, train_wall=11, gb_free=2.8, wall=131735 2021-06-20 07:14:31 | INFO | train_inner | epoch 004: 2565 / 3002 loss=2.639, ppl=6.23, wps=5890.1, ups=0.09, wpb=64806, bsz=128, num_updates=11501, lr=9.9916e-05, gnorm=3.035, loss_scale=1, train_wall=11, gb_free=2.8, wall=131746 2021-06-20 07:14:42 | INFO | train_inner | epoch 004: 2566 / 3002 loss=2.82, ppl=7.06, wps=5863.2, ups=0.09, wpb=64809, bsz=128, num_updates=11502, lr=9.9916e-05, gnorm=24.159, loss_scale=1, train_wall=11, gb_free=2.8, wall=131757 2021-06-20 07:14:54 | INFO | train_inner | epoch 004: 2567 / 3002 loss=2.648, ppl=6.27, wps=5838.8, ups=0.09, wpb=64830, bsz=128, num_updates=11503, lr=9.9916e-05, gnorm=4.07, loss_scale=1, train_wall=11, gb_free=2.8, wall=131768 2021-06-20 07:15:05 | INFO | train_inner | epoch 004: 2568 / 3002 loss=2.764, ppl=6.79, wps=5846.9, ups=0.09, wpb=64867, bsz=128, num_updates=11504, lr=9.9916e-05, gnorm=8.996, loss_scale=1, train_wall=11, gb_free=2.8, wall=131779 2021-06-20 07:15:16 | INFO | train_inner | epoch 004: 2569 / 3002 loss=2.651, ppl=6.28, wps=5848.7, ups=0.09, wpb=64772, bsz=128, num_updates=11505, lr=9.9916e-05, gnorm=8.962, loss_scale=1, train_wall=11, gb_free=2.8, wall=131790 2021-06-20 07:15:27 | INFO | train_inner | epoch 004: 2570 / 3002 loss=2.463, ppl=5.51, wps=5849.3, ups=0.09, wpb=64861, bsz=128, num_updates=11506, lr=9.99159e-05, gnorm=3.288, loss_scale=1, train_wall=11, gb_free=2.8, wall=131801 2021-06-20 07:15:38 | INFO | train_inner | epoch 004: 2571 / 3002 loss=2.713, ppl=6.56, wps=5857.5, ups=0.09, wpb=64763, bsz=128, num_updates=11507, lr=9.99159e-05, gnorm=3.215, loss_scale=1, train_wall=11, gb_free=2.8, wall=131812 2021-06-20 07:15:49 | INFO | train_inner | epoch 004: 2572 / 3002 loss=2.596, ppl=6.05, wps=6056.7, ups=0.09, wpb=64744, bsz=128, num_updates=11508, lr=9.99159e-05, gnorm=2.59, loss_scale=1, train_wall=10, gb_free=2.8, wall=131823 2021-06-20 07:16:00 | INFO | train_inner | epoch 004: 2573 / 3002 loss=2.546, ppl=5.84, wps=5836.3, ups=0.09, wpb=64775, bsz=128, num_updates=11509, lr=9.99159e-05, gnorm=2.528, loss_scale=1, train_wall=11, gb_free=2.8, wall=131834 2021-06-20 07:16:11 | INFO | train_inner | epoch 004: 2574 / 3002 loss=2.643, ppl=6.24, wps=5858.5, ups=0.09, wpb=64830, bsz=128, num_updates=11510, lr=9.99159e-05, gnorm=3.786, loss_scale=1, train_wall=11, gb_free=2.8, wall=131845 2021-06-20 07:16:22 | INFO | train_inner | epoch 004: 2575 / 3002 loss=2.603, ppl=6.07, wps=5921.8, ups=0.09, wpb=64838, bsz=128, num_updates=11511, lr=9.99159e-05, gnorm=2.615, loss_scale=1, train_wall=11, gb_free=2.8, wall=131856 2021-06-20 07:16:33 | INFO | train_inner | epoch 004: 2576 / 3002 loss=2.759, ppl=6.77, wps=5807.1, ups=0.09, wpb=64835, bsz=128, num_updates=11512, lr=9.99159e-05, gnorm=2.609, loss_scale=1, train_wall=11, gb_free=2.8, wall=131867 2021-06-20 07:16:44 | INFO | train_inner | epoch 004: 2577 / 3002 loss=2.529, ppl=5.77, wps=5786.8, ups=0.09, wpb=64896, bsz=128, num_updates=11513, lr=9.99159e-05, gnorm=5.721, loss_scale=1, train_wall=11, gb_free=2.8, wall=131878 2021-06-20 07:16:55 | INFO | train_inner | epoch 004: 2578 / 3002 loss=2.606, ppl=6.09, wps=5894, ups=0.09, wpb=64780, bsz=128, num_updates=11514, lr=9.99159e-05, gnorm=4.129, loss_scale=1, train_wall=11, gb_free=2.8, wall=131889 2021-06-20 07:17:06 | INFO | train_inner | epoch 004: 2579 / 3002 loss=2.757, ppl=6.76, wps=5779.6, ups=0.09, wpb=64903, bsz=128, num_updates=11515, lr=9.99159e-05, gnorm=2.307, loss_scale=1, train_wall=11, gb_free=2.8, wall=131901 2021-06-20 07:17:17 | INFO | train_inner | epoch 004: 2580 / 3002 loss=2.547, ppl=5.85, wps=5907.3, ups=0.09, wpb=64819, bsz=128, num_updates=11516, lr=9.99159e-05, gnorm=2.761, loss_scale=1, train_wall=11, gb_free=2.8, wall=131912 2021-06-20 07:17:28 | INFO | train_inner | epoch 004: 2581 / 3002 loss=2.694, ppl=6.47, wps=5872.5, ups=0.09, wpb=64796, bsz=128, num_updates=11517, lr=9.99159e-05, gnorm=2.737, loss_scale=1, train_wall=11, gb_free=2.8, wall=131923 2021-06-20 07:17:40 | INFO | train_inner | epoch 004: 2582 / 3002 loss=2.595, ppl=6.04, wps=5784.5, ups=0.09, wpb=64829, bsz=128, num_updates=11518, lr=9.99158e-05, gnorm=2.609, loss_scale=1, train_wall=11, gb_free=2.8, wall=131934 2021-06-20 07:17:50 | INFO | train_inner | epoch 004: 2583 / 3002 loss=2.695, ppl=6.48, wps=5966.8, ups=0.09, wpb=64846, bsz=128, num_updates=11519, lr=9.99158e-05, gnorm=2.764, loss_scale=1, train_wall=10, gb_free=2.8, wall=131945 2021-06-20 07:18:01 | INFO | train_inner | epoch 004: 2584 / 3002 loss=2.731, ppl=6.64, wps=5840.6, ups=0.09, wpb=64817, bsz=128, num_updates=11520, lr=9.99158e-05, gnorm=2.228, loss_scale=1, train_wall=11, gb_free=2.8, wall=131956 2021-06-20 07:18:13 | INFO | train_inner | epoch 004: 2585 / 3002 loss=2.655, ppl=6.3, wps=5845.5, ups=0.09, wpb=64836, bsz=128, num_updates=11521, lr=9.99158e-05, gnorm=2.232, loss_scale=1, train_wall=11, gb_free=2.8, wall=131967 2021-06-20 07:18:24 | INFO | train_inner | epoch 004: 2586 / 3002 loss=2.604, ppl=6.08, wps=5805.4, ups=0.09, wpb=64819, bsz=128, num_updates=11522, lr=9.99158e-05, gnorm=3.16, loss_scale=1, train_wall=11, gb_free=2.8, wall=131978 2021-06-20 07:18:35 | INFO | train_inner | epoch 004: 2587 / 3002 loss=2.711, ppl=6.55, wps=5961.4, ups=0.09, wpb=64843, bsz=128, num_updates=11523, lr=9.99158e-05, gnorm=2.125, loss_scale=1, train_wall=10, gb_free=2.8, wall=131989 2021-06-20 07:18:46 | INFO | train_inner | epoch 004: 2588 / 3002 loss=2.582, ppl=5.99, wps=5900.9, ups=0.09, wpb=64851, bsz=128, num_updates=11524, lr=9.99158e-05, gnorm=3.085, loss_scale=1, train_wall=11, gb_free=2.8, wall=132000 2021-06-20 07:18:57 | INFO | train_inner | epoch 004: 2589 / 3002 loss=2.457, ppl=5.49, wps=5835.1, ups=0.09, wpb=64825, bsz=128, num_updates=11525, lr=9.99158e-05, gnorm=4.125, loss_scale=1, train_wall=11, gb_free=2.8, wall=132011 2021-06-20 07:19:08 | INFO | train_inner | epoch 004: 2590 / 3002 loss=2.466, ppl=5.53, wps=5847.5, ups=0.09, wpb=64822, bsz=128, num_updates=11526, lr=9.99158e-05, gnorm=2.125, loss_scale=1, train_wall=11, gb_free=2.8, wall=132022 2021-06-20 07:19:19 | INFO | train_inner | epoch 004: 2591 / 3002 loss=2.621, ppl=6.15, wps=5925.8, ups=0.09, wpb=64832, bsz=128, num_updates=11527, lr=9.99158e-05, gnorm=2.309, loss_scale=1, train_wall=10, gb_free=2.8, wall=132033 2021-06-20 07:19:30 | INFO | train_inner | epoch 004: 2592 / 3002 loss=2.601, ppl=6.07, wps=5904.6, ups=0.09, wpb=64787, bsz=128, num_updates=11528, lr=9.99158e-05, gnorm=2.145, loss_scale=1, train_wall=11, gb_free=2.8, wall=132044 2021-06-20 07:19:41 | INFO | train_inner | epoch 004: 2593 / 3002 loss=2.605, ppl=6.09, wps=5916.9, ups=0.09, wpb=64905, bsz=128, num_updates=11529, lr=9.99158e-05, gnorm=2.083, loss_scale=1, train_wall=11, gb_free=2.8, wall=132055 2021-06-20 07:19:52 | INFO | train_inner | epoch 004: 2594 / 3002 loss=2.423, ppl=5.36, wps=5829.8, ups=0.09, wpb=64755, bsz=128, num_updates=11530, lr=9.99158e-05, gnorm=2.201, loss_scale=1, train_wall=11, gb_free=2.8, wall=132066 2021-06-20 07:20:03 | INFO | train_inner | epoch 004: 2595 / 3002 loss=2.632, ppl=6.2, wps=5875.7, ups=0.09, wpb=64870, bsz=128, num_updates=11531, lr=9.99157e-05, gnorm=2.235, loss_scale=1, train_wall=11, gb_free=2.8, wall=132077 2021-06-20 07:20:14 | INFO | train_inner | epoch 004: 2596 / 3002 loss=2.495, ppl=5.64, wps=5821.1, ups=0.09, wpb=64834, bsz=128, num_updates=11532, lr=9.99157e-05, gnorm=2.06, loss_scale=1, train_wall=11, gb_free=2.8, wall=132088 2021-06-20 07:20:25 | INFO | train_inner | epoch 004: 2597 / 3002 loss=2.511, ppl=5.7, wps=5852.7, ups=0.09, wpb=64783, bsz=128, num_updates=11533, lr=9.99157e-05, gnorm=2.154, loss_scale=1, train_wall=11, gb_free=2.8, wall=132099 2021-06-20 07:20:36 | INFO | train_inner | epoch 004: 2598 / 3002 loss=2.388, ppl=5.23, wps=6002.3, ups=0.09, wpb=64843, bsz=128, num_updates=11534, lr=9.99157e-05, gnorm=2.075, loss_scale=1, train_wall=10, gb_free=2.8, wall=132110 2021-06-20 07:20:47 | INFO | train_inner | epoch 004: 2599 / 3002 loss=2.431, ppl=5.39, wps=5839, ups=0.09, wpb=64783, bsz=128, num_updates=11535, lr=9.99157e-05, gnorm=2.116, loss_scale=1, train_wall=11, gb_free=2.8, wall=132121 2021-06-20 07:20:58 | INFO | train_inner | epoch 004: 2600 / 3002 loss=2.539, ppl=5.81, wps=6052.8, ups=0.09, wpb=64876, bsz=128, num_updates=11536, lr=9.99157e-05, gnorm=2.246, loss_scale=1, train_wall=10, gb_free=2.8, wall=132132 2021-06-20 07:21:09 | INFO | train_inner | epoch 004: 2601 / 3002 loss=2.705, ppl=6.52, wps=5858.2, ups=0.09, wpb=64840, bsz=128, num_updates=11537, lr=9.99157e-05, gnorm=2.02, loss_scale=1, train_wall=11, gb_free=2.8, wall=132143 2021-06-20 07:21:20 | INFO | train_inner | epoch 004: 2602 / 3002 loss=2.717, ppl=6.57, wps=5916.2, ups=0.09, wpb=64848, bsz=128, num_updates=11538, lr=9.99157e-05, gnorm=2.309, loss_scale=1, train_wall=11, gb_free=2.8, wall=132154 2021-06-20 07:21:31 | INFO | train_inner | epoch 004: 2603 / 3002 loss=2.551, ppl=5.86, wps=5871.5, ups=0.09, wpb=64824, bsz=128, num_updates=11539, lr=9.99157e-05, gnorm=2.15, loss_scale=1, train_wall=11, gb_free=2.8, wall=132165 2021-06-20 07:21:42 | INFO | train_inner | epoch 004: 2604 / 3002 loss=2.599, ppl=6.06, wps=5878.6, ups=0.09, wpb=64894, bsz=128, num_updates=11540, lr=9.99157e-05, gnorm=2.196, loss_scale=1, train_wall=11, gb_free=2.8, wall=132176 2021-06-20 07:21:53 | INFO | train_inner | epoch 004: 2605 / 3002 loss=2.55, ppl=5.86, wps=5816.5, ups=0.09, wpb=64839, bsz=128, num_updates=11541, lr=9.99157e-05, gnorm=2.163, loss_scale=1, train_wall=11, gb_free=2.8, wall=132187 2021-06-20 07:22:04 | INFO | train_inner | epoch 004: 2606 / 3002 loss=2.654, ppl=6.29, wps=5863.2, ups=0.09, wpb=64715, bsz=128, num_updates=11542, lr=9.99157e-05, gnorm=2.119, loss_scale=1, train_wall=11, gb_free=2.8, wall=132198 2021-06-20 07:22:15 | INFO | train_inner | epoch 004: 2607 / 3002 loss=2.416, ppl=5.34, wps=5890.6, ups=0.09, wpb=64813, bsz=128, num_updates=11543, lr=9.99156e-05, gnorm=2.225, loss_scale=1, train_wall=11, gb_free=2.8, wall=132209 2021-06-20 07:22:26 | INFO | train_inner | epoch 004: 2608 / 3002 loss=2.509, ppl=5.69, wps=5850.3, ups=0.09, wpb=64842, bsz=128, num_updates=11544, lr=9.99156e-05, gnorm=2.04, loss_scale=1, train_wall=11, gb_free=2.8, wall=132220 2021-06-20 07:22:37 | INFO | train_inner | epoch 004: 2609 / 3002 loss=2.393, ppl=5.25, wps=5830.8, ups=0.09, wpb=64806, bsz=128, num_updates=11545, lr=9.99156e-05, gnorm=2.043, loss_scale=1, train_wall=11, gb_free=2.8, wall=132231 2021-06-20 07:22:48 | INFO | train_inner | epoch 004: 2610 / 3002 loss=2.538, ppl=5.81, wps=5772.1, ups=0.09, wpb=64760, bsz=128, num_updates=11546, lr=9.99156e-05, gnorm=2.082, loss_scale=1, train_wall=11, gb_free=2.8, wall=132243 2021-06-20 07:22:59 | INFO | train_inner | epoch 004: 2611 / 3002 loss=2.442, ppl=5.43, wps=5859.7, ups=0.09, wpb=64848, bsz=128, num_updates=11547, lr=9.99156e-05, gnorm=2.018, loss_scale=1, train_wall=11, gb_free=2.8, wall=132254 2021-06-20 07:23:11 | INFO | train_inner | epoch 004: 2612 / 3002 loss=2.642, ppl=6.24, wps=5838.5, ups=0.09, wpb=64794, bsz=128, num_updates=11548, lr=9.99156e-05, gnorm=2.03, loss_scale=1, train_wall=11, gb_free=2.8, wall=132265 2021-06-20 07:23:22 | INFO | train_inner | epoch 004: 2613 / 3002 loss=2.538, ppl=5.81, wps=5883.7, ups=0.09, wpb=64815, bsz=128, num_updates=11549, lr=9.99156e-05, gnorm=2.064, loss_scale=1, train_wall=11, gb_free=2.8, wall=132276 2021-06-20 07:23:33 | INFO | train_inner | epoch 004: 2614 / 3002 loss=2.505, ppl=5.68, wps=5839.3, ups=0.09, wpb=64790, bsz=128, num_updates=11550, lr=9.99156e-05, gnorm=2.177, loss_scale=1, train_wall=11, gb_free=2.8, wall=132287 2021-06-20 07:23:44 | INFO | train_inner | epoch 004: 2615 / 3002 loss=2.531, ppl=5.78, wps=5902, ups=0.09, wpb=64762, bsz=128, num_updates=11551, lr=9.99156e-05, gnorm=2.001, loss_scale=1, train_wall=10, gb_free=2.8, wall=132298 2021-06-20 07:23:55 | INFO | train_inner | epoch 004: 2616 / 3002 loss=2.453, ppl=5.48, wps=5913.1, ups=0.09, wpb=64754, bsz=128, num_updates=11552, lr=9.99156e-05, gnorm=2.172, loss_scale=1, train_wall=11, gb_free=2.8, wall=132309 2021-06-20 07:24:06 | INFO | train_inner | epoch 004: 2617 / 3002 loss=2.6, ppl=6.06, wps=5854.3, ups=0.09, wpb=64855, bsz=128, num_updates=11553, lr=9.99156e-05, gnorm=2.057, loss_scale=1, train_wall=11, gb_free=2.8, wall=132320 2021-06-20 07:24:17 | INFO | train_inner | epoch 004: 2618 / 3002 loss=2.684, ppl=6.43, wps=5914.1, ups=0.09, wpb=64802, bsz=128, num_updates=11554, lr=9.99156e-05, gnorm=2.29, loss_scale=1, train_wall=10, gb_free=2.8, wall=132331 2021-06-20 07:24:28 | INFO | train_inner | epoch 004: 2619 / 3002 loss=2.73, ppl=6.64, wps=5941.7, ups=0.09, wpb=64864, bsz=128, num_updates=11555, lr=9.99156e-05, gnorm=2.128, loss_scale=1, train_wall=10, gb_free=2.8, wall=132342 2021-06-20 07:24:38 | INFO | train_inner | epoch 004: 2620 / 3002 loss=2.647, ppl=6.26, wps=5944.9, ups=0.09, wpb=64817, bsz=128, num_updates=11556, lr=9.99155e-05, gnorm=2.037, loss_scale=1, train_wall=10, gb_free=2.8, wall=132353 2021-06-20 07:24:50 | INFO | train_inner | epoch 004: 2621 / 3002 loss=2.764, ppl=6.79, wps=5848.7, ups=0.09, wpb=64781, bsz=128, num_updates=11557, lr=9.99155e-05, gnorm=4.047, loss_scale=1, train_wall=11, gb_free=2.8, wall=132364 2021-06-20 07:25:01 | INFO | train_inner | epoch 004: 2622 / 3002 loss=2.518, ppl=5.73, wps=5884.4, ups=0.09, wpb=64822, bsz=128, num_updates=11558, lr=9.99155e-05, gnorm=2.108, loss_scale=1, train_wall=11, gb_free=2.8, wall=132375 2021-06-20 07:25:11 | INFO | train_inner | epoch 004: 2623 / 3002 loss=2.535, ppl=5.79, wps=6005.3, ups=0.09, wpb=64818, bsz=128, num_updates=11559, lr=9.99155e-05, gnorm=2.072, loss_scale=1, train_wall=10, gb_free=2.8, wall=132386 2021-06-20 07:25:22 | INFO | train_inner | epoch 004: 2624 / 3002 loss=2.488, ppl=5.61, wps=5861.4, ups=0.09, wpb=64821, bsz=128, num_updates=11560, lr=9.99155e-05, gnorm=2.035, loss_scale=1, train_wall=11, gb_free=2.8, wall=132397 2021-06-20 07:25:33 | INFO | train_inner | epoch 004: 2625 / 3002 loss=2.689, ppl=6.45, wps=5861.6, ups=0.09, wpb=64828, bsz=128, num_updates=11561, lr=9.99155e-05, gnorm=2.071, loss_scale=1, train_wall=11, gb_free=2.8, wall=132408 2021-06-20 07:25:44 | INFO | train_inner | epoch 004: 2626 / 3002 loss=2.645, ppl=6.25, wps=5846.8, ups=0.09, wpb=64624, bsz=128, num_updates=11562, lr=9.99155e-05, gnorm=2.045, loss_scale=1, train_wall=11, gb_free=2.8, wall=132419 2021-06-20 07:25:56 | INFO | train_inner | epoch 004: 2627 / 3002 loss=2.605, ppl=6.09, wps=5833.3, ups=0.09, wpb=64886, bsz=128, num_updates=11563, lr=9.99155e-05, gnorm=2.1, loss_scale=1, train_wall=11, gb_free=2.8, wall=132430 2021-06-20 07:26:07 | INFO | train_inner | epoch 004: 2628 / 3002 loss=2.49, ppl=5.62, wps=5824.1, ups=0.09, wpb=64817, bsz=128, num_updates=11564, lr=9.99155e-05, gnorm=2.098, loss_scale=1, train_wall=11, gb_free=2.8, wall=132441 2021-06-20 07:26:18 | INFO | train_inner | epoch 004: 2629 / 3002 loss=2.563, ppl=5.91, wps=5947.5, ups=0.09, wpb=64831, bsz=128, num_updates=11565, lr=9.99155e-05, gnorm=2.034, loss_scale=1, train_wall=10, gb_free=2.8, wall=132452 2021-06-20 07:26:29 | INFO | train_inner | epoch 004: 2630 / 3002 loss=2.597, ppl=6.05, wps=5878.5, ups=0.09, wpb=64802, bsz=128, num_updates=11566, lr=9.99155e-05, gnorm=2.348, loss_scale=1, train_wall=11, gb_free=2.8, wall=132463 2021-06-20 07:26:40 | INFO | train_inner | epoch 004: 2631 / 3002 loss=2.38, ppl=5.21, wps=5891.5, ups=0.09, wpb=64878, bsz=128, num_updates=11567, lr=9.99155e-05, gnorm=1.993, loss_scale=1, train_wall=11, gb_free=2.8, wall=132474 2021-06-20 07:26:51 | INFO | train_inner | epoch 004: 2632 / 3002 loss=2.589, ppl=6.02, wps=5875.7, ups=0.09, wpb=64791, bsz=128, num_updates=11568, lr=9.99154e-05, gnorm=2.359, loss_scale=1, train_wall=11, gb_free=2.8, wall=132485 2021-06-20 07:27:02 | INFO | train_inner | epoch 004: 2633 / 3002 loss=2.567, ppl=5.93, wps=5955, ups=0.09, wpb=64831, bsz=128, num_updates=11569, lr=9.99154e-05, gnorm=2.081, loss_scale=1, train_wall=10, gb_free=2.8, wall=132496 2021-06-20 07:27:13 | INFO | train_inner | epoch 004: 2634 / 3002 loss=2.569, ppl=5.94, wps=5743.1, ups=0.09, wpb=64758, bsz=128, num_updates=11570, lr=9.99154e-05, gnorm=4.927, loss_scale=1, train_wall=11, gb_free=2.8, wall=132507 2021-06-20 07:27:24 | INFO | train_inner | epoch 004: 2635 / 3002 loss=2.415, ppl=5.33, wps=5872.7, ups=0.09, wpb=64700, bsz=128, num_updates=11571, lr=9.99154e-05, gnorm=1.936, loss_scale=1, train_wall=11, gb_free=2.8, wall=132518 2021-06-20 07:27:35 | INFO | train_inner | epoch 004: 2636 / 3002 loss=2.719, ppl=6.58, wps=5724.5, ups=0.09, wpb=64823, bsz=128, num_updates=11572, lr=9.99154e-05, gnorm=2.056, loss_scale=1, train_wall=11, gb_free=2.8, wall=132530 2021-06-20 07:27:46 | INFO | train_inner | epoch 004: 2637 / 3002 loss=2.622, ppl=6.16, wps=5885, ups=0.09, wpb=64826, bsz=128, num_updates=11573, lr=9.99154e-05, gnorm=2.293, loss_scale=1, train_wall=11, gb_free=2.8, wall=132541 2021-06-20 07:27:57 | INFO | train_inner | epoch 004: 2638 / 3002 loss=2.482, ppl=5.59, wps=5943.6, ups=0.09, wpb=64845, bsz=128, num_updates=11574, lr=9.99154e-05, gnorm=1.911, loss_scale=1, train_wall=10, gb_free=2.8, wall=132551 2021-06-20 07:28:08 | INFO | train_inner | epoch 004: 2639 / 3002 loss=2.5, ppl=5.66, wps=5777.3, ups=0.09, wpb=64785, bsz=128, num_updates=11575, lr=9.99154e-05, gnorm=2.036, loss_scale=1, train_wall=11, gb_free=2.8, wall=132563 2021-06-20 07:28:19 | INFO | train_inner | epoch 004: 2640 / 3002 loss=2.448, ppl=5.46, wps=5871.5, ups=0.09, wpb=64821, bsz=128, num_updates=11576, lr=9.99154e-05, gnorm=46.11, loss_scale=1, train_wall=11, gb_free=2.8, wall=132574 2021-06-20 07:28:30 | INFO | train_inner | epoch 004: 2641 / 3002 loss=2.581, ppl=5.98, wps=5902.8, ups=0.09, wpb=64838, bsz=128, num_updates=11577, lr=9.99154e-05, gnorm=2.131, loss_scale=1, train_wall=11, gb_free=2.8, wall=132585 2021-06-20 07:28:42 | INFO | train_inner | epoch 004: 2642 / 3002 loss=2.559, ppl=5.89, wps=5829, ups=0.09, wpb=64882, bsz=128, num_updates=11578, lr=9.99154e-05, gnorm=2.147, loss_scale=1, train_wall=11, gb_free=2.8, wall=132596 2021-06-20 07:28:53 | INFO | train_inner | epoch 004: 2643 / 3002 loss=2.785, ppl=6.89, wps=5835.9, ups=0.09, wpb=64808, bsz=128, num_updates=11579, lr=9.99154e-05, gnorm=3.988, loss_scale=1, train_wall=11, gb_free=2.8, wall=132607 2021-06-20 07:29:04 | INFO | train_inner | epoch 004: 2644 / 3002 loss=2.461, ppl=5.51, wps=5832.1, ups=0.09, wpb=64829, bsz=128, num_updates=11580, lr=9.99154e-05, gnorm=1.945, loss_scale=1, train_wall=11, gb_free=2.8, wall=132618 2021-06-20 07:29:15 | INFO | train_inner | epoch 004: 2645 / 3002 loss=2.656, ppl=6.3, wps=5816.8, ups=0.09, wpb=64749, bsz=128, num_updates=11581, lr=9.99153e-05, gnorm=2.463, loss_scale=1, train_wall=11, gb_free=2.8, wall=132629 2021-06-20 07:29:26 | INFO | train_inner | epoch 004: 2646 / 3002 loss=2.539, ppl=5.81, wps=5835.9, ups=0.09, wpb=64836, bsz=128, num_updates=11582, lr=9.99153e-05, gnorm=2.063, loss_scale=1, train_wall=11, gb_free=2.8, wall=132640 2021-06-20 07:29:37 | INFO | train_inner | epoch 004: 2647 / 3002 loss=2.401, ppl=5.28, wps=5877.4, ups=0.09, wpb=64826, bsz=128, num_updates=11583, lr=9.99153e-05, gnorm=2.537, loss_scale=1, train_wall=11, gb_free=2.8, wall=132651 2021-06-20 07:29:48 | INFO | train_inner | epoch 004: 2648 / 3002 loss=2.469, ppl=5.54, wps=5842.5, ups=0.09, wpb=64739, bsz=128, num_updates=11584, lr=9.99153e-05, gnorm=2.083, loss_scale=1, train_wall=11, gb_free=2.8, wall=132662 2021-06-20 07:29:59 | INFO | train_inner | epoch 004: 2649 / 3002 loss=2.607, ppl=6.09, wps=5834.1, ups=0.09, wpb=64811, bsz=128, num_updates=11585, lr=9.99153e-05, gnorm=2.229, loss_scale=1, train_wall=11, gb_free=2.8, wall=132674 2021-06-20 07:30:10 | INFO | train_inner | epoch 004: 2650 / 3002 loss=2.582, ppl=5.99, wps=5902.6, ups=0.09, wpb=64809, bsz=128, num_updates=11586, lr=9.99153e-05, gnorm=2.217, loss_scale=1, train_wall=11, gb_free=2.8, wall=132685 2021-06-20 07:30:21 | INFO | train_inner | epoch 004: 2651 / 3002 loss=2.454, ppl=5.48, wps=5870.5, ups=0.09, wpb=64767, bsz=128, num_updates=11587, lr=9.99153e-05, gnorm=7.223, loss_scale=1, train_wall=11, gb_free=2.8, wall=132696 2021-06-20 07:30:32 | INFO | train_inner | epoch 004: 2652 / 3002 loss=2.496, ppl=5.64, wps=5942.8, ups=0.09, wpb=64868, bsz=128, num_updates=11588, lr=9.99153e-05, gnorm=2.552, loss_scale=1, train_wall=10, gb_free=2.8, wall=132706 2021-06-20 07:30:43 | INFO | train_inner | epoch 004: 2653 / 3002 loss=2.584, ppl=6, wps=5859.8, ups=0.09, wpb=64825, bsz=128, num_updates=11589, lr=9.99153e-05, gnorm=9.119, loss_scale=1, train_wall=11, gb_free=2.8, wall=132718 2021-06-20 07:30:54 | INFO | train_inner | epoch 004: 2654 / 3002 loss=2.696, ppl=6.48, wps=5913.3, ups=0.09, wpb=64749, bsz=128, num_updates=11590, lr=9.99153e-05, gnorm=2.156, loss_scale=1, train_wall=10, gb_free=2.8, wall=132728 2021-06-20 07:31:05 | INFO | train_inner | epoch 004: 2655 / 3002 loss=2.598, ppl=6.06, wps=5873.1, ups=0.09, wpb=64850, bsz=128, num_updates=11591, lr=9.99153e-05, gnorm=5.834, loss_scale=1, train_wall=11, gb_free=2.8, wall=132740 2021-06-20 07:31:16 | INFO | train_inner | epoch 004: 2656 / 3002 loss=2.548, ppl=5.85, wps=5851.5, ups=0.09, wpb=64854, bsz=128, num_updates=11592, lr=9.99153e-05, gnorm=2.403, loss_scale=1, train_wall=11, gb_free=2.8, wall=132751 2021-06-20 07:31:27 | INFO | train_inner | epoch 004: 2657 / 3002 loss=2.536, ppl=5.8, wps=5784.2, ups=0.09, wpb=64826, bsz=128, num_updates=11593, lr=9.99152e-05, gnorm=2.929, loss_scale=1, train_wall=11, gb_free=2.8, wall=132762 2021-06-20 07:31:39 | INFO | train_inner | epoch 004: 2658 / 3002 loss=2.705, ppl=6.52, wps=5770.7, ups=0.09, wpb=64813, bsz=128, num_updates=11594, lr=9.99152e-05, gnorm=2.804, loss_scale=1, train_wall=11, gb_free=2.8, wall=132773 2021-06-20 07:31:50 | INFO | train_inner | epoch 004: 2659 / 3002 loss=2.738, ppl=6.67, wps=5974.1, ups=0.09, wpb=64891, bsz=128, num_updates=11595, lr=9.99152e-05, gnorm=5.11, loss_scale=1, train_wall=10, gb_free=2.8, wall=132784 2021-06-20 07:32:01 | INFO | train_inner | epoch 004: 2660 / 3002 loss=2.541, ppl=5.82, wps=5891.4, ups=0.09, wpb=64893, bsz=128, num_updates=11596, lr=9.99152e-05, gnorm=3.674, loss_scale=1, train_wall=11, gb_free=2.8, wall=132795 2021-06-20 07:32:12 | INFO | train_inner | epoch 004: 2661 / 3002 loss=2.563, ppl=5.91, wps=5832.7, ups=0.09, wpb=64894, bsz=128, num_updates=11597, lr=9.99152e-05, gnorm=10.939, loss_scale=1, train_wall=11, gb_free=2.8, wall=132806 2021-06-20 07:32:23 | INFO | train_inner | epoch 004: 2662 / 3002 loss=2.691, ppl=6.46, wps=5910.9, ups=0.09, wpb=64821, bsz=128, num_updates=11598, lr=9.99152e-05, gnorm=2.564, loss_scale=1, train_wall=11, gb_free=2.8, wall=132817 2021-06-20 07:32:34 | INFO | train_inner | epoch 004: 2663 / 3002 loss=2.606, ppl=6.09, wps=5861.5, ups=0.09, wpb=64852, bsz=128, num_updates=11599, lr=9.99152e-05, gnorm=3.21, loss_scale=1, train_wall=11, gb_free=2.8, wall=132828 2021-06-20 07:32:45 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 2021-06-20 07:32:56 | INFO | train_inner | epoch 004: 2665 / 3002 loss=2.815, ppl=7.04, wps=2921.5, ups=0.05, wpb=64814, bsz=128, num_updates=11600, lr=9.99152e-05, gnorm=7.311, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=132850 2021-06-20 07:33:07 | INFO | train_inner | epoch 004: 2666 / 3002 loss=2.573, ppl=5.95, wps=5860, ups=0.09, wpb=64886, bsz=128, num_updates=11601, lr=9.99152e-05, gnorm=3.505, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=132861 2021-06-20 07:33:18 | INFO | train_inner | epoch 004: 2667 / 3002 loss=2.519, ppl=5.73, wps=5898.8, ups=0.09, wpb=64821, bsz=128, num_updates=11602, lr=9.99152e-05, gnorm=4.59, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=132872 2021-06-20 07:33:29 | INFO | train_inner | epoch 004: 2668 / 3002 loss=2.684, ppl=6.42, wps=5752.4, ups=0.09, wpb=64849, bsz=128, num_updates=11603, lr=9.99152e-05, gnorm=4.197, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=132884 2021-06-20 07:33:40 | INFO | train_inner | epoch 004: 2669 / 3002 loss=2.65, ppl=6.28, wps=5824.6, ups=0.09, wpb=64867, bsz=128, num_updates=11604, lr=9.99152e-05, gnorm=5.469, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=132895 2021-06-20 07:33:52 | INFO | train_inner | epoch 004: 2670 / 3002 loss=2.627, ppl=6.18, wps=5808.6, ups=0.09, wpb=64852, bsz=128, num_updates=11605, lr=9.99152e-05, gnorm=2.291, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=132906 2021-06-20 07:34:02 | INFO | train_inner | epoch 004: 2671 / 3002 loss=2.637, ppl=6.22, wps=5971.2, ups=0.09, wpb=64789, bsz=128, num_updates=11606, lr=9.99151e-05, gnorm=4.452, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=132917 2021-06-20 07:34:13 | INFO | train_inner | epoch 004: 2672 / 3002 loss=2.566, ppl=5.92, wps=5914.1, ups=0.09, wpb=64795, bsz=128, num_updates=11607, lr=9.99151e-05, gnorm=2.516, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=132928 2021-06-20 07:34:24 | INFO | train_inner | epoch 004: 2673 / 3002 loss=2.597, ppl=6.05, wps=5862.4, ups=0.09, wpb=64812, bsz=128, num_updates=11608, lr=9.99151e-05, gnorm=3.414, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=132939 2021-06-20 07:34:36 | INFO | train_inner | epoch 004: 2674 / 3002 loss=2.605, ppl=6.09, wps=5804.3, ups=0.09, wpb=64821, bsz=128, num_updates=11609, lr=9.99151e-05, gnorm=2.666, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=132950 2021-06-20 07:34:47 | INFO | train_inner | epoch 004: 2675 / 3002 loss=2.648, ppl=6.27, wps=5834.7, ups=0.09, wpb=64867, bsz=128, num_updates=11610, lr=9.99151e-05, gnorm=2.324, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=132961 2021-06-20 07:34:58 | INFO | train_inner | epoch 004: 2676 / 3002 loss=2.733, ppl=6.65, wps=5904, ups=0.09, wpb=64900, bsz=128, num_updates=11611, lr=9.99151e-05, gnorm=2.906, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=132972 2021-06-20 07:35:09 | INFO | train_inner | epoch 004: 2677 / 3002 loss=2.461, ppl=5.5, wps=6009.5, ups=0.09, wpb=64879, bsz=128, num_updates=11612, lr=9.99151e-05, gnorm=2.149, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=132983 2021-06-20 07:35:20 | INFO | train_inner | epoch 004: 2678 / 3002 loss=2.418, ppl=5.34, wps=5758.3, ups=0.09, wpb=64849, bsz=128, num_updates=11613, lr=9.99151e-05, gnorm=18.579, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=132994 2021-06-20 07:35:31 | INFO | train_inner | epoch 004: 2679 / 3002 loss=2.646, ppl=6.26, wps=5832.1, ups=0.09, wpb=64811, bsz=128, num_updates=11614, lr=9.99151e-05, gnorm=3.547, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133005 2021-06-20 07:35:42 | INFO | train_inner | epoch 004: 2680 / 3002 loss=2.534, ppl=5.79, wps=5840.8, ups=0.09, wpb=64829, bsz=128, num_updates=11615, lr=9.99151e-05, gnorm=5.3, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133016 2021-06-20 07:35:53 | INFO | train_inner | epoch 004: 2681 / 3002 loss=2.558, ppl=5.89, wps=5785.7, ups=0.09, wpb=64780, bsz=128, num_updates=11616, lr=9.99151e-05, gnorm=4.523, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133028 2021-06-20 07:36:04 | INFO | train_inner | epoch 004: 2682 / 3002 loss=2.51, ppl=5.7, wps=5898, ups=0.09, wpb=64854, bsz=128, num_updates=11617, lr=9.99151e-05, gnorm=2.477, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133039 2021-06-20 07:36:15 | INFO | train_inner | epoch 004: 2683 / 3002 loss=2.729, ppl=6.63, wps=5746.6, ups=0.09, wpb=64817, bsz=128, num_updates=11618, lr=9.9915e-05, gnorm=2.663, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133050 2021-06-20 07:36:26 | INFO | train_inner | epoch 004: 2684 / 3002 loss=2.736, ppl=6.66, wps=5948.8, ups=0.09, wpb=64766, bsz=128, num_updates=11619, lr=9.9915e-05, gnorm=2.983, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133061 2021-06-20 07:36:37 | INFO | train_inner | epoch 004: 2685 / 3002 loss=2.598, ppl=6.05, wps=5844.1, ups=0.09, wpb=64810, bsz=128, num_updates=11620, lr=9.9915e-05, gnorm=2.55, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133072 2021-06-20 07:36:48 | INFO | train_inner | epoch 004: 2686 / 3002 loss=2.634, ppl=6.21, wps=6015.8, ups=0.09, wpb=64918, bsz=128, num_updates=11621, lr=9.9915e-05, gnorm=2.277, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133083 2021-06-20 07:36:59 | INFO | train_inner | epoch 004: 2687 / 3002 loss=2.45, ppl=5.46, wps=5948.2, ups=0.09, wpb=64883, bsz=128, num_updates=11622, lr=9.9915e-05, gnorm=2.426, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133093 2021-06-20 07:37:10 | INFO | train_inner | epoch 004: 2688 / 3002 loss=2.509, ppl=5.69, wps=5797.5, ups=0.09, wpb=64765, bsz=128, num_updates=11623, lr=9.9915e-05, gnorm=3.102, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133105 2021-06-20 07:37:22 | INFO | train_inner | epoch 004: 2689 / 3002 loss=2.694, ppl=6.47, wps=5753.3, ups=0.09, wpb=64818, bsz=128, num_updates=11624, lr=9.9915e-05, gnorm=2.188, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133116 2021-06-20 07:37:33 | INFO | train_inner | epoch 004: 2690 / 3002 loss=2.575, ppl=5.96, wps=5798.2, ups=0.09, wpb=64852, bsz=128, num_updates=11625, lr=9.9915e-05, gnorm=2.077, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133127 2021-06-20 07:37:44 | INFO | train_inner | epoch 004: 2691 / 3002 loss=2.737, ppl=6.66, wps=5765.3, ups=0.09, wpb=64850, bsz=128, num_updates=11626, lr=9.9915e-05, gnorm=2.105, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133138 2021-06-20 07:37:55 | INFO | train_inner | epoch 004: 2692 / 3002 loss=2.608, ppl=6.1, wps=5932, ups=0.09, wpb=64839, bsz=128, num_updates=11627, lr=9.9915e-05, gnorm=2.226, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133149 2021-06-20 07:38:06 | INFO | train_inner | epoch 004: 2693 / 3002 loss=2.575, ppl=5.96, wps=5879.5, ups=0.09, wpb=64852, bsz=128, num_updates=11628, lr=9.9915e-05, gnorm=2.056, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133160 2021-06-20 07:38:17 | INFO | train_inner | epoch 004: 2694 / 3002 loss=2.572, ppl=5.95, wps=5811.8, ups=0.09, wpb=64828, bsz=128, num_updates=11629, lr=9.9915e-05, gnorm=2.128, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133171 2021-06-20 07:38:28 | INFO | train_inner | epoch 004: 2695 / 3002 loss=2.565, ppl=5.92, wps=5810.9, ups=0.09, wpb=64827, bsz=128, num_updates=11630, lr=9.9915e-05, gnorm=2.054, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133183 2021-06-20 07:38:39 | INFO | train_inner | epoch 004: 2696 / 3002 loss=2.594, ppl=6.04, wps=5894, ups=0.09, wpb=64878, bsz=128, num_updates=11631, lr=9.99149e-05, gnorm=2.098, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133194 2021-06-20 07:38:50 | INFO | train_inner | epoch 004: 2697 / 3002 loss=2.612, ppl=6.11, wps=5911.2, ups=0.09, wpb=64832, bsz=128, num_updates=11632, lr=9.99149e-05, gnorm=2.049, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133205 2021-06-20 07:39:02 | INFO | train_inner | epoch 004: 2698 / 3002 loss=2.566, ppl=5.92, wps=5674.5, ups=0.09, wpb=64778, bsz=128, num_updates=11633, lr=9.99149e-05, gnorm=5.499, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133216 2021-06-20 07:39:13 | INFO | train_inner | epoch 004: 2699 / 3002 loss=2.497, ppl=5.65, wps=5816.4, ups=0.09, wpb=64760, bsz=128, num_updates=11634, lr=9.99149e-05, gnorm=2.19, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133227 2021-06-20 07:39:24 | INFO | train_inner | epoch 004: 2700 / 3002 loss=2.661, ppl=6.32, wps=5886.8, ups=0.09, wpb=64781, bsz=128, num_updates=11635, lr=9.99149e-05, gnorm=2.043, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133238 2021-06-20 07:39:35 | INFO | train_inner | epoch 004: 2701 / 3002 loss=2.37, ppl=5.17, wps=5903.6, ups=0.09, wpb=64817, bsz=128, num_updates=11636, lr=9.99149e-05, gnorm=2.029, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133249 2021-06-20 07:39:46 | INFO | train_inner | epoch 004: 2702 / 3002 loss=2.613, ppl=6.12, wps=5840.5, ups=0.09, wpb=64861, bsz=128, num_updates=11637, lr=9.99149e-05, gnorm=1.983, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133260 2021-06-20 07:39:57 | INFO | train_inner | epoch 004: 2703 / 3002 loss=2.575, ppl=5.96, wps=5946.4, ups=0.09, wpb=64784, bsz=128, num_updates=11638, lr=9.99149e-05, gnorm=2.017, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133271 2021-06-20 07:40:08 | INFO | train_inner | epoch 004: 2704 / 3002 loss=2.54, ppl=5.82, wps=5971.4, ups=0.09, wpb=64896, bsz=128, num_updates=11639, lr=9.99149e-05, gnorm=2.027, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133282 2021-06-20 07:40:19 | INFO | train_inner | epoch 004: 2705 / 3002 loss=2.528, ppl=5.77, wps=5766.8, ups=0.09, wpb=64846, bsz=128, num_updates=11640, lr=9.99149e-05, gnorm=2.067, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133293 2021-06-20 07:40:30 | INFO | train_inner | epoch 004: 2706 / 3002 loss=2.673, ppl=6.38, wps=5909.2, ups=0.09, wpb=64904, bsz=128, num_updates=11641, lr=9.99149e-05, gnorm=2.158, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133304 2021-06-20 07:40:41 | INFO | train_inner | epoch 004: 2707 / 3002 loss=2.624, ppl=6.16, wps=5773.1, ups=0.09, wpb=64820, bsz=128, num_updates=11642, lr=9.99149e-05, gnorm=2.118, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133315 2021-06-20 07:40:52 | INFO | train_inner | epoch 004: 2708 / 3002 loss=2.625, ppl=6.17, wps=5827.7, ups=0.09, wpb=64768, bsz=128, num_updates=11643, lr=9.99148e-05, gnorm=2.041, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133327 2021-06-20 07:41:03 | INFO | train_inner | epoch 004: 2709 / 3002 loss=2.534, ppl=5.79, wps=5887.4, ups=0.09, wpb=64902, bsz=128, num_updates=11644, lr=9.99148e-05, gnorm=2.01, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133338 2021-06-20 07:41:14 | INFO | train_inner | epoch 004: 2710 / 3002 loss=2.602, ppl=6.07, wps=5872.7, ups=0.09, wpb=64813, bsz=128, num_updates=11645, lr=9.99148e-05, gnorm=2.081, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133349 2021-06-20 07:41:25 | INFO | train_inner | epoch 004: 2711 / 3002 loss=2.555, ppl=5.88, wps=5867.2, ups=0.09, wpb=64817, bsz=128, num_updates=11646, lr=9.99148e-05, gnorm=2.045, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133360 2021-06-20 07:41:36 | INFO | train_inner | epoch 004: 2712 / 3002 loss=2.511, ppl=5.7, wps=5852.3, ups=0.09, wpb=64881, bsz=128, num_updates=11647, lr=9.99148e-05, gnorm=2.012, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133371 2021-06-20 07:41:47 | INFO | train_inner | epoch 004: 2713 / 3002 loss=2.578, ppl=5.97, wps=5888.6, ups=0.09, wpb=64818, bsz=128, num_updates=11648, lr=9.99148e-05, gnorm=2.044, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133382 2021-06-20 07:41:58 | INFO | train_inner | epoch 004: 2714 / 3002 loss=2.556, ppl=5.88, wps=5991.3, ups=0.09, wpb=64904, bsz=128, num_updates=11649, lr=9.99148e-05, gnorm=2.106, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133393 2021-06-20 07:42:10 | INFO | train_inner | epoch 004: 2715 / 3002 loss=2.545, ppl=5.84, wps=5730.4, ups=0.09, wpb=64879, bsz=128, num_updates=11650, lr=9.99148e-05, gnorm=2.12, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133404 2021-06-20 07:42:21 | INFO | train_inner | epoch 004: 2716 / 3002 loss=2.605, ppl=6.08, wps=5846.5, ups=0.09, wpb=64788, bsz=128, num_updates=11651, lr=9.99148e-05, gnorm=2.082, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133415 2021-06-20 07:42:32 | INFO | train_inner | epoch 004: 2717 / 3002 loss=2.555, ppl=5.88, wps=5796.5, ups=0.09, wpb=64817, bsz=128, num_updates=11652, lr=9.99148e-05, gnorm=1.995, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133426 2021-06-20 07:42:43 | INFO | train_inner | epoch 004: 2718 / 3002 loss=2.559, ppl=5.89, wps=5744.4, ups=0.09, wpb=64878, bsz=128, num_updates=11653, lr=9.99148e-05, gnorm=1.986, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133437 2021-06-20 07:42:54 | INFO | train_inner | epoch 004: 2719 / 3002 loss=2.605, ppl=6.09, wps=5824.6, ups=0.09, wpb=64763, bsz=128, num_updates=11654, lr=9.99148e-05, gnorm=1.984, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133449 2021-06-20 07:43:06 | INFO | train_inner | epoch 004: 2720 / 3002 loss=2.553, ppl=5.87, wps=5752.2, ups=0.09, wpb=64790, bsz=128, num_updates=11655, lr=9.99148e-05, gnorm=2.018, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133460 2021-06-20 07:43:17 | INFO | train_inner | epoch 004: 2721 / 3002 loss=2.532, ppl=5.78, wps=5826.9, ups=0.09, wpb=64881, bsz=128, num_updates=11656, lr=9.99147e-05, gnorm=1.965, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133471 2021-06-20 07:43:28 | INFO | train_inner | epoch 004: 2722 / 3002 loss=2.614, ppl=6.12, wps=5787.1, ups=0.09, wpb=64853, bsz=128, num_updates=11657, lr=9.99147e-05, gnorm=2.053, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133482 2021-06-20 07:43:39 | INFO | train_inner | epoch 004: 2723 / 3002 loss=2.496, ppl=5.64, wps=5999.2, ups=0.09, wpb=64908, bsz=128, num_updates=11658, lr=9.99147e-05, gnorm=1.967, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133493 2021-06-20 07:43:50 | INFO | train_inner | epoch 004: 2724 / 3002 loss=2.506, ppl=5.68, wps=5881.7, ups=0.09, wpb=64822, bsz=128, num_updates=11659, lr=9.99147e-05, gnorm=3.413, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133504 2021-06-20 07:44:01 | INFO | train_inner | epoch 004: 2725 / 3002 loss=2.385, ppl=5.22, wps=5789.6, ups=0.09, wpb=64896, bsz=128, num_updates=11660, lr=9.99147e-05, gnorm=2.046, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133515 2021-06-20 07:44:12 | INFO | train_inner | epoch 004: 2726 / 3002 loss=2.516, ppl=5.72, wps=5759.6, ups=0.09, wpb=64743, bsz=128, num_updates=11661, lr=9.99147e-05, gnorm=2.041, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133526 2021-06-20 07:44:23 | INFO | train_inner | epoch 004: 2727 / 3002 loss=2.526, ppl=5.76, wps=5822.5, ups=0.09, wpb=64813, bsz=128, num_updates=11662, lr=9.99147e-05, gnorm=2.006, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133538 2021-06-20 07:44:34 | INFO | train_inner | epoch 004: 2728 / 3002 loss=2.7, ppl=6.5, wps=5846.8, ups=0.09, wpb=64824, bsz=128, num_updates=11663, lr=9.99147e-05, gnorm=2.016, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133549 2021-06-20 07:44:45 | INFO | train_inner | epoch 004: 2729 / 3002 loss=2.457, ppl=5.49, wps=5847.4, ups=0.09, wpb=64790, bsz=128, num_updates=11664, lr=9.99147e-05, gnorm=2.025, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133560 2021-06-20 07:44:57 | INFO | train_inner | epoch 004: 2730 / 3002 loss=2.551, ppl=5.86, wps=5819.5, ups=0.09, wpb=64881, bsz=128, num_updates=11665, lr=9.99147e-05, gnorm=1.974, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133571 2021-06-20 07:45:07 | INFO | train_inner | epoch 004: 2731 / 3002 loss=2.593, ppl=6.04, wps=5960.2, ups=0.09, wpb=64868, bsz=128, num_updates=11666, lr=9.99147e-05, gnorm=1.95, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133582 2021-06-20 07:45:19 | INFO | train_inner | epoch 004: 2732 / 3002 loss=2.546, ppl=5.84, wps=5703.9, ups=0.09, wpb=64778, bsz=128, num_updates=11667, lr=9.99147e-05, gnorm=2.05, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133593 2021-06-20 07:45:30 | INFO | train_inner | epoch 004: 2733 / 3002 loss=2.665, ppl=6.34, wps=5791.3, ups=0.09, wpb=64711, bsz=128, num_updates=11668, lr=9.99146e-05, gnorm=2.12, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133604 2021-06-20 07:45:41 | INFO | train_inner | epoch 004: 2734 / 3002 loss=2.467, ppl=5.53, wps=5753.1, ups=0.09, wpb=64855, bsz=128, num_updates=11669, lr=9.99146e-05, gnorm=2.09, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133616 2021-06-20 07:45:52 | INFO | train_inner | epoch 004: 2735 / 3002 loss=2.369, ppl=5.17, wps=5901.2, ups=0.09, wpb=64819, bsz=128, num_updates=11670, lr=9.99146e-05, gnorm=1.982, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133627 2021-06-20 07:46:03 | INFO | train_inner | epoch 004: 2736 / 3002 loss=2.445, ppl=5.44, wps=5812.3, ups=0.09, wpb=64830, bsz=128, num_updates=11671, lr=9.99146e-05, gnorm=2.141, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133638 2021-06-20 07:46:14 | INFO | train_inner | epoch 004: 2737 / 3002 loss=2.702, ppl=6.51, wps=5862.8, ups=0.09, wpb=64798, bsz=128, num_updates=11672, lr=9.99146e-05, gnorm=3.15, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133649 2021-06-20 07:46:26 | INFO | train_inner | epoch 004: 2738 / 3002 loss=2.426, ppl=5.37, wps=5862.3, ups=0.09, wpb=64867, bsz=128, num_updates=11673, lr=9.99146e-05, gnorm=2.022, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133660 2021-06-20 07:46:37 | INFO | train_inner | epoch 004: 2739 / 3002 loss=2.559, ppl=5.89, wps=5770.2, ups=0.09, wpb=64853, bsz=128, num_updates=11674, lr=9.99146e-05, gnorm=1.974, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133671 2021-06-20 07:46:48 | INFO | train_inner | epoch 004: 2740 / 3002 loss=2.575, ppl=5.96, wps=5873.6, ups=0.09, wpb=64800, bsz=128, num_updates=11675, lr=9.99146e-05, gnorm=9.218, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133682 2021-06-20 07:46:59 | INFO | train_inner | epoch 004: 2741 / 3002 loss=2.612, ppl=6.11, wps=5981.2, ups=0.09, wpb=64801, bsz=128, num_updates=11676, lr=9.99146e-05, gnorm=2.043, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133693 2021-06-20 07:47:10 | INFO | train_inner | epoch 004: 2742 / 3002 loss=2.711, ppl=6.55, wps=5972.8, ups=0.09, wpb=64841, bsz=128, num_updates=11677, lr=9.99146e-05, gnorm=1.979, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133704 2021-06-20 07:47:21 | INFO | train_inner | epoch 004: 2743 / 3002 loss=2.591, ppl=6.03, wps=5813.8, ups=0.09, wpb=64852, bsz=128, num_updates=11678, lr=9.99146e-05, gnorm=1.984, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133715 2021-06-20 07:47:31 | INFO | train_inner | epoch 004: 2744 / 3002 loss=2.453, ppl=5.48, wps=6031.3, ups=0.09, wpb=64818, bsz=128, num_updates=11679, lr=9.99146e-05, gnorm=1.948, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133726 2021-06-20 07:47:43 | INFO | train_inner | epoch 004: 2745 / 3002 loss=2.574, ppl=5.96, wps=5796.3, ups=0.09, wpb=64835, bsz=128, num_updates=11680, lr=9.99146e-05, gnorm=1.945, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133737 2021-06-20 07:47:54 | INFO | train_inner | epoch 004: 2746 / 3002 loss=2.572, ppl=5.95, wps=5915, ups=0.09, wpb=64931, bsz=128, num_updates=11681, lr=9.99145e-05, gnorm=2.041, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133748 2021-06-20 07:48:05 | INFO | train_inner | epoch 004: 2747 / 3002 loss=2.693, ppl=6.46, wps=5857.3, ups=0.09, wpb=64858, bsz=128, num_updates=11682, lr=9.99145e-05, gnorm=2.283, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133759 2021-06-20 07:48:16 | INFO | train_inner | epoch 004: 2748 / 3002 loss=2.448, ppl=5.46, wps=5856.4, ups=0.09, wpb=64756, bsz=128, num_updates=11683, lr=9.99145e-05, gnorm=2.07, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133770 2021-06-20 07:48:27 | INFO | train_inner | epoch 004: 2749 / 3002 loss=2.51, ppl=5.7, wps=5911.5, ups=0.09, wpb=64841, bsz=128, num_updates=11684, lr=9.99145e-05, gnorm=2.222, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133781 2021-06-20 07:48:38 | INFO | train_inner | epoch 004: 2750 / 3002 loss=2.536, ppl=5.8, wps=5924.5, ups=0.09, wpb=64842, bsz=128, num_updates=11685, lr=9.99145e-05, gnorm=3.218, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133792 2021-06-20 07:48:48 | INFO | train_inner | epoch 004: 2751 / 3002 loss=2.796, ppl=6.94, wps=6030.2, ups=0.09, wpb=64867, bsz=128, num_updates=11686, lr=9.99145e-05, gnorm=2.088, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133803 2021-06-20 07:48:59 | INFO | train_inner | epoch 004: 2752 / 3002 loss=2.643, ppl=6.25, wps=5871, ups=0.09, wpb=64898, bsz=128, num_updates=11687, lr=9.99145e-05, gnorm=2.059, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133814 2021-06-20 07:49:11 | INFO | train_inner | epoch 004: 2753 / 3002 loss=2.45, ppl=5.46, wps=5836.9, ups=0.09, wpb=64817, bsz=128, num_updates=11688, lr=9.99145e-05, gnorm=2.185, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133825 2021-06-20 07:49:22 | INFO | train_inner | epoch 004: 2754 / 3002 loss=2.623, ppl=6.16, wps=5828.4, ups=0.09, wpb=64901, bsz=128, num_updates=11689, lr=9.99145e-05, gnorm=2.101, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133836 2021-06-20 07:49:33 | INFO | train_inner | epoch 004: 2755 / 3002 loss=2.5, ppl=5.66, wps=5783.7, ups=0.09, wpb=64845, bsz=128, num_updates=11690, lr=9.99145e-05, gnorm=11.311, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133847 2021-06-20 07:49:44 | INFO | train_inner | epoch 004: 2756 / 3002 loss=2.589, ppl=6.01, wps=5832.3, ups=0.09, wpb=64840, bsz=128, num_updates=11691, lr=9.99145e-05, gnorm=2.078, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133858 2021-06-20 07:49:55 | INFO | train_inner | epoch 004: 2757 / 3002 loss=2.532, ppl=5.78, wps=5804.4, ups=0.09, wpb=64756, bsz=128, num_updates=11692, lr=9.99145e-05, gnorm=1.992, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133869 2021-06-20 07:50:06 | INFO | train_inner | epoch 004: 2758 / 3002 loss=2.515, ppl=5.71, wps=5901.5, ups=0.09, wpb=64801, bsz=128, num_updates=11693, lr=9.99144e-05, gnorm=2.019, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133880 2021-06-20 07:50:17 | INFO | train_inner | epoch 004: 2759 / 3002 loss=2.627, ppl=6.18, wps=5997.8, ups=0.09, wpb=64798, bsz=128, num_updates=11694, lr=9.99144e-05, gnorm=1.997, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133891 2021-06-20 07:50:28 | INFO | train_inner | epoch 004: 2760 / 3002 loss=2.616, ppl=6.13, wps=5907.1, ups=0.09, wpb=64848, bsz=128, num_updates=11695, lr=9.99144e-05, gnorm=1.996, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133902 2021-06-20 07:50:39 | INFO | train_inner | epoch 004: 2761 / 3002 loss=2.406, ppl=5.3, wps=5945.3, ups=0.09, wpb=64836, bsz=128, num_updates=11696, lr=9.99144e-05, gnorm=2.024, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133913 2021-06-20 07:50:50 | INFO | train_inner | epoch 004: 2762 / 3002 loss=2.541, ppl=5.82, wps=5830.8, ups=0.09, wpb=64807, bsz=128, num_updates=11697, lr=9.99144e-05, gnorm=1.988, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133924 2021-06-20 07:51:01 | INFO | train_inner | epoch 004: 2763 / 3002 loss=2.6, ppl=6.06, wps=5874.9, ups=0.09, wpb=64766, bsz=128, num_updates=11698, lr=9.99144e-05, gnorm=2.043, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133935 2021-06-20 07:51:12 | INFO | train_inner | epoch 004: 2764 / 3002 loss=2.578, ppl=5.97, wps=5809, ups=0.09, wpb=64862, bsz=128, num_updates=11699, lr=9.99144e-05, gnorm=2.064, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133946 2021-06-20 07:51:23 | INFO | train_inner | epoch 004: 2765 / 3002 loss=2.603, ppl=6.08, wps=5825.5, ups=0.09, wpb=64688, bsz=128, num_updates=11700, lr=9.99144e-05, gnorm=2.029, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133958 2021-06-20 07:51:34 | INFO | train_inner | epoch 004: 2766 / 3002 loss=2.528, ppl=5.77, wps=5928.3, ups=0.09, wpb=64865, bsz=128, num_updates=11701, lr=9.99144e-05, gnorm=2.01, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133969 2021-06-20 07:51:45 | INFO | train_inner | epoch 004: 2767 / 3002 loss=2.491, ppl=5.62, wps=5884.8, ups=0.09, wpb=64850, bsz=128, num_updates=11702, lr=9.99144e-05, gnorm=1.942, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133980 2021-06-20 07:51:56 | INFO | train_inner | epoch 004: 2768 / 3002 loss=2.364, ppl=5.15, wps=5785.7, ups=0.09, wpb=64849, bsz=128, num_updates=11703, lr=9.99144e-05, gnorm=2.04, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133991 2021-06-20 07:52:08 | INFO | train_inner | epoch 004: 2769 / 3002 loss=2.563, ppl=5.91, wps=5771, ups=0.09, wpb=64843, bsz=128, num_updates=11704, lr=9.99144e-05, gnorm=2.033, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134002 2021-06-20 07:52:19 | INFO | train_inner | epoch 004: 2770 / 3002 loss=2.523, ppl=5.75, wps=5835.2, ups=0.09, wpb=64793, bsz=128, num_updates=11705, lr=9.99144e-05, gnorm=2.02, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134013 2021-06-20 07:52:30 | INFO | train_inner | epoch 004: 2771 / 3002 loss=2.646, ppl=6.26, wps=5819.5, ups=0.09, wpb=64811, bsz=128, num_updates=11706, lr=9.99143e-05, gnorm=2.18, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134024 2021-06-20 07:52:41 | INFO | train_inner | epoch 004: 2772 / 3002 loss=2.532, ppl=5.78, wps=5762.9, ups=0.09, wpb=64748, bsz=128, num_updates=11707, lr=9.99143e-05, gnorm=2.173, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134035 2021-06-20 07:52:52 | INFO | train_inner | epoch 004: 2773 / 3002 loss=2.449, ppl=5.46, wps=5865.5, ups=0.09, wpb=64859, bsz=128, num_updates=11708, lr=9.99143e-05, gnorm=3.382, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134047 2021-06-20 07:53:03 | INFO | train_inner | epoch 004: 2774 / 3002 loss=2.628, ppl=6.18, wps=5902.9, ups=0.09, wpb=64792, bsz=128, num_updates=11709, lr=9.99143e-05, gnorm=2.215, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=134057 2021-06-20 07:53:14 | INFO | train_inner | epoch 004: 2775 / 3002 loss=2.576, ppl=5.96, wps=5888.6, ups=0.09, wpb=64813, bsz=128, num_updates=11710, lr=9.99143e-05, gnorm=2.122, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134068 2021-06-20 07:53:25 | INFO | train_inner | epoch 004: 2776 / 3002 loss=2.54, ppl=5.82, wps=5853.3, ups=0.09, wpb=64900, bsz=128, num_updates=11711, lr=9.99143e-05, gnorm=2.037, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134080 2021-06-20 07:53:36 | INFO | train_inner | epoch 004: 2777 / 3002 loss=2.541, ppl=5.82, wps=5922, ups=0.09, wpb=64815, bsz=128, num_updates=11712, lr=9.99143e-05, gnorm=2.036, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134091 2021-06-20 07:53:47 | INFO | train_inner | epoch 004: 2778 / 3002 loss=2.614, ppl=6.12, wps=5810.3, ups=0.09, wpb=64804, bsz=128, num_updates=11713, lr=9.99143e-05, gnorm=1.957, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134102 2021-06-20 07:53:58 | INFO | train_inner | epoch 004: 2779 / 3002 loss=2.496, ppl=5.64, wps=5931.4, ups=0.09, wpb=64881, bsz=128, num_updates=11714, lr=9.99143e-05, gnorm=1.978, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=134113 2021-06-20 07:54:09 | INFO | train_inner | epoch 004: 2780 / 3002 loss=2.546, ppl=5.84, wps=5866.3, ups=0.09, wpb=64825, bsz=128, num_updates=11715, lr=9.99143e-05, gnorm=1.958, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134124 2021-06-20 07:54:20 | INFO | train_inner | epoch 004: 2781 / 3002 loss=2.535, ppl=5.8, wps=5985.3, ups=0.09, wpb=64882, bsz=128, num_updates=11716, lr=9.99143e-05, gnorm=1.945, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=134135 2021-06-20 07:54:31 | INFO | train_inner | epoch 004: 2782 / 3002 loss=2.448, ppl=5.46, wps=5910.1, ups=0.09, wpb=64886, bsz=128, num_updates=11717, lr=9.99143e-05, gnorm=2.273, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134145 2021-06-20 07:54:42 | INFO | train_inner | epoch 004: 2783 / 3002 loss=2.519, ppl=5.73, wps=5806.3, ups=0.09, wpb=64858, bsz=128, num_updates=11718, lr=9.99142e-05, gnorm=2.004, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134157 2021-06-20 07:54:53 | INFO | train_inner | epoch 004: 2784 / 3002 loss=2.396, ppl=5.26, wps=5982.2, ups=0.09, wpb=64852, bsz=128, num_updates=11719, lr=9.99142e-05, gnorm=2.005, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=134168 2021-06-20 07:55:04 | INFO | train_inner | epoch 004: 2785 / 3002 loss=2.524, ppl=5.75, wps=5751.6, ups=0.09, wpb=64771, bsz=128, num_updates=11720, lr=9.99142e-05, gnorm=1.976, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134179 2021-06-20 07:55:15 | INFO | train_inner | epoch 004: 2786 / 3002 loss=2.616, ppl=6.13, wps=5871.9, ups=0.09, wpb=64877, bsz=128, num_updates=11721, lr=9.99142e-05, gnorm=2.032, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134190 2021-06-20 07:55:26 | INFO | train_inner | epoch 004: 2787 / 3002 loss=2.664, ppl=6.34, wps=5954.2, ups=0.09, wpb=64814, bsz=128, num_updates=11722, lr=9.99142e-05, gnorm=2.09, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=134201 2021-06-20 07:55:38 | INFO | train_inner | epoch 004: 2788 / 3002 loss=2.517, ppl=5.72, wps=5785.3, ups=0.09, wpb=64768, bsz=128, num_updates=11723, lr=9.99142e-05, gnorm=2.845, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134212 2021-06-20 07:55:49 | INFO | train_inner | epoch 004: 2789 / 3002 loss=2.484, ppl=5.59, wps=5823.5, ups=0.09, wpb=64806, bsz=128, num_updates=11724, lr=9.99142e-05, gnorm=3.306, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134223 2021-06-20 07:56:00 | INFO | train_inner | epoch 004: 2790 / 3002 loss=2.596, ppl=6.05, wps=5870.6, ups=0.09, wpb=64845, bsz=128, num_updates=11725, lr=9.99142e-05, gnorm=2.02, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134234 2021-06-20 07:56:11 | INFO | train_inner | epoch 004: 2791 / 3002 loss=2.508, ppl=5.69, wps=5909.4, ups=0.09, wpb=64802, bsz=128, num_updates=11726, lr=9.99142e-05, gnorm=2.014, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134245 2021-06-20 07:56:22 | INFO | train_inner | epoch 004: 2792 / 3002 loss=2.676, ppl=6.39, wps=5826, ups=0.09, wpb=64820, bsz=128, num_updates=11727, lr=9.99142e-05, gnorm=2.095, loss_scale=1, train_wall=11, gb_free=2.8, wall=134256 2021-06-20 07:56:33 | INFO | train_inner | epoch 004: 2793 / 3002 loss=2.622, ppl=6.15, wps=5942.1, ups=0.09, wpb=64863, bsz=128, num_updates=11728, lr=9.99142e-05, gnorm=2.121, loss_scale=1, train_wall=10, gb_free=2.8, wall=134267 2021-06-20 07:56:44 | INFO | train_inner | epoch 004: 2794 / 3002 loss=2.638, ppl=6.23, wps=5835.9, ups=0.09, wpb=64809, bsz=128, num_updates=11729, lr=9.99142e-05, gnorm=2.071, loss_scale=1, train_wall=11, gb_free=2.8, wall=134278 2021-06-20 07:56:55 | INFO | train_inner | epoch 004: 2795 / 3002 loss=2.427, ppl=5.38, wps=5899.6, ups=0.09, wpb=64861, bsz=128, num_updates=11730, lr=9.99142e-05, gnorm=2.048, loss_scale=1, train_wall=11, gb_free=2.8, wall=134289 2021-06-20 07:57:06 | INFO | train_inner | epoch 004: 2796 / 3002 loss=2.512, ppl=5.7, wps=5903.8, ups=0.09, wpb=64862, bsz=128, num_updates=11731, lr=9.99141e-05, gnorm=3.874, loss_scale=1, train_wall=11, gb_free=2.8, wall=134300 2021-06-20 07:57:17 | INFO | train_inner | epoch 004: 2797 / 3002 loss=2.542, ppl=5.82, wps=5826.4, ups=0.09, wpb=64754, bsz=128, num_updates=11732, lr=9.99141e-05, gnorm=2.063, loss_scale=1, train_wall=11, gb_free=2.8, wall=134311 2021-06-20 07:57:28 | INFO | train_inner | epoch 004: 2798 / 3002 loss=2.601, ppl=6.07, wps=5777, ups=0.09, wpb=64844, bsz=128, num_updates=11733, lr=9.99141e-05, gnorm=2.022, loss_scale=1, train_wall=11, gb_free=2.8, wall=134323 2021-06-20 07:57:39 | INFO | train_inner | epoch 004: 2799 / 3002 loss=2.435, ppl=5.41, wps=5881.7, ups=0.09, wpb=64903, bsz=128, num_updates=11734, lr=9.99141e-05, gnorm=2.084, loss_scale=1, train_wall=11, gb_free=2.8, wall=134334 2021-06-20 07:57:50 | INFO | train_inner | epoch 004: 2800 / 3002 loss=2.614, ppl=6.12, wps=5752.9, ups=0.09, wpb=64880, bsz=128, num_updates=11735, lr=9.99141e-05, gnorm=2.004, loss_scale=1, train_wall=11, gb_free=2.8, wall=134345 2021-06-20 07:58:02 | INFO | train_inner | epoch 004: 2801 / 3002 loss=2.582, ppl=5.99, wps=5820.1, ups=0.09, wpb=64797, bsz=128, num_updates=11736, lr=9.99141e-05, gnorm=1.944, loss_scale=1, train_wall=11, gb_free=2.8, wall=134356 2021-06-20 07:58:13 | INFO | train_inner | epoch 004: 2802 / 3002 loss=2.547, ppl=5.84, wps=5889.7, ups=0.09, wpb=64833, bsz=128, num_updates=11737, lr=9.99141e-05, gnorm=1.993, loss_scale=1, train_wall=11, gb_free=2.8, wall=134367 2021-06-20 07:58:24 | INFO | train_inner | epoch 004: 2803 / 3002 loss=2.466, ppl=5.53, wps=5770.4, ups=0.09, wpb=64791, bsz=128, num_updates=11738, lr=9.99141e-05, gnorm=1.908, loss_scale=1, train_wall=11, gb_free=2.8, wall=134378 2021-06-20 07:58:35 | INFO | train_inner | epoch 004: 2804 / 3002 loss=2.506, ppl=5.68, wps=5867.7, ups=0.09, wpb=64851, bsz=128, num_updates=11739, lr=9.99141e-05, gnorm=2.004, loss_scale=1, train_wall=11, gb_free=2.8, wall=134389 2021-06-20 07:58:46 | INFO | train_inner | epoch 004: 2805 / 3002 loss=2.532, ppl=5.78, wps=5871.7, ups=0.09, wpb=64825, bsz=128, num_updates=11740, lr=9.99141e-05, gnorm=2.087, loss_scale=1, train_wall=11, gb_free=2.8, wall=134400 2021-06-20 07:58:57 | INFO | train_inner | epoch 004: 2806 / 3002 loss=2.478, ppl=5.57, wps=5864.9, ups=0.09, wpb=64832, bsz=128, num_updates=11741, lr=9.99141e-05, gnorm=2.035, loss_scale=1, train_wall=11, gb_free=2.8, wall=134411 2021-06-20 07:59:08 | INFO | train_inner | epoch 004: 2807 / 3002 loss=2.651, ppl=6.28, wps=5822.5, ups=0.09, wpb=64767, bsz=128, num_updates=11742, lr=9.99141e-05, gnorm=2.115, loss_scale=1, train_wall=11, gb_free=2.8, wall=134422 2021-06-20 07:59:19 | INFO | train_inner | epoch 004: 2808 / 3002 loss=2.66, ppl=6.32, wps=5776.5, ups=0.09, wpb=64850, bsz=128, num_updates=11743, lr=9.9914e-05, gnorm=2.121, loss_scale=1, train_wall=11, gb_free=2.8, wall=134434 2021-06-20 07:59:31 | INFO | train_inner | epoch 004: 2809 / 3002 loss=2.468, ppl=5.53, wps=5785.8, ups=0.09, wpb=64828, bsz=128, num_updates=11744, lr=9.9914e-05, gnorm=2.052, loss_scale=1, train_wall=11, gb_free=2.8, wall=134445 2021-06-20 07:59:42 | INFO | train_inner | epoch 004: 2810 / 3002 loss=2.45, ppl=5.46, wps=5801.1, ups=0.09, wpb=64891, bsz=128, num_updates=11745, lr=9.9914e-05, gnorm=2.053, loss_scale=1, train_wall=11, gb_free=2.8, wall=134456 2021-06-20 07:59:53 | INFO | train_inner | epoch 004: 2811 / 3002 loss=2.437, ppl=5.42, wps=5888.6, ups=0.09, wpb=64772, bsz=128, num_updates=11746, lr=9.9914e-05, gnorm=1.934, loss_scale=1, train_wall=10, gb_free=2.8, wall=134467 2021-06-20 08:00:04 | INFO | train_inner | epoch 004: 2812 / 3002 loss=2.588, ppl=6.01, wps=5881.9, ups=0.09, wpb=64790, bsz=128, num_updates=11747, lr=9.9914e-05, gnorm=2.023, loss_scale=1, train_wall=11, gb_free=2.8, wall=134478 2021-06-20 08:00:15 | INFO | train_inner | epoch 004: 2813 / 3002 loss=2.442, ppl=5.43, wps=5819, ups=0.09, wpb=64812, bsz=128, num_updates=11748, lr=9.9914e-05, gnorm=1.954, loss_scale=1, train_wall=11, gb_free=2.8, wall=134489 2021-06-20 08:00:26 | INFO | train_inner | epoch 004: 2814 / 3002 loss=2.675, ppl=6.39, wps=5804.7, ups=0.09, wpb=64748, bsz=128, num_updates=11749, lr=9.9914e-05, gnorm=1.974, loss_scale=1, train_wall=11, gb_free=2.8, wall=134500 2021-06-20 08:00:37 | INFO | train_inner | epoch 004: 2815 / 3002 loss=2.523, ppl=5.75, wps=5754.3, ups=0.09, wpb=64836, bsz=128, num_updates=11750, lr=9.9914e-05, gnorm=2.095, loss_scale=1, train_wall=11, gb_free=2.8, wall=134512 2021-06-20 08:00:48 | INFO | train_inner | epoch 004: 2816 / 3002 loss=2.534, ppl=5.79, wps=5802.6, ups=0.09, wpb=64781, bsz=128, num_updates=11751, lr=9.9914e-05, gnorm=2.057, loss_scale=1, train_wall=11, gb_free=2.8, wall=134523 2021-06-20 08:01:00 | INFO | train_inner | epoch 004: 2817 / 3002 loss=2.513, ppl=5.71, wps=5784.3, ups=0.09, wpb=64798, bsz=128, num_updates=11752, lr=9.9914e-05, gnorm=1.965, loss_scale=1, train_wall=11, gb_free=2.8, wall=134534 2021-06-20 08:01:11 | INFO | train_inner | epoch 004: 2818 / 3002 loss=2.486, ppl=5.6, wps=5837.2, ups=0.09, wpb=64769, bsz=128, num_updates=11753, lr=9.9914e-05, gnorm=1.979, loss_scale=1, train_wall=11, gb_free=2.8, wall=134545 2021-06-20 08:01:22 | INFO | train_inner | epoch 004: 2819 / 3002 loss=2.577, ppl=5.97, wps=5924.4, ups=0.09, wpb=64787, bsz=128, num_updates=11754, lr=9.9914e-05, gnorm=2.285, loss_scale=1, train_wall=10, gb_free=2.8, wall=134556 2021-06-20 08:01:33 | INFO | train_inner | epoch 004: 2820 / 3002 loss=2.441, ppl=5.43, wps=5739.1, ups=0.09, wpb=64916, bsz=128, num_updates=11755, lr=9.9914e-05, gnorm=1.93, loss_scale=1, train_wall=11, gb_free=2.8, wall=134567 2021-06-20 08:01:44 | INFO | train_inner | epoch 004: 2821 / 3002 loss=2.375, ppl=5.19, wps=5834.8, ups=0.09, wpb=64860, bsz=128, num_updates=11756, lr=9.99139e-05, gnorm=2.247, loss_scale=1, train_wall=11, gb_free=2.8, wall=134578 2021-06-20 08:01:55 | INFO | train_inner | epoch 004: 2822 / 3002 loss=2.44, ppl=5.43, wps=5916.8, ups=0.09, wpb=64828, bsz=128, num_updates=11757, lr=9.99139e-05, gnorm=1.908, loss_scale=1, train_wall=11, gb_free=2.8, wall=134589 2021-06-20 08:02:06 | INFO | train_inner | epoch 004: 2823 / 3002 loss=2.523, ppl=5.75, wps=5771.8, ups=0.09, wpb=64715, bsz=128, num_updates=11758, lr=9.99139e-05, gnorm=2.001, loss_scale=1, train_wall=11, gb_free=2.8, wall=134601 2021-06-20 08:02:18 | INFO | train_inner | epoch 004: 2824 / 3002 loss=2.691, ppl=6.46, wps=5792.5, ups=0.09, wpb=64816, bsz=128, num_updates=11759, lr=9.99139e-05, gnorm=1.98, loss_scale=1, train_wall=11, gb_free=2.8, wall=134612 2021-06-20 08:02:29 | INFO | train_inner | epoch 004: 2825 / 3002 loss=2.554, ppl=5.87, wps=5796.7, ups=0.09, wpb=64847, bsz=128, num_updates=11760, lr=9.99139e-05, gnorm=2.062, loss_scale=1, train_wall=11, gb_free=2.8, wall=134623 2021-06-20 08:02:40 | INFO | train_inner | epoch 004: 2826 / 3002 loss=2.556, ppl=5.88, wps=5735.1, ups=0.09, wpb=64849, bsz=128, num_updates=11761, lr=9.99139e-05, gnorm=2.019, loss_scale=1, train_wall=11, gb_free=2.8, wall=134634 2021-06-20 08:02:51 | INFO | train_inner | epoch 004: 2827 / 3002 loss=2.576, ppl=5.96, wps=5882.1, ups=0.09, wpb=64762, bsz=128, num_updates=11762, lr=9.99139e-05, gnorm=2.05, loss_scale=1, train_wall=11, gb_free=2.8, wall=134645 2021-06-20 08:03:02 | INFO | train_inner | epoch 004: 2828 / 3002 loss=2.59, ppl=6.02, wps=5814.6, ups=0.09, wpb=64875, bsz=128, num_updates=11763, lr=9.99139e-05, gnorm=2.113, loss_scale=1, train_wall=11, gb_free=2.8, wall=134657 2021-06-20 08:03:13 | INFO | train_inner | epoch 004: 2829 / 3002 loss=2.7, ppl=6.5, wps=5820.4, ups=0.09, wpb=64814, bsz=128, num_updates=11764, lr=9.99139e-05, gnorm=2.144, loss_scale=1, train_wall=11, gb_free=2.8, wall=134668 2021-06-20 08:03:25 | INFO | train_inner | epoch 004: 2830 / 3002 loss=2.536, ppl=5.8, wps=5756.5, ups=0.09, wpb=64833, bsz=128, num_updates=11765, lr=9.99139e-05, gnorm=2.041, loss_scale=1, train_wall=11, gb_free=2.8, wall=134679 2021-06-20 08:03:36 | INFO | train_inner | epoch 004: 2831 / 3002 loss=2.378, ppl=5.2, wps=5925.2, ups=0.09, wpb=64834, bsz=128, num_updates=11766, lr=9.99139e-05, gnorm=1.961, loss_scale=1, train_wall=10, gb_free=2.8, wall=134690 2021-06-20 08:03:47 | INFO | train_inner | epoch 004: 2832 / 3002 loss=2.679, ppl=6.4, wps=5810.4, ups=0.09, wpb=64756, bsz=128, num_updates=11767, lr=9.99139e-05, gnorm=1.925, loss_scale=1, train_wall=11, gb_free=2.8, wall=134701 2021-06-20 08:03:58 | INFO | train_inner | epoch 004: 2833 / 3002 loss=2.635, ppl=6.21, wps=5738.5, ups=0.09, wpb=64849, bsz=128, num_updates=11768, lr=9.99138e-05, gnorm=2.074, loss_scale=1, train_wall=11, gb_free=2.8, wall=134712 2021-06-20 08:04:09 | INFO | train_inner | epoch 004: 2834 / 3002 loss=2.561, ppl=5.9, wps=5893, ups=0.09, wpb=64807, bsz=128, num_updates=11769, lr=9.99138e-05, gnorm=1.944, loss_scale=1, train_wall=11, gb_free=2.8, wall=134723 2021-06-20 08:04:20 | INFO | train_inner | epoch 004: 2835 / 3002 loss=2.463, ppl=5.51, wps=5827.4, ups=0.09, wpb=64901, bsz=128, num_updates=11770, lr=9.99138e-05, gnorm=1.957, loss_scale=1, train_wall=11, gb_free=2.8, wall=134734 2021-06-20 08:04:31 | INFO | train_inner | epoch 004: 2836 / 3002 loss=2.451, ppl=5.47, wps=5811.4, ups=0.09, wpb=64818, bsz=128, num_updates=11771, lr=9.99138e-05, gnorm=1.978, loss_scale=1, train_wall=11, gb_free=2.8, wall=134746 2021-06-20 08:04:42 | INFO | train_inner | epoch 004: 2837 / 3002 loss=2.488, ppl=5.61, wps=5980.7, ups=0.09, wpb=64796, bsz=128, num_updates=11772, lr=9.99138e-05, gnorm=1.956, loss_scale=1, train_wall=10, gb_free=2.8, wall=134756 2021-06-20 08:04:53 | INFO | train_inner | epoch 004: 2838 / 3002 loss=2.487, ppl=5.61, wps=5701.2, ups=0.09, wpb=64815, bsz=128, num_updates=11773, lr=9.99138e-05, gnorm=1.987, loss_scale=1, train_wall=11, gb_free=2.8, wall=134768 2021-06-20 08:05:05 | INFO | train_inner | epoch 004: 2839 / 3002 loss=2.595, ppl=6.04, wps=5859, ups=0.09, wpb=64757, bsz=128, num_updates=11774, lr=9.99138e-05, gnorm=2.018, loss_scale=1, train_wall=11, gb_free=2.8, wall=134779 2021-06-20 08:05:15 | INFO | train_inner | epoch 004: 2840 / 3002 loss=2.536, ppl=5.8, wps=5901.6, ups=0.09, wpb=64786, bsz=128, num_updates=11775, lr=9.99138e-05, gnorm=1.99, loss_scale=1, train_wall=11, gb_free=2.8, wall=134790 2021-06-20 08:05:26 | INFO | train_inner | epoch 004: 2841 / 3002 loss=2.518, ppl=5.73, wps=5940.7, ups=0.09, wpb=64949, bsz=128, num_updates=11776, lr=9.99138e-05, gnorm=1.998, loss_scale=1, train_wall=10, gb_free=2.8, wall=134801 2021-06-20 08:05:37 | INFO | train_inner | epoch 004: 2842 / 3002 loss=2.504, ppl=5.67, wps=6002, ups=0.09, wpb=64869, bsz=128, num_updates=11777, lr=9.99138e-05, gnorm=1.917, loss_scale=1, train_wall=10, gb_free=2.8, wall=134812 2021-06-20 08:05:48 | INFO | train_inner | epoch 004: 2843 / 3002 loss=2.473, ppl=5.55, wps=5955.9, ups=0.09, wpb=64882, bsz=128, num_updates=11778, lr=9.99138e-05, gnorm=2.047, loss_scale=1, train_wall=10, gb_free=2.8, wall=134822 2021-06-20 08:05:59 | INFO | train_inner | epoch 004: 2844 / 3002 loss=2.544, ppl=5.83, wps=5840.4, ups=0.09, wpb=64844, bsz=128, num_updates=11779, lr=9.99138e-05, gnorm=1.92, loss_scale=1, train_wall=11, gb_free=2.8, wall=134834 2021-06-20 08:06:10 | INFO | train_inner | epoch 004: 2845 / 3002 loss=2.558, ppl=5.89, wps=5845, ups=0.09, wpb=64771, bsz=128, num_updates=11780, lr=9.99138e-05, gnorm=2.085, loss_scale=1, train_wall=11, gb_free=2.8, wall=134845 2021-06-20 08:06:21 | INFO | train_inner | epoch 004: 2846 / 3002 loss=2.599, ppl=6.06, wps=5836.3, ups=0.09, wpb=64782, bsz=128, num_updates=11781, lr=9.99137e-05, gnorm=2.031, loss_scale=1, train_wall=11, gb_free=2.8, wall=134856 2021-06-20 08:06:33 | INFO | train_inner | epoch 004: 2847 / 3002 loss=2.407, ppl=5.31, wps=5809.7, ups=0.09, wpb=64822, bsz=128, num_updates=11782, lr=9.99137e-05, gnorm=1.978, loss_scale=1, train_wall=11, gb_free=2.8, wall=134867 2021-06-20 08:06:44 | INFO | train_inner | epoch 004: 2848 / 3002 loss=2.595, ppl=6.04, wps=5816.3, ups=0.09, wpb=64682, bsz=128, num_updates=11783, lr=9.99137e-05, gnorm=2.112, loss_scale=1, train_wall=11, gb_free=2.8, wall=134878 2021-06-20 08:06:54 | INFO | train_inner | epoch 004: 2849 / 3002 loss=2.58, ppl=5.98, wps=6034.4, ups=0.09, wpb=64846, bsz=128, num_updates=11784, lr=9.99137e-05, gnorm=1.942, loss_scale=1, train_wall=10, gb_free=2.8, wall=134889 2021-06-20 08:07:06 | INFO | train_inner | epoch 004: 2850 / 3002 loss=2.54, ppl=5.81, wps=5822.3, ups=0.09, wpb=64713, bsz=128, num_updates=11785, lr=9.99137e-05, gnorm=2.027, loss_scale=1, train_wall=11, gb_free=2.8, wall=134900 2021-06-20 08:07:17 | INFO | train_inner | epoch 004: 2851 / 3002 loss=2.621, ppl=6.15, wps=5847.4, ups=0.09, wpb=64846, bsz=128, num_updates=11786, lr=9.99137e-05, gnorm=1.955, loss_scale=1, train_wall=11, gb_free=2.8, wall=134911 2021-06-20 08:07:28 | INFO | train_inner | epoch 004: 2852 / 3002 loss=2.57, ppl=5.94, wps=5920.4, ups=0.09, wpb=64882, bsz=128, num_updates=11787, lr=9.99137e-05, gnorm=1.955, loss_scale=1, train_wall=11, gb_free=2.8, wall=134922 2021-06-20 08:07:39 | INFO | train_inner | epoch 004: 2853 / 3002 loss=2.634, ppl=6.21, wps=5925.1, ups=0.09, wpb=64796, bsz=128, num_updates=11788, lr=9.99137e-05, gnorm=2.074, loss_scale=1, train_wall=10, gb_free=2.8, wall=134933 2021-06-20 08:07:50 | INFO | train_inner | epoch 004: 2854 / 3002 loss=2.57, ppl=5.94, wps=5858.6, ups=0.09, wpb=64812, bsz=128, num_updates=11789, lr=9.99137e-05, gnorm=2.041, loss_scale=1, train_wall=11, gb_free=2.8, wall=134944 2021-06-20 08:08:01 | INFO | train_inner | epoch 004: 2855 / 3002 loss=2.615, ppl=6.12, wps=5785.7, ups=0.09, wpb=64838, bsz=128, num_updates=11790, lr=9.99137e-05, gnorm=2, loss_scale=1, train_wall=11, gb_free=2.8, wall=134955 2021-06-20 08:08:12 | INFO | train_inner | epoch 004: 2856 / 3002 loss=2.625, ppl=6.17, wps=5756.3, ups=0.09, wpb=64810, bsz=128, num_updates=11791, lr=9.99137e-05, gnorm=2, loss_scale=1, train_wall=11, gb_free=2.8, wall=134966 2021-06-20 08:08:23 | INFO | train_inner | epoch 004: 2857 / 3002 loss=2.511, ppl=5.7, wps=5908.9, ups=0.09, wpb=64901, bsz=128, num_updates=11792, lr=9.99137e-05, gnorm=2.05, loss_scale=1, train_wall=11, gb_free=2.8, wall=134977 2021-06-20 08:08:34 | INFO | train_inner | epoch 004: 2858 / 3002 loss=2.595, ppl=6.04, wps=5877.6, ups=0.09, wpb=64851, bsz=128, num_updates=11793, lr=9.99136e-05, gnorm=2.059, loss_scale=1, train_wall=11, gb_free=2.8, wall=134988 2021-06-20 08:08:45 | INFO | train_inner | epoch 004: 2859 / 3002 loss=2.479, ppl=5.58, wps=5849.1, ups=0.09, wpb=64800, bsz=128, num_updates=11794, lr=9.99136e-05, gnorm=2.103, loss_scale=1, train_wall=11, gb_free=2.8, wall=134999 2021-06-20 08:08:56 | INFO | train_inner | epoch 004: 2860 / 3002 loss=2.86, ppl=7.26, wps=5860.4, ups=0.09, wpb=64792, bsz=128, num_updates=11795, lr=9.99136e-05, gnorm=2.063, loss_scale=1, train_wall=11, gb_free=2.8, wall=135011 2021-06-20 08:09:07 | INFO | train_inner | epoch 004: 2861 / 3002 loss=2.549, ppl=5.85, wps=5754.9, ups=0.09, wpb=64862, bsz=128, num_updates=11796, lr=9.99136e-05, gnorm=1.967, loss_scale=1, train_wall=11, gb_free=2.8, wall=135022 2021-06-20 08:09:19 | INFO | train_inner | epoch 004: 2862 / 3002 loss=2.416, ppl=5.34, wps=5842.1, ups=0.09, wpb=64857, bsz=128, num_updates=11797, lr=9.99136e-05, gnorm=1.855, loss_scale=1, train_wall=11, gb_free=2.8, wall=135033 2021-06-20 08:09:29 | INFO | train_inner | epoch 004: 2863 / 3002 loss=2.651, ppl=6.28, wps=5952.2, ups=0.09, wpb=64847, bsz=128, num_updates=11798, lr=9.99136e-05, gnorm=2.004, loss_scale=1, train_wall=10, gb_free=2.8, wall=135044 2021-06-20 08:09:41 | INFO | train_inner | epoch 004: 2864 / 3002 loss=2.631, ppl=6.2, wps=5840.8, ups=0.09, wpb=64735, bsz=128, num_updates=11799, lr=9.99136e-05, gnorm=2.005, loss_scale=1, train_wall=11, gb_free=2.8, wall=135055 2021-06-20 08:09:52 | INFO | train_inner | epoch 004: 2865 / 3002 loss=2.716, ppl=6.57, wps=5730.2, ups=0.09, wpb=64820, bsz=128, num_updates=11800, lr=9.99136e-05, gnorm=2.08, loss_scale=1, train_wall=11, gb_free=2.8, wall=135066 2021-06-20 08:10:03 | INFO | train_inner | epoch 004: 2866 / 3002 loss=2.557, ppl=5.89, wps=5783.4, ups=0.09, wpb=64816, bsz=128, num_updates=11801, lr=9.99136e-05, gnorm=2.317, loss_scale=1, train_wall=11, gb_free=2.8, wall=135077 2021-06-20 08:10:14 | INFO | train_inner | epoch 004: 2867 / 3002 loss=2.58, ppl=5.98, wps=5841.4, ups=0.09, wpb=64759, bsz=128, num_updates=11802, lr=9.99136e-05, gnorm=2.1, loss_scale=1, train_wall=11, gb_free=2.8, wall=135089 2021-06-20 08:10:25 | INFO | train_inner | epoch 004: 2868 / 3002 loss=2.607, ppl=6.09, wps=5832.9, ups=0.09, wpb=64799, bsz=128, num_updates=11803, lr=9.99136e-05, gnorm=2.038, loss_scale=1, train_wall=11, gb_free=2.8, wall=135100 2021-06-20 08:10:36 | INFO | train_inner | epoch 004: 2869 / 3002 loss=2.508, ppl=5.69, wps=5807.8, ups=0.09, wpb=64839, bsz=128, num_updates=11804, lr=9.99136e-05, gnorm=2.093, loss_scale=1, train_wall=11, gb_free=2.8, wall=135111 2021-06-20 08:10:48 | INFO | train_inner | epoch 004: 2870 / 3002 loss=2.513, ppl=5.71, wps=5808.3, ups=0.09, wpb=64719, bsz=128, num_updates=11805, lr=9.99136e-05, gnorm=2.055, loss_scale=1, train_wall=11, gb_free=2.8, wall=135122 2021-06-20 08:10:59 | INFO | train_inner | epoch 004: 2871 / 3002 loss=2.707, ppl=6.53, wps=5875.8, ups=0.09, wpb=64765, bsz=128, num_updates=11806, lr=9.99135e-05, gnorm=2.126, loss_scale=1, train_wall=11, gb_free=2.8, wall=135133 2021-06-20 08:11:10 | INFO | train_inner | epoch 004: 2872 / 3002 loss=2.518, ppl=5.73, wps=5832.6, ups=0.09, wpb=64923, bsz=128, num_updates=11807, lr=9.99135e-05, gnorm=2.024, loss_scale=1, train_wall=11, gb_free=2.8, wall=135144 2021-06-20 08:11:21 | INFO | train_inner | epoch 004: 2873 / 3002 loss=2.383, ppl=5.22, wps=5879.4, ups=0.09, wpb=64802, bsz=128, num_updates=11808, lr=9.99135e-05, gnorm=1.983, loss_scale=1, train_wall=11, gb_free=2.8, wall=135155 2021-06-20 08:11:32 | INFO | train_inner | epoch 004: 2874 / 3002 loss=2.68, ppl=6.41, wps=5908.7, ups=0.09, wpb=64795, bsz=128, num_updates=11809, lr=9.99135e-05, gnorm=2.083, loss_scale=1, train_wall=11, gb_free=2.8, wall=135166 2021-06-20 08:11:43 | INFO | train_inner | epoch 004: 2875 / 3002 loss=2.49, ppl=5.62, wps=5907.7, ups=0.09, wpb=64879, bsz=128, num_updates=11810, lr=9.99135e-05, gnorm=1.995, loss_scale=1, train_wall=11, gb_free=2.8, wall=135177 2021-06-20 08:11:54 | INFO | train_inner | epoch 004: 2876 / 3002 loss=2.531, ppl=5.78, wps=5895.8, ups=0.09, wpb=64825, bsz=128, num_updates=11811, lr=9.99135e-05, gnorm=2.029, loss_scale=1, train_wall=11, gb_free=2.8, wall=135188 2021-06-20 08:12:05 | INFO | train_inner | epoch 004: 2877 / 3002 loss=2.586, ppl=6, wps=5647.8, ups=0.09, wpb=64716, bsz=128, num_updates=11812, lr=9.99135e-05, gnorm=2.176, loss_scale=1, train_wall=11, gb_free=2.8, wall=135199 2021-06-20 08:12:16 | INFO | train_inner | epoch 004: 2878 / 3002 loss=2.818, ppl=7.05, wps=5844.5, ups=0.09, wpb=64750, bsz=128, num_updates=11813, lr=9.99135e-05, gnorm=2.081, loss_scale=1, train_wall=11, gb_free=2.8, wall=135211 2021-06-20 08:12:27 | INFO | train_inner | epoch 004: 2879 / 3002 loss=2.644, ppl=6.25, wps=5839.2, ups=0.09, wpb=64880, bsz=128, num_updates=11814, lr=9.99135e-05, gnorm=2.095, loss_scale=1, train_wall=11, gb_free=2.8, wall=135222 2021-06-20 08:12:38 | INFO | train_inner | epoch 004: 2880 / 3002 loss=2.673, ppl=6.38, wps=5872.9, ups=0.09, wpb=64787, bsz=128, num_updates=11815, lr=9.99135e-05, gnorm=1.997, loss_scale=1, train_wall=11, gb_free=2.8, wall=135233 2021-06-20 08:12:50 | INFO | train_inner | epoch 004: 2881 / 3002 loss=2.577, ppl=5.97, wps=5778.2, ups=0.09, wpb=64804, bsz=128, num_updates=11816, lr=9.99135e-05, gnorm=1.997, loss_scale=1, train_wall=11, gb_free=2.8, wall=135244 2021-06-20 08:13:01 | INFO | train_inner | epoch 004: 2882 / 3002 loss=2.625, ppl=6.17, wps=5934.5, ups=0.09, wpb=64829, bsz=128, num_updates=11817, lr=9.99135e-05, gnorm=2.035, loss_scale=1, train_wall=10, gb_free=2.8, wall=135255 2021-06-20 08:13:12 | INFO | train_inner | epoch 004: 2883 / 3002 loss=2.564, ppl=5.91, wps=5816.1, ups=0.09, wpb=64833, bsz=128, num_updates=11818, lr=9.99134e-05, gnorm=1.88, loss_scale=1, train_wall=11, gb_free=2.8, wall=135266 2021-06-20 08:13:23 | INFO | train_inner | epoch 004: 2884 / 3002 loss=2.381, ppl=5.21, wps=5885.4, ups=0.09, wpb=64839, bsz=128, num_updates=11819, lr=9.99134e-05, gnorm=2.005, loss_scale=1, train_wall=11, gb_free=2.8, wall=135277 2021-06-20 08:13:34 | INFO | train_inner | epoch 004: 2885 / 3002 loss=2.604, ppl=6.08, wps=5744.5, ups=0.09, wpb=64827, bsz=128, num_updates=11820, lr=9.99134e-05, gnorm=1.956, loss_scale=1, train_wall=11, gb_free=2.8, wall=135288 2021-06-20 08:13:45 | INFO | train_inner | epoch 004: 2886 / 3002 loss=2.644, ppl=6.25, wps=5903.3, ups=0.09, wpb=64776, bsz=128, num_updates=11821, lr=9.99134e-05, gnorm=2.003, loss_scale=1, train_wall=11, gb_free=2.8, wall=135299 2021-06-20 08:13:56 | INFO | train_inner | epoch 004: 2887 / 3002 loss=2.554, ppl=5.87, wps=5822.4, ups=0.09, wpb=64873, bsz=128, num_updates=11822, lr=9.99134e-05, gnorm=2.081, loss_scale=1, train_wall=11, gb_free=2.8, wall=135310 2021-06-20 08:14:07 | INFO | train_inner | epoch 004: 2888 / 3002 loss=2.541, ppl=5.82, wps=5802.6, ups=0.09, wpb=64810, bsz=128, num_updates=11823, lr=9.99134e-05, gnorm=2.131, loss_scale=1, train_wall=11, gb_free=2.8, wall=135322 2021-06-20 08:14:18 | INFO | train_inner | epoch 004: 2889 / 3002 loss=2.465, ppl=5.52, wps=5857.9, ups=0.09, wpb=64823, bsz=128, num_updates=11824, lr=9.99134e-05, gnorm=2.238, loss_scale=1, train_wall=11, gb_free=2.8, wall=135333 2021-06-20 08:14:29 | INFO | train_inner | epoch 004: 2890 / 3002 loss=2.509, ppl=5.69, wps=5909.5, ups=0.09, wpb=64847, bsz=128, num_updates=11825, lr=9.99134e-05, gnorm=1.967, loss_scale=1, train_wall=11, gb_free=2.8, wall=135344 2021-06-20 08:14:40 | INFO | train_inner | epoch 004: 2891 / 3002 loss=2.603, ppl=6.07, wps=5921.1, ups=0.09, wpb=64705, bsz=128, num_updates=11826, lr=9.99134e-05, gnorm=2.018, loss_scale=1, train_wall=10, gb_free=2.8, wall=135355 2021-06-20 08:14:51 | INFO | train_inner | epoch 004: 2892 / 3002 loss=2.437, ppl=5.41, wps=5907.2, ups=0.09, wpb=64882, bsz=128, num_updates=11827, lr=9.99134e-05, gnorm=1.896, loss_scale=1, train_wall=11, gb_free=2.8, wall=135366 2021-06-20 08:15:02 | INFO | train_inner | epoch 004: 2893 / 3002 loss=2.723, ppl=6.6, wps=5848.8, ups=0.09, wpb=64766, bsz=128, num_updates=11828, lr=9.99134e-05, gnorm=2.082, loss_scale=1, train_wall=11, gb_free=2.8, wall=135377 2021-06-20 08:15:13 | INFO | train_inner | epoch 004: 2894 / 3002 loss=2.485, ppl=5.6, wps=5888.9, ups=0.09, wpb=64798, bsz=128, num_updates=11829, lr=9.99134e-05, gnorm=1.918, loss_scale=1, train_wall=11, gb_free=2.8, wall=135388 2021-06-20 08:15:24 | INFO | train_inner | epoch 004: 2895 / 3002 loss=2.59, ppl=6.02, wps=5829, ups=0.09, wpb=64796, bsz=128, num_updates=11830, lr=9.99134e-05, gnorm=2.071, loss_scale=1, train_wall=11, gb_free=2.8, wall=135399 2021-06-20 08:15:36 | INFO | train_inner | epoch 004: 2896 / 3002 loss=2.375, ppl=5.19, wps=5714.4, ups=0.09, wpb=64722, bsz=128, num_updates=11831, lr=9.99133e-05, gnorm=1.943, loss_scale=1, train_wall=11, gb_free=2.8, wall=135410 2021-06-20 08:15:47 | INFO | train_inner | epoch 004: 2897 / 3002 loss=2.433, ppl=5.4, wps=5943, ups=0.09, wpb=64803, bsz=128, num_updates=11832, lr=9.99133e-05, gnorm=1.986, loss_scale=1, train_wall=10, gb_free=2.8, wall=135421 2021-06-20 08:15:58 | INFO | train_inner | epoch 004: 2898 / 3002 loss=2.46, ppl=5.5, wps=5868.1, ups=0.09, wpb=64786, bsz=128, num_updates=11833, lr=9.99133e-05, gnorm=2.004, loss_scale=1, train_wall=11, gb_free=2.8, wall=135432 2021-06-20 08:16:09 | INFO | train_inner | epoch 004: 2899 / 3002 loss=2.684, ppl=6.43, wps=5900, ups=0.09, wpb=64862, bsz=128, num_updates=11834, lr=9.99133e-05, gnorm=2.087, loss_scale=1, train_wall=11, gb_free=2.8, wall=135443 2021-06-20 08:16:20 | INFO | train_inner | epoch 004: 2900 / 3002 loss=2.657, ppl=6.31, wps=5818.2, ups=0.09, wpb=64838, bsz=128, num_updates=11835, lr=9.99133e-05, gnorm=2.104, loss_scale=1, train_wall=11, gb_free=2.8, wall=135454 2021-06-20 08:16:31 | INFO | train_inner | epoch 004: 2901 / 3002 loss=2.537, ppl=5.8, wps=5935.9, ups=0.09, wpb=64860, bsz=128, num_updates=11836, lr=9.99133e-05, gnorm=1.999, loss_scale=1, train_wall=10, gb_free=2.8, wall=135465 2021-06-20 08:16:42 | INFO | train_inner | epoch 004: 2902 / 3002 loss=2.433, ppl=5.4, wps=5736.5, ups=0.09, wpb=64892, bsz=128, num_updates=11837, lr=9.99133e-05, gnorm=2.016, loss_scale=1, train_wall=11, gb_free=2.8, wall=135476 2021-06-20 08:16:53 | INFO | train_inner | epoch 004: 2903 / 3002 loss=2.549, ppl=5.85, wps=5837.2, ups=0.09, wpb=64860, bsz=128, num_updates=11838, lr=9.99133e-05, gnorm=2.055, loss_scale=1, train_wall=11, gb_free=2.8, wall=135488 2021-06-20 08:17:04 | INFO | train_inner | epoch 004: 2904 / 3002 loss=2.456, ppl=5.49, wps=5783.2, ups=0.09, wpb=64828, bsz=128, num_updates=11839, lr=9.99133e-05, gnorm=2.038, loss_scale=1, train_wall=11, gb_free=2.8, wall=135499 2021-06-20 08:17:15 | INFO | train_inner | epoch 004: 2905 / 3002 loss=2.463, ppl=5.51, wps=5908, ups=0.09, wpb=64861, bsz=128, num_updates=11840, lr=9.99133e-05, gnorm=2.063, loss_scale=1, train_wall=10, gb_free=2.8, wall=135510 2021-06-20 08:17:26 | INFO | train_inner | epoch 004: 2906 / 3002 loss=2.732, ppl=6.64, wps=5822.3, ups=0.09, wpb=64811, bsz=128, num_updates=11841, lr=9.99133e-05, gnorm=2.096, loss_scale=1, train_wall=11, gb_free=2.8, wall=135521 2021-06-20 08:17:38 | INFO | train_inner | epoch 004: 2907 / 3002 loss=2.584, ppl=5.99, wps=5875.9, ups=0.09, wpb=64842, bsz=128, num_updates=11842, lr=9.99133e-05, gnorm=1.916, loss_scale=1, train_wall=11, gb_free=2.8, wall=135532 2021-06-20 08:17:49 | INFO | train_inner | epoch 004: 2908 / 3002 loss=2.474, ppl=5.56, wps=5816, ups=0.09, wpb=64842, bsz=128, num_updates=11843, lr=9.99132e-05, gnorm=1.908, loss_scale=1, train_wall=11, gb_free=2.8, wall=135543 2021-06-20 08:18:00 | INFO | train_inner | epoch 004: 2909 / 3002 loss=2.54, ppl=5.81, wps=5763.9, ups=0.09, wpb=64864, bsz=128, num_updates=11844, lr=9.99132e-05, gnorm=2.071, loss_scale=1, train_wall=11, gb_free=2.8, wall=135554 2021-06-20 08:18:11 | INFO | train_inner | epoch 004: 2910 / 3002 loss=2.632, ppl=6.2, wps=5789, ups=0.09, wpb=64794, bsz=128, num_updates=11845, lr=9.99132e-05, gnorm=3.831, loss_scale=1, train_wall=11, gb_free=2.8, wall=135565 2021-06-20 08:18:22 | INFO | train_inner | epoch 004: 2911 / 3002 loss=2.537, ppl=5.8, wps=5793.3, ups=0.09, wpb=64833, bsz=128, num_updates=11846, lr=9.99132e-05, gnorm=2.02, loss_scale=1, train_wall=11, gb_free=2.8, wall=135577 2021-06-20 08:18:33 | INFO | train_inner | epoch 004: 2912 / 3002 loss=2.502, ppl=5.66, wps=5884.1, ups=0.09, wpb=64867, bsz=128, num_updates=11847, lr=9.99132e-05, gnorm=2.023, loss_scale=1, train_wall=11, gb_free=2.8, wall=135588 2021-06-20 08:18:44 | INFO | train_inner | epoch 004: 2913 / 3002 loss=2.622, ppl=6.15, wps=5810.7, ups=0.09, wpb=64855, bsz=128, num_updates=11848, lr=9.99132e-05, gnorm=2.158, loss_scale=1, train_wall=11, gb_free=2.8, wall=135599 2021-06-20 08:18:56 | INFO | train_inner | epoch 004: 2914 / 3002 loss=2.458, ppl=5.49, wps=5885.9, ups=0.09, wpb=64834, bsz=128, num_updates=11849, lr=9.99132e-05, gnorm=2.029, loss_scale=1, train_wall=11, gb_free=2.8, wall=135610 2021-06-20 08:19:07 | INFO | train_inner | epoch 004: 2915 / 3002 loss=2.672, ppl=6.37, wps=5892.8, ups=0.09, wpb=64843, bsz=128, num_updates=11850, lr=9.99132e-05, gnorm=2.054, loss_scale=1, train_wall=11, gb_free=2.8, wall=135621 2021-06-20 08:19:18 | INFO | train_inner | epoch 004: 2916 / 3002 loss=2.595, ppl=6.04, wps=5846.8, ups=0.09, wpb=64880, bsz=128, num_updates=11851, lr=9.99132e-05, gnorm=1.989, loss_scale=1, train_wall=11, gb_free=2.8, wall=135632 2021-06-20 08:19:29 | INFO | train_inner | epoch 004: 2917 / 3002 loss=2.544, ppl=5.83, wps=5900.2, ups=0.09, wpb=64798, bsz=128, num_updates=11852, lr=9.99132e-05, gnorm=2.028, loss_scale=1, train_wall=11, gb_free=2.8, wall=135643 2021-06-20 08:19:39 | INFO | train_inner | epoch 004: 2918 / 3002 loss=2.476, ppl=5.56, wps=6017.6, ups=0.09, wpb=64833, bsz=128, num_updates=11853, lr=9.99132e-05, gnorm=1.933, loss_scale=1, train_wall=10, gb_free=2.8, wall=135654 2021-06-20 08:19:50 | INFO | train_inner | epoch 004: 2919 / 3002 loss=2.472, ppl=5.55, wps=5903.6, ups=0.09, wpb=64818, bsz=128, num_updates=11854, lr=9.99132e-05, gnorm=1.933, loss_scale=1, train_wall=11, gb_free=2.8, wall=135665 2021-06-20 08:20:01 | INFO | train_inner | epoch 004: 2920 / 3002 loss=2.593, ppl=6.03, wps=5862.5, ups=0.09, wpb=64800, bsz=128, num_updates=11855, lr=9.99132e-05, gnorm=2.15, loss_scale=2, train_wall=11, gb_free=2.8, wall=135676 2021-06-20 08:20:12 | INFO | train_inner | epoch 004: 2921 / 3002 loss=2.578, ppl=5.97, wps=5881.7, ups=0.09, wpb=64803, bsz=128, num_updates=11856, lr=9.99131e-05, gnorm=1.934, loss_scale=2, train_wall=11, gb_free=2.8, wall=135687 2021-06-20 08:20:24 | INFO | train_inner | epoch 004: 2922 / 3002 loss=2.595, ppl=6.04, wps=5839.7, ups=0.09, wpb=64858, bsz=128, num_updates=11857, lr=9.99131e-05, gnorm=1.996, loss_scale=2, train_wall=11, gb_free=2.8, wall=135698 2021-06-20 08:20:35 | INFO | train_inner | epoch 004: 2923 / 3002 loss=2.477, ppl=5.57, wps=5838.7, ups=0.09, wpb=64837, bsz=128, num_updates=11858, lr=9.99131e-05, gnorm=1.968, loss_scale=2, train_wall=11, gb_free=2.8, wall=135709 2021-06-20 08:20:46 | INFO | train_inner | epoch 004: 2924 / 3002 loss=2.631, ppl=6.2, wps=5819.5, ups=0.09, wpb=64695, bsz=128, num_updates=11859, lr=9.99131e-05, gnorm=2.082, loss_scale=2, train_wall=11, gb_free=2.8, wall=135720 2021-06-20 08:20:57 | INFO | train_inner | epoch 004: 2925 / 3002 loss=2.562, ppl=5.91, wps=5930.7, ups=0.09, wpb=64901, bsz=128, num_updates=11860, lr=9.99131e-05, gnorm=2.053, loss_scale=2, train_wall=10, gb_free=2.8, wall=135731 2021-06-20 08:21:08 | INFO | train_inner | epoch 004: 2926 / 3002 loss=2.587, ppl=6.01, wps=5888.8, ups=0.09, wpb=64744, bsz=128, num_updates=11861, lr=9.99131e-05, gnorm=2.067, loss_scale=2, train_wall=11, gb_free=2.8, wall=135742 2021-06-20 08:21:19 | INFO | train_inner | epoch 004: 2927 / 3002 loss=2.675, ppl=6.39, wps=5898.3, ups=0.09, wpb=64849, bsz=128, num_updates=11862, lr=9.99131e-05, gnorm=2.577, loss_scale=2, train_wall=11, gb_free=2.8, wall=135753 2021-06-20 08:21:30 | INFO | train_inner | epoch 004: 2928 / 3002 loss=2.54, ppl=5.82, wps=5949.7, ups=0.09, wpb=64824, bsz=128, num_updates=11863, lr=9.99131e-05, gnorm=1.937, loss_scale=2, train_wall=10, gb_free=2.8, wall=135764 2021-06-20 08:21:41 | INFO | train_inner | epoch 004: 2929 / 3002 loss=2.71, ppl=6.55, wps=5784.9, ups=0.09, wpb=64717, bsz=128, num_updates=11864, lr=9.99131e-05, gnorm=1.985, loss_scale=2, train_wall=11, gb_free=2.8, wall=135775 2021-06-20 08:21:52 | INFO | train_inner | epoch 004: 2930 / 3002 loss=2.578, ppl=5.97, wps=5765.3, ups=0.09, wpb=64778, bsz=128, num_updates=11865, lr=9.99131e-05, gnorm=1.981, loss_scale=2, train_wall=11, gb_free=2.8, wall=135786 2021-06-20 08:22:03 | INFO | train_inner | epoch 004: 2931 / 3002 loss=2.614, ppl=6.12, wps=5931.3, ups=0.09, wpb=64864, bsz=128, num_updates=11866, lr=9.99131e-05, gnorm=1.979, loss_scale=2, train_wall=10, gb_free=2.8, wall=135797 2021-06-20 08:22:14 | INFO | train_inner | epoch 004: 2932 / 3002 loss=2.557, ppl=5.89, wps=5831.1, ups=0.09, wpb=64817, bsz=128, num_updates=11867, lr=9.99131e-05, gnorm=2.025, loss_scale=2, train_wall=11, gb_free=2.8, wall=135808 2021-06-20 08:22:25 | INFO | train_inner | epoch 004: 2933 / 3002 loss=2.741, ppl=6.69, wps=5917.1, ups=0.09, wpb=64825, bsz=128, num_updates=11868, lr=9.9913e-05, gnorm=2.014, loss_scale=2, train_wall=11, gb_free=2.8, wall=135819 2021-06-20 08:22:36 | INFO | train_inner | epoch 004: 2934 / 3002 loss=2.481, ppl=5.58, wps=5995.7, ups=0.09, wpb=64832, bsz=128, num_updates=11869, lr=9.9913e-05, gnorm=1.967, loss_scale=2, train_wall=10, gb_free=2.8, wall=135830 2021-06-20 08:22:47 | INFO | train_inner | epoch 004: 2935 / 3002 loss=2.577, ppl=5.97, wps=5733.3, ups=0.09, wpb=64792, bsz=128, num_updates=11870, lr=9.9913e-05, gnorm=1.992, loss_scale=2, train_wall=11, gb_free=2.8, wall=135841 2021-06-20 08:22:58 | INFO | train_inner | epoch 004: 2936 / 3002 loss=2.474, ppl=5.56, wps=6018.3, ups=0.09, wpb=64870, bsz=128, num_updates=11871, lr=9.9913e-05, gnorm=1.951, loss_scale=2, train_wall=10, gb_free=2.8, wall=135852 2021-06-20 08:23:09 | INFO | train_inner | epoch 004: 2937 / 3002 loss=2.517, ppl=5.72, wps=5955.4, ups=0.09, wpb=64837, bsz=128, num_updates=11872, lr=9.9913e-05, gnorm=1.986, loss_scale=2, train_wall=10, gb_free=2.8, wall=135863 2021-06-20 08:23:20 | INFO | train_inner | epoch 004: 2938 / 3002 loss=2.385, ppl=5.22, wps=5788.9, ups=0.09, wpb=64811, bsz=128, num_updates=11873, lr=9.9913e-05, gnorm=2.007, loss_scale=2, train_wall=11, gb_free=2.8, wall=135874 2021-06-20 08:23:31 | INFO | train_inner | epoch 004: 2939 / 3002 loss=2.624, ppl=6.17, wps=5845.9, ups=0.09, wpb=64825, bsz=128, num_updates=11874, lr=9.9913e-05, gnorm=2.161, loss_scale=2, train_wall=11, gb_free=2.8, wall=135885 2021-06-20 08:23:42 | INFO | train_inner | epoch 004: 2940 / 3002 loss=2.642, ppl=6.24, wps=5848.8, ups=0.09, wpb=64829, bsz=128, num_updates=11875, lr=9.9913e-05, gnorm=2.023, loss_scale=2, train_wall=11, gb_free=2.8, wall=135896 2021-06-20 08:23:53 | INFO | train_inner | epoch 004: 2941 / 3002 loss=2.649, ppl=6.27, wps=5792.2, ups=0.09, wpb=64740, bsz=128, num_updates=11876, lr=9.9913e-05, gnorm=1.959, loss_scale=2, train_wall=11, gb_free=2.8, wall=135908 2021-06-20 08:24:04 | INFO | train_inner | epoch 004: 2942 / 3002 loss=2.442, ppl=5.43, wps=5900.4, ups=0.09, wpb=64814, bsz=128, num_updates=11877, lr=9.9913e-05, gnorm=1.934, loss_scale=2, train_wall=11, gb_free=2.8, wall=135919 2021-06-20 08:24:15 | INFO | train_inner | epoch 004: 2943 / 3002 loss=2.584, ppl=6, wps=5885.3, ups=0.09, wpb=64897, bsz=128, num_updates=11878, lr=9.9913e-05, gnorm=2.027, loss_scale=2, train_wall=11, gb_free=2.8, wall=135930 2021-06-20 08:24:26 | INFO | train_inner | epoch 004: 2944 / 3002 loss=2.514, ppl=5.71, wps=5838.4, ups=0.09, wpb=64779, bsz=128, num_updates=11879, lr=9.9913e-05, gnorm=6.123, loss_scale=2, train_wall=11, gb_free=2.8, wall=135941 2021-06-20 08:24:38 | INFO | train_inner | epoch 004: 2945 / 3002 loss=2.801, ppl=6.97, wps=5774, ups=0.09, wpb=64770, bsz=128, num_updates=11880, lr=9.9913e-05, gnorm=2.03, loss_scale=2, train_wall=11, gb_free=2.8, wall=135952 2021-06-20 08:24:49 | INFO | train_inner | epoch 004: 2946 / 3002 loss=2.417, ppl=5.34, wps=5899.9, ups=0.09, wpb=64792, bsz=128, num_updates=11881, lr=9.99129e-05, gnorm=2.034, loss_scale=2, train_wall=11, gb_free=2.8, wall=135963 2021-06-20 08:25:00 | INFO | train_inner | epoch 004: 2947 / 3002 loss=2.578, ppl=5.97, wps=5915.9, ups=0.09, wpb=64778, bsz=128, num_updates=11882, lr=9.99129e-05, gnorm=2.35, loss_scale=2, train_wall=10, gb_free=2.8, wall=135974 2021-06-20 08:25:11 | INFO | train_inner | epoch 004: 2948 / 3002 loss=2.718, ppl=6.58, wps=5907.5, ups=0.09, wpb=64841, bsz=128, num_updates=11883, lr=9.99129e-05, gnorm=2.052, loss_scale=2, train_wall=11, gb_free=2.8, wall=135985 2021-06-20 08:25:22 | INFO | train_inner | epoch 004: 2949 / 3002 loss=2.562, ppl=5.9, wps=5796.9, ups=0.09, wpb=64816, bsz=128, num_updates=11884, lr=9.99129e-05, gnorm=1.985, loss_scale=2, train_wall=11, gb_free=2.8, wall=135996 2021-06-20 08:25:33 | INFO | train_inner | epoch 004: 2950 / 3002 loss=2.491, ppl=5.62, wps=5819.1, ups=0.09, wpb=64844, bsz=128, num_updates=11885, lr=9.99129e-05, gnorm=1.928, loss_scale=2, train_wall=11, gb_free=2.8, wall=136007 2021-06-20 08:25:44 | INFO | train_inner | epoch 004: 2951 / 3002 loss=2.632, ppl=6.2, wps=5810.4, ups=0.09, wpb=64818, bsz=128, num_updates=11886, lr=9.99129e-05, gnorm=1.939, loss_scale=2, train_wall=11, gb_free=2.8, wall=136018 2021-06-20 08:25:55 | INFO | train_inner | epoch 004: 2952 / 3002 loss=2.575, ppl=5.96, wps=5861.4, ups=0.09, wpb=64810, bsz=128, num_updates=11887, lr=9.99129e-05, gnorm=2.052, loss_scale=2, train_wall=11, gb_free=2.8, wall=136029 2021-06-20 08:26:06 | INFO | train_inner | epoch 004: 2953 / 3002 loss=2.645, ppl=6.25, wps=5910.4, ups=0.09, wpb=64863, bsz=128, num_updates=11888, lr=9.99129e-05, gnorm=2.097, loss_scale=2, train_wall=11, gb_free=2.8, wall=136040 2021-06-20 08:26:17 | INFO | train_inner | epoch 004: 2954 / 3002 loss=2.475, ppl=5.56, wps=5920.2, ups=0.09, wpb=64820, bsz=128, num_updates=11889, lr=9.99129e-05, gnorm=2.002, loss_scale=2, train_wall=10, gb_free=2.8, wall=136051 2021-06-20 08:26:28 | INFO | train_inner | epoch 004: 2955 / 3002 loss=2.643, ppl=6.25, wps=5841.6, ups=0.09, wpb=64845, bsz=128, num_updates=11890, lr=9.99129e-05, gnorm=2.029, loss_scale=2, train_wall=11, gb_free=2.8, wall=136062 2021-06-20 08:26:39 | INFO | train_inner | epoch 004: 2956 / 3002 loss=2.509, ppl=5.69, wps=5846.3, ups=0.09, wpb=64881, bsz=128, num_updates=11891, lr=9.99129e-05, gnorm=2.039, loss_scale=2, train_wall=11, gb_free=2.8, wall=136074 2021-06-20 08:26:50 | INFO | train_inner | epoch 004: 2957 / 3002 loss=2.608, ppl=6.1, wps=5974.2, ups=0.09, wpb=64789, bsz=128, num_updates=11892, lr=9.99129e-05, gnorm=1.991, loss_scale=2, train_wall=10, gb_free=2.8, wall=136084 2021-06-20 08:27:01 | INFO | train_inner | epoch 004: 2958 / 3002 loss=2.603, ppl=6.08, wps=5862.7, ups=0.09, wpb=64816, bsz=128, num_updates=11893, lr=9.99128e-05, gnorm=2.03, loss_scale=2, train_wall=11, gb_free=2.8, wall=136095 2021-06-20 08:27:12 | INFO | train_inner | epoch 004: 2959 / 3002 loss=2.499, ppl=5.65, wps=5772.9, ups=0.09, wpb=64858, bsz=128, num_updates=11894, lr=9.99128e-05, gnorm=2.011, loss_scale=2, train_wall=11, gb_free=2.8, wall=136107 2021-06-20 08:27:23 | INFO | train_inner | epoch 004: 2960 / 3002 loss=2.515, ppl=5.71, wps=5957.1, ups=0.09, wpb=64841, bsz=128, num_updates=11895, lr=9.99128e-05, gnorm=2.053, loss_scale=2, train_wall=10, gb_free=2.8, wall=136118 2021-06-20 08:27:34 | INFO | train_inner | epoch 004: 2961 / 3002 loss=2.328, ppl=5.02, wps=5830.1, ups=0.09, wpb=64844, bsz=128, num_updates=11896, lr=9.99128e-05, gnorm=1.958, loss_scale=2, train_wall=11, gb_free=2.8, wall=136129 2021-06-20 08:27:45 | INFO | train_inner | epoch 004: 2962 / 3002 loss=2.626, ppl=6.17, wps=5870.2, ups=0.09, wpb=64899, bsz=128, num_updates=11897, lr=9.99128e-05, gnorm=1.941, loss_scale=2, train_wall=11, gb_free=2.8, wall=136140 2021-06-20 08:27:56 | INFO | train_inner | epoch 004: 2963 / 3002 loss=2.543, ppl=5.83, wps=5895, ups=0.09, wpb=64788, bsz=128, num_updates=11898, lr=9.99128e-05, gnorm=1.955, loss_scale=2, train_wall=10, gb_free=2.8, wall=136151 2021-06-20 08:28:08 | INFO | train_inner | epoch 004: 2964 / 3002 loss=2.592, ppl=6.03, wps=5831.3, ups=0.09, wpb=64851, bsz=128, num_updates=11899, lr=9.99128e-05, gnorm=2.169, loss_scale=2, train_wall=11, gb_free=2.8, wall=136162 2021-06-20 08:28:18 | INFO | train_inner | epoch 004: 2965 / 3002 loss=2.634, ppl=6.21, wps=5935.9, ups=0.09, wpb=64934, bsz=128, num_updates=11900, lr=9.99128e-05, gnorm=1.993, loss_scale=2, train_wall=10, gb_free=2.8, wall=136173 2021-06-20 08:28:30 | INFO | train_inner | epoch 004: 2966 / 3002 loss=2.517, ppl=5.72, wps=5829.3, ups=0.09, wpb=64829, bsz=128, num_updates=11901, lr=9.99128e-05, gnorm=2.129, loss_scale=2, train_wall=11, gb_free=2.8, wall=136184 2021-06-20 08:28:41 | INFO | train_inner | epoch 004: 2967 / 3002 loss=2.539, ppl=5.81, wps=5820.2, ups=0.09, wpb=64903, bsz=128, num_updates=11902, lr=9.99128e-05, gnorm=1.972, loss_scale=2, train_wall=11, gb_free=2.8, wall=136195 2021-06-20 08:28:52 | INFO | train_inner | epoch 004: 2968 / 3002 loss=2.532, ppl=5.78, wps=5783.4, ups=0.09, wpb=64773, bsz=128, num_updates=11903, lr=9.99128e-05, gnorm=2.011, loss_scale=2, train_wall=11, gb_free=2.8, wall=136206 2021-06-20 08:29:03 | INFO | train_inner | epoch 004: 2969 / 3002 loss=2.453, ppl=5.48, wps=5902.1, ups=0.09, wpb=64913, bsz=128, num_updates=11904, lr=9.99128e-05, gnorm=2.06, loss_scale=2, train_wall=11, gb_free=2.8, wall=136217 2021-06-20 08:29:14 | INFO | train_inner | epoch 004: 2970 / 3002 loss=2.589, ppl=6.02, wps=5896.8, ups=0.09, wpb=64836, bsz=128, num_updates=11905, lr=9.99128e-05, gnorm=2.038, loss_scale=2, train_wall=11, gb_free=2.8, wall=136228 2021-06-20 08:29:25 | INFO | train_inner | epoch 004: 2971 / 3002 loss=2.589, ppl=6.02, wps=5869.6, ups=0.09, wpb=64786, bsz=128, num_updates=11906, lr=9.99127e-05, gnorm=1.984, loss_scale=2, train_wall=11, gb_free=2.8, wall=136239 2021-06-20 08:29:36 | INFO | train_inner | epoch 004: 2972 / 3002 loss=2.439, ppl=5.42, wps=5890.8, ups=0.09, wpb=64876, bsz=128, num_updates=11907, lr=9.99127e-05, gnorm=1.996, loss_scale=2, train_wall=11, gb_free=2.8, wall=136250 2021-06-20 08:29:47 | INFO | train_inner | epoch 004: 2973 / 3002 loss=2.538, ppl=5.81, wps=6055.4, ups=0.09, wpb=64868, bsz=128, num_updates=11908, lr=9.99127e-05, gnorm=2.079, loss_scale=2, train_wall=10, gb_free=2.8, wall=136261 2021-06-20 08:29:58 | INFO | train_inner | epoch 004: 2974 / 3002 loss=2.475, ppl=5.56, wps=5934.6, ups=0.09, wpb=64850, bsz=128, num_updates=11909, lr=9.99127e-05, gnorm=1.977, loss_scale=2, train_wall=10, gb_free=2.8, wall=136272 2021-06-20 08:30:09 | INFO | train_inner | epoch 004: 2975 / 3002 loss=2.708, ppl=6.53, wps=5917.8, ups=0.09, wpb=64757, bsz=128, num_updates=11910, lr=9.99127e-05, gnorm=2.076, loss_scale=2, train_wall=10, gb_free=2.8, wall=136283 2021-06-20 08:30:20 | INFO | train_inner | epoch 004: 2976 / 3002 loss=2.549, ppl=5.85, wps=5930, ups=0.09, wpb=64799, bsz=128, num_updates=11911, lr=9.99127e-05, gnorm=2.064, loss_scale=2, train_wall=10, gb_free=2.8, wall=136294 2021-06-20 08:30:30 | INFO | train_inner | epoch 004: 2977 / 3002 loss=2.516, ppl=5.72, wps=5935.3, ups=0.09, wpb=64771, bsz=128, num_updates=11912, lr=9.99127e-05, gnorm=1.963, loss_scale=2, train_wall=10, gb_free=2.8, wall=136305 2021-06-20 08:30:42 | INFO | train_inner | epoch 004: 2978 / 3002 loss=2.678, ppl=6.4, wps=5754.1, ups=0.09, wpb=64832, bsz=128, num_updates=11913, lr=9.99127e-05, gnorm=1.999, loss_scale=2, train_wall=11, gb_free=2.8, wall=136316 2021-06-20 08:30:53 | INFO | train_inner | epoch 004: 2979 / 3002 loss=2.593, ppl=6.03, wps=5848.1, ups=0.09, wpb=64799, bsz=128, num_updates=11914, lr=9.99127e-05, gnorm=2.157, loss_scale=2, train_wall=11, gb_free=2.8, wall=136327 2021-06-20 08:31:04 | INFO | train_inner | epoch 004: 2980 / 3002 loss=2.612, ppl=6.11, wps=5852, ups=0.09, wpb=64857, bsz=128, num_updates=11915, lr=9.99127e-05, gnorm=1.978, loss_scale=2, train_wall=11, gb_free=2.8, wall=136338 2021-06-20 08:31:15 | INFO | train_inner | epoch 004: 2981 / 3002 loss=2.585, ppl=6, wps=5860.6, ups=0.09, wpb=64859, bsz=128, num_updates=11916, lr=9.99127e-05, gnorm=1.927, loss_scale=2, train_wall=11, gb_free=2.8, wall=136349 2021-06-20 08:31:26 | INFO | train_inner | epoch 004: 2982 / 3002 loss=2.478, ppl=5.57, wps=5847.3, ups=0.09, wpb=64793, bsz=128, num_updates=11917, lr=9.99127e-05, gnorm=2.042, loss_scale=2, train_wall=11, gb_free=2.8, wall=136360 2021-06-20 08:31:37 | INFO | train_inner | epoch 004: 2983 / 3002 loss=2.529, ppl=5.77, wps=5823.8, ups=0.09, wpb=64843, bsz=128, num_updates=11918, lr=9.99126e-05, gnorm=1.911, loss_scale=2, train_wall=11, gb_free=2.8, wall=136371 2021-06-20 08:31:48 | INFO | train_inner | epoch 004: 2984 / 3002 loss=2.451, ppl=5.47, wps=5888.5, ups=0.09, wpb=64900, bsz=128, num_updates=11919, lr=9.99126e-05, gnorm=1.996, loss_scale=2, train_wall=11, gb_free=2.8, wall=136382 2021-06-20 08:31:59 | INFO | train_inner | epoch 004: 2985 / 3002 loss=2.61, ppl=6.11, wps=5804.9, ups=0.09, wpb=64782, bsz=128, num_updates=11920, lr=9.99126e-05, gnorm=2.031, loss_scale=2, train_wall=11, gb_free=2.8, wall=136394 2021-06-20 08:32:10 | INFO | train_inner | epoch 004: 2986 / 3002 loss=2.469, ppl=5.54, wps=5918.1, ups=0.09, wpb=64774, bsz=128, num_updates=11921, lr=9.99126e-05, gnorm=2.038, loss_scale=2, train_wall=10, gb_free=2.8, wall=136405 2021-06-20 08:32:22 | INFO | train_inner | epoch 004: 2987 / 3002 loss=2.596, ppl=6.04, wps=5738.8, ups=0.09, wpb=64773, bsz=128, num_updates=11922, lr=9.99126e-05, gnorm=2.252, loss_scale=2, train_wall=11, gb_free=2.8, wall=136416 2021-06-20 08:32:32 | INFO | train_inner | epoch 004: 2988 / 3002 loss=2.706, ppl=6.53, wps=5944.8, ups=0.09, wpb=64824, bsz=128, num_updates=11923, lr=9.99126e-05, gnorm=2.039, loss_scale=2, train_wall=10, gb_free=2.8, wall=136427 2021-06-20 08:32:43 | INFO | train_inner | epoch 004: 2989 / 3002 loss=2.608, ppl=6.1, wps=5894, ups=0.09, wpb=64878, bsz=128, num_updates=11924, lr=9.99126e-05, gnorm=2.062, loss_scale=2, train_wall=11, gb_free=2.8, wall=136438 2021-06-20 08:32:55 | INFO | train_inner | epoch 004: 2990 / 3002 loss=2.583, ppl=5.99, wps=5839.6, ups=0.09, wpb=64844, bsz=128, num_updates=11925, lr=9.99126e-05, gnorm=2.021, loss_scale=2, train_wall=11, gb_free=2.8, wall=136449 2021-06-20 08:33:06 | INFO | train_inner | epoch 004: 2991 / 3002 loss=2.57, ppl=5.94, wps=5739, ups=0.09, wpb=64772, bsz=128, num_updates=11926, lr=9.99126e-05, gnorm=2.159, loss_scale=2, train_wall=11, gb_free=2.8, wall=136460 2021-06-20 08:33:17 | INFO | train_inner | epoch 004: 2992 / 3002 loss=2.446, ppl=5.45, wps=5835.4, ups=0.09, wpb=64773, bsz=128, num_updates=11927, lr=9.99126e-05, gnorm=1.932, loss_scale=2, train_wall=11, gb_free=2.8, wall=136471 2021-06-20 08:33:28 | INFO | train_inner | epoch 004: 2993 / 3002 loss=2.431, ppl=5.39, wps=5818.2, ups=0.09, wpb=64830, bsz=128, num_updates=11928, lr=9.99126e-05, gnorm=1.965, loss_scale=2, train_wall=11, gb_free=2.8, wall=136482 2021-06-20 08:33:39 | INFO | train_inner | epoch 004: 2994 / 3002 loss=2.571, ppl=5.94, wps=5919.2, ups=0.09, wpb=64823, bsz=128, num_updates=11929, lr=9.99126e-05, gnorm=2.164, loss_scale=2, train_wall=11, gb_free=2.8, wall=136493 2021-06-20 08:33:50 | INFO | train_inner | epoch 004: 2995 / 3002 loss=2.415, ppl=5.33, wps=5868.1, ups=0.09, wpb=64840, bsz=128, num_updates=11930, lr=9.99126e-05, gnorm=2.305, loss_scale=2, train_wall=11, gb_free=2.8, wall=136504 2021-06-20 08:34:01 | INFO | train_inner | epoch 004: 2996 / 3002 loss=2.555, ppl=5.88, wps=5921.5, ups=0.09, wpb=64798, bsz=128, num_updates=11931, lr=9.99125e-05, gnorm=2.065, loss_scale=2, train_wall=10, gb_free=2.8, wall=136515 2021-06-20 08:34:12 | INFO | train_inner | epoch 004: 2997 / 3002 loss=2.585, ppl=6, wps=5883.6, ups=0.09, wpb=64897, bsz=128, num_updates=11932, lr=9.99125e-05, gnorm=2.079, loss_scale=2, train_wall=11, gb_free=2.8, wall=136526 2021-06-20 08:34:23 | INFO | train_inner | epoch 004: 2998 / 3002 loss=2.469, ppl=5.54, wps=5887.8, ups=0.09, wpb=64854, bsz=128, num_updates=11933, lr=9.99125e-05, gnorm=2.15, loss_scale=2, train_wall=11, gb_free=2.8, wall=136537 2021-06-20 08:34:34 | INFO | train_inner | epoch 004: 2999 / 3002 loss=2.476, ppl=5.56, wps=5907.8, ups=0.09, wpb=64832, bsz=128, num_updates=11934, lr=9.99125e-05, gnorm=1.925, loss_scale=2, train_wall=11, gb_free=2.8, wall=136548 2021-06-20 08:34:45 | INFO | train_inner | epoch 004: 3000 / 3002 loss=2.497, ppl=5.64, wps=5837.1, ups=0.09, wpb=64749, bsz=128, num_updates=11935, lr=9.99125e-05, gnorm=1.949, loss_scale=2, train_wall=11, gb_free=2.8, wall=136559 2021-06-20 08:34:56 | INFO | train_inner | epoch 004: 3001 / 3002 loss=2.416, ppl=5.34, wps=5797.1, ups=0.09, wpb=64824, bsz=128, num_updates=11936, lr=9.99125e-05, gnorm=1.942, loss_scale=2, train_wall=11, gb_free=2.8, wall=136571 2021-06-20 08:35:03 | INFO | train_inner | epoch 004: 3002 / 3002 loss=2.395, ppl=5.26, wps=5755.2, ups=0.16, wpb=36396, bsz=72, num_updates=11937, lr=9.99125e-05, gnorm=2.61, loss_scale=2, train_wall=6, gb_free=2.8, wall=136577 2021-06-20 08:35:03 | INFO | fairseq_cli.train | begin validation on "valid" subset 2021-06-20 08:49:54 | INFO | valid | epoch 004 | valid on 'valid' subset | loss 2.405 | ppl 5.3 | wps 19798.9 | wpb 506.5 | bsz 1 | num_updates 11937 | best_loss 2.405 2021-06-20 08:49:54 | INFO | fairseq.checkpoint_utils | Preparing to save checkpoint for epoch 4 @ 11937 updates 2021-06-20 08:49:54 | INFO | fairseq.trainer | Saving checkpoint to checkpoints/checkpoint4.pt 2021-06-20 08:50:06 | INFO | fairseq.trainer | Finished saving checkpoint to checkpoints/checkpoint4.pt 2021-06-20 08:56:00 | INFO | fairseq.checkpoint_utils | Saved checkpoint checkpoints/checkpoint4.pt (epoch 4 @ 11937 updates, score 2.405) (writing took 366.0893832309812 seconds) 2021-06-20 08:56:05 | INFO | fairseq_cli.train | end of epoch 4 (average epoch stats below) 2021-06-20 08:56:05 | INFO | train | epoch 004 | loss 2.584 | ppl 6 | wps 5614.7 | ups 0.09 | wpb 64819.4 | bsz 128 | num_updates 11937 | lr 9.99125e-05 | gnorm 2.584 | loss_scale 2 | train_wall 31853 | gb_free 2.8 | wall 137839 2021-06-20 08:56:05 | INFO | fairseq.trainer | begin training epoch 5 2021-06-20 08:56:05 | INFO | fairseq_cli.train | Start iterating over samples 2021-06-20 08:56:16 | INFO | train_inner | epoch 005: 1 / 3002 loss=2.48, ppl=5.58, wps=50.9, ups=0, wpb=64737, bsz=128, num_updates=11938, lr=9.99125e-05, gnorm=2.045, loss_scale=2, train_wall=10, gb_free=2.8, wall=137850 2021-06-20 08:56:26 | INFO | train_inner | epoch 005: 2 / 3002 loss=2.668, ppl=6.35, wps=6195.2, ups=0.1, wpb=64796, bsz=128, num_updates=11939, lr=9.99125e-05, gnorm=2.046, loss_scale=2, train_wall=10, gb_free=2.8, wall=137861 2021-06-20 08:56:37 | INFO | train_inner | epoch 005: 3 / 3002 loss=2.558, ppl=5.89, wps=6234.1, ups=0.1, wpb=64911, bsz=128, num_updates=11940, lr=9.99125e-05, gnorm=2.097, loss_scale=2, train_wall=10, gb_free=2.8, wall=137871 2021-06-20 08:56:47 | INFO | train_inner | epoch 005: 4 / 3002 loss=2.412, ppl=5.32, wps=6313.1, ups=0.1, wpb=64855, bsz=128, num_updates=11941, lr=9.99125e-05, gnorm=1.938, loss_scale=2, train_wall=10, gb_free=2.8, wall=137881 2021-06-20 08:56:57 | INFO | train_inner | epoch 005: 5 / 3002 loss=2.434, ppl=5.4, wps=6257, ups=0.1, wpb=64869, bsz=128, num_updates=11942, lr=9.99125e-05, gnorm=1.996, loss_scale=2, train_wall=10, gb_free=2.8, wall=137892 2021-06-20 08:57:08 | INFO | train_inner | epoch 005: 6 / 3002 loss=2.461, ppl=5.5, wps=6194.2, ups=0.1, wpb=64805, bsz=128, num_updates=11943, lr=9.99124e-05, gnorm=2.074, loss_scale=2, train_wall=10, gb_free=2.8, wall=137902 2021-06-20 08:57:18 | INFO | train_inner | epoch 005: 7 / 3002 loss=2.403, ppl=5.29, wps=6071.5, ups=0.09, wpb=64854, bsz=128, num_updates=11944, lr=9.99124e-05, gnorm=2.019, loss_scale=2, train_wall=10, gb_free=2.8, wall=137913 2021-06-20 08:57:29 | INFO | train_inner | epoch 005: 8 / 3002 loss=2.508, ppl=5.69, wps=5988.3, ups=0.09, wpb=64883, bsz=128, num_updates=11945, lr=9.99124e-05, gnorm=2.024, loss_scale=2, train_wall=10, gb_free=2.8, wall=137924 2021-06-20 08:57:40 | INFO | train_inner | epoch 005: 9 / 3002 loss=2.441, ppl=5.43, wps=5876.4, ups=0.09, wpb=64864, bsz=128, num_updates=11946, lr=9.99124e-05, gnorm=1.975, loss_scale=2, train_wall=11, gb_free=2.8, wall=137935 2021-06-20 08:57:51 | INFO | train_inner | epoch 005: 10 / 3002 loss=2.49, ppl=5.62, wps=5908.4, ups=0.09, wpb=64797, bsz=128, num_updates=11947, lr=9.99124e-05, gnorm=1.936, loss_scale=2, train_wall=11, gb_free=2.8, wall=137946 2021-06-20 08:58:02 | INFO | train_inner | epoch 005: 11 / 3002 loss=2.404, ppl=5.29, wps=5825.7, ups=0.09, wpb=64782, bsz=128, num_updates=11948, lr=9.99124e-05, gnorm=1.974, loss_scale=2, train_wall=11, gb_free=2.8, wall=137957 2021-06-20 08:58:13 | INFO | train_inner | epoch 005: 12 / 3002 loss=2.525, ppl=5.75, wps=6032, ups=0.09, wpb=64828, bsz=128, num_updates=11949, lr=9.99124e-05, gnorm=2.08, loss_scale=2, train_wall=10, gb_free=2.8, wall=137967 2021-06-20 08:58:24 | INFO | train_inner | epoch 005: 13 / 3002 loss=2.615, ppl=6.13, wps=5809.3, ups=0.09, wpb=64772, bsz=128, num_updates=11950, lr=9.99124e-05, gnorm=2.069, loss_scale=2, train_wall=11, gb_free=2.8, wall=137979 2021-06-20 08:58:35 | INFO | train_inner | epoch 005: 14 / 3002 loss=2.355, ppl=5.12, wps=6020.8, ups=0.09, wpb=64855, bsz=128, num_updates=11951, lr=9.99124e-05, gnorm=1.997, loss_scale=2, train_wall=10, gb_free=2.8, wall=137989 2021-06-20 08:58:46 | INFO | train_inner | epoch 005: 15 / 3002 loss=2.475, ppl=5.56, wps=5949.7, ups=0.09, wpb=64878, bsz=128, num_updates=11952, lr=9.99124e-05, gnorm=1.986, loss_scale=2, train_wall=10, gb_free=2.8, wall=138000 2021-06-20 08:58:57 | INFO | train_inner | epoch 005: 16 / 3002 loss=2.464, ppl=5.52, wps=5989.8, ups=0.09, wpb=64852, bsz=128, num_updates=11953, lr=9.99124e-05, gnorm=2.262, loss_scale=2, train_wall=10, gb_free=2.8, wall=138011 2021-06-20 08:59:08 | INFO | train_inner | epoch 005: 17 / 3002 loss=2.456, ppl=5.49, wps=5756, ups=0.09, wpb=64848, bsz=128, num_updates=11954, lr=9.99124e-05, gnorm=2.039, loss_scale=2, train_wall=11, gb_free=2.8, wall=138022 2021-06-20 08:59:19 | INFO | train_inner | epoch 005: 18 / 3002 loss=2.538, ppl=5.81, wps=5768, ups=0.09, wpb=64835, bsz=128, num_updates=11955, lr=9.99124e-05, gnorm=2.073, loss_scale=2, train_wall=11, gb_free=2.8, wall=138034 2021-06-20 08:59:30 | INFO | train_inner | epoch 005: 19 / 3002 loss=2.55, ppl=5.85, wps=5797.9, ups=0.09, wpb=64835, bsz=128, num_updates=11956, lr=9.99123e-05, gnorm=1.948, loss_scale=2, train_wall=11, gb_free=2.8, wall=138045 2021-06-20 08:59:42 | INFO | train_inner | epoch 005: 20 / 3002 loss=2.534, ppl=5.79, wps=5725.5, ups=0.09, wpb=64891, bsz=128, num_updates=11957, lr=9.99123e-05, gnorm=2.014, loss_scale=2, train_wall=11, gb_free=2.8, wall=138056 2021-06-20 08:59:53 | INFO | train_inner | epoch 005: 21 / 3002 loss=2.448, ppl=5.46, wps=5971.8, ups=0.09, wpb=64839, bsz=128, num_updates=11958, lr=9.99123e-05, gnorm=2.005, loss_scale=2, train_wall=10, gb_free=2.8, wall=138067 2021-06-20 09:00:04 | INFO | train_inner | epoch 005: 22 / 3002 loss=2.596, ppl=6.05, wps=5883.8, ups=0.09, wpb=64758, bsz=128, num_updates=11959, lr=9.99123e-05, gnorm=2.021, loss_scale=2, train_wall=11, gb_free=2.8, wall=138078 2021-06-20 09:00:15 | INFO | train_inner | epoch 005: 23 / 3002 loss=2.709, ppl=6.54, wps=5773.8, ups=0.09, wpb=64829, bsz=128, num_updates=11960, lr=9.99123e-05, gnorm=2.105, loss_scale=2, train_wall=11, gb_free=2.8, wall=138089 2021-06-20 09:00:26 | INFO | train_inner | epoch 005: 24 / 3002 loss=2.598, ppl=6.06, wps=5852, ups=0.09, wpb=64810, bsz=128, num_updates=11961, lr=9.99123e-05, gnorm=2.021, loss_scale=2, train_wall=11, gb_free=2.8, wall=138100 2021-06-20 09:00:37 | INFO | train_inner | epoch 005: 25 / 3002 loss=2.506, ppl=5.68, wps=5793.7, ups=0.09, wpb=64792, bsz=128, num_updates=11962, lr=9.99123e-05, gnorm=1.947, loss_scale=2, train_wall=11, gb_free=2.8, wall=138111 2021-06-20 09:00:48 | INFO | train_inner | epoch 005: 26 / 3002 loss=2.467, ppl=5.53, wps=5957.2, ups=0.09, wpb=64899, bsz=128, num_updates=11963, lr=9.99123e-05, gnorm=1.987, loss_scale=2, train_wall=10, gb_free=2.8, wall=138122 2021-06-20 09:00:59 | INFO | train_inner | epoch 005: 27 / 3002 loss=2.524, ppl=5.75, wps=5851.8, ups=0.09, wpb=64758, bsz=128, num_updates=11964, lr=9.99123e-05, gnorm=1.958, loss_scale=2, train_wall=11, gb_free=2.8, wall=138133 2021-06-20 09:01:10 | INFO | train_inner | epoch 005: 28 / 3002 loss=2.587, ppl=6.01, wps=6031.5, ups=0.09, wpb=64783, bsz=128, num_updates=11965, lr=9.99123e-05, gnorm=2.062, loss_scale=2, train_wall=10, gb_free=2.8, wall=138144 2021-06-20 09:01:21 | INFO | train_inner | epoch 005: 29 / 3002 loss=2.442, ppl=5.43, wps=5986.3, ups=0.09, wpb=64850, bsz=128, num_updates=11966, lr=9.99123e-05, gnorm=2.009, loss_scale=2, train_wall=10, gb_free=2.8, wall=138155 2021-06-20 09:01:32 | INFO | train_inner | epoch 005: 30 / 3002 loss=2.419, ppl=5.35, wps=5868.7, ups=0.09, wpb=64785, bsz=128, num_updates=11967, lr=9.99123e-05, gnorm=1.981, loss_scale=2, train_wall=11, gb_free=2.8, wall=138166 2021-06-20 09:01:43 | INFO | train_inner | epoch 005: 31 / 3002 loss=2.598, ppl=6.05, wps=5778.6, ups=0.09, wpb=64777, bsz=128, num_updates=11968, lr=9.99122e-05, gnorm=2.063, loss_scale=2, train_wall=11, gb_free=2.8, wall=138177 2021-06-20 09:01:54 | INFO | train_inner | epoch 005: 32 / 3002 loss=2.401, ppl=5.28, wps=5763.2, ups=0.09, wpb=64844, bsz=128, num_updates=11969, lr=9.99122e-05, gnorm=2.17, loss_scale=2, train_wall=11, gb_free=2.8, wall=138189 2021-06-20 09:02:05 | INFO | train_inner | epoch 005: 33 / 3002 loss=2.624, ppl=6.17, wps=5827.2, ups=0.09, wpb=64781, bsz=128, num_updates=11970, lr=9.99122e-05, gnorm=2.04, loss_scale=2, train_wall=11, gb_free=2.8, wall=138200 2021-06-20 09:02:16 | INFO | train_inner | epoch 005: 34 / 3002 loss=2.575, ppl=5.96, wps=5852.6, ups=0.09, wpb=64822, bsz=128, num_updates=11971, lr=9.99122e-05, gnorm=2.046, loss_scale=2, train_wall=11, gb_free=2.8, wall=138211 2021-06-20 09:02:27 | INFO | train_inner | epoch 005: 35 / 3002 loss=2.632, ppl=6.2, wps=5858.6, ups=0.09, wpb=64827, bsz=128, num_updates=11972, lr=9.99122e-05, gnorm=2.035, loss_scale=2, train_wall=11, gb_free=2.8, wall=138222 2021-06-20 09:02:38 | INFO | train_inner | epoch 005: 36 / 3002 loss=2.489, ppl=5.61, wps=5902.8, ups=0.09, wpb=64793, bsz=128, num_updates=11973, lr=9.99122e-05, gnorm=1.922, loss_scale=2, train_wall=11, gb_free=2.8, wall=138233 2021-06-20 09:02:50 | INFO | train_inner | epoch 005: 37 / 3002 loss=2.503, ppl=5.67, wps=5828.2, ups=0.09, wpb=64842, bsz=128, num_updates=11974, lr=9.99122e-05, gnorm=1.951, loss_scale=2, train_wall=11, gb_free=2.8, wall=138244 2021-06-20 09:03:01 | INFO | train_inner | epoch 005: 38 / 3002 loss=2.44, ppl=5.43, wps=5896.8, ups=0.09, wpb=64769, bsz=128, num_updates=11975, lr=9.99122e-05, gnorm=2.137, loss_scale=2, train_wall=11, gb_free=2.8, wall=138255 2021-06-20 09:03:12 | INFO | train_inner | epoch 005: 39 / 3002 loss=2.615, ppl=6.13, wps=5769.7, ups=0.09, wpb=64805, bsz=128, num_updates=11976, lr=9.99122e-05, gnorm=1.948, loss_scale=2, train_wall=11, gb_free=2.8, wall=138266 2021-06-20 09:03:23 | INFO | train_inner | epoch 005: 40 / 3002 loss=2.501, ppl=5.66, wps=5749.9, ups=0.09, wpb=64912, bsz=128, num_updates=11977, lr=9.99122e-05, gnorm=1.984, loss_scale=2, train_wall=11, gb_free=2.8, wall=138277 2021-06-20 09:03:34 | INFO | train_inner | epoch 005: 41 / 3002 loss=2.56, ppl=5.9, wps=5889.2, ups=0.09, wpb=64841, bsz=128, num_updates=11978, lr=9.99122e-05, gnorm=2.204, loss_scale=2, train_wall=11, gb_free=2.8, wall=138288 2021-06-20 09:03:45 | INFO | train_inner | epoch 005: 42 / 3002 loss=2.688, ppl=6.44, wps=5883.9, ups=0.09, wpb=64837, bsz=128, num_updates=11979, lr=9.99122e-05, gnorm=2.018, loss_scale=2, train_wall=11, gb_free=2.8, wall=138299 2021-06-20 09:03:56 | INFO | train_inner | epoch 005: 43 / 3002 loss=2.551, ppl=5.86, wps=5774.3, ups=0.09, wpb=64757, bsz=128, num_updates=11980, lr=9.99122e-05, gnorm=1.98, loss_scale=2, train_wall=11, gb_free=2.8, wall=138311 2021-06-20 09:04:08 | INFO | train_inner | epoch 005: 44 / 3002 loss=2.469, ppl=5.54, wps=5775.4, ups=0.09, wpb=64880, bsz=128, num_updates=11981, lr=9.99121e-05, gnorm=2.003, loss_scale=2, train_wall=11, gb_free=2.8, wall=138322 2021-06-20 09:04:19 | INFO | train_inner | epoch 005: 45 / 3002 loss=2.514, ppl=5.71, wps=5739.4, ups=0.09, wpb=64824, bsz=128, num_updates=11982, lr=9.99121e-05, gnorm=2.013, loss_scale=2, train_wall=11, gb_free=2.8, wall=138333 2021-06-20 09:04:30 | INFO | train_inner | epoch 005: 46 / 3002 loss=2.548, ppl=5.85, wps=5887.3, ups=0.09, wpb=64801, bsz=128, num_updates=11983, lr=9.99121e-05, gnorm=2.148, loss_scale=4, train_wall=11, gb_free=2.8, wall=138344 2021-06-20 09:04:41 | INFO | train_inner | epoch 005: 47 / 3002 loss=2.519, ppl=5.73, wps=5751.9, ups=0.09, wpb=64862, bsz=128, num_updates=11984, lr=9.99121e-05, gnorm=1.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=138355 2021-06-20 09:04:52 | INFO | train_inner | epoch 005: 48 / 3002 loss=2.635, ppl=6.21, wps=5803.9, ups=0.09, wpb=64770, bsz=128, num_updates=11985, lr=9.99121e-05, gnorm=2.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=138367 2021-06-20 09:05:03 | INFO | train_inner | epoch 005: 49 / 3002 loss=2.541, ppl=5.82, wps=5801.2, ups=0.09, wpb=64763, bsz=128, num_updates=11986, lr=9.99121e-05, gnorm=1.95, loss_scale=4, train_wall=11, gb_free=2.8, wall=138378 2021-06-20 09:05:15 | INFO | train_inner | epoch 005: 50 / 3002 loss=2.701, ppl=6.5, wps=5851.4, ups=0.09, wpb=64830, bsz=128, num_updates=11987, lr=9.99121e-05, gnorm=1.977, loss_scale=4, train_wall=11, gb_free=2.8, wall=138389 2021-06-20 09:05:26 | INFO | train_inner | epoch 005: 51 / 3002 loss=2.41, ppl=5.31, wps=5894.2, ups=0.09, wpb=64925, bsz=128, num_updates=11988, lr=9.99121e-05, gnorm=1.958, loss_scale=4, train_wall=11, gb_free=2.8, wall=138400 2021-06-20 09:05:37 | INFO | train_inner | epoch 005: 52 / 3002 loss=2.558, ppl=5.89, wps=5805.8, ups=0.09, wpb=64759, bsz=128, num_updates=11989, lr=9.99121e-05, gnorm=2.013, loss_scale=4, train_wall=11, gb_free=2.8, wall=138411 2021-06-20 09:05:48 | INFO | train_inner | epoch 005: 53 / 3002 loss=2.407, ppl=5.31, wps=5838.2, ups=0.09, wpb=64805, bsz=128, num_updates=11990, lr=9.99121e-05, gnorm=2.033, loss_scale=4, train_wall=11, gb_free=2.8, wall=138422 2021-06-20 09:05:59 | INFO | train_inner | epoch 005: 54 / 3002 loss=2.526, ppl=5.76, wps=5724.9, ups=0.09, wpb=64770, bsz=128, num_updates=11991, lr=9.99121e-05, gnorm=2.024, loss_scale=4, train_wall=11, gb_free=2.8, wall=138433 2021-06-20 09:06:10 | INFO | train_inner | epoch 005: 55 / 3002 loss=2.489, ppl=5.61, wps=5859.4, ups=0.09, wpb=64817, bsz=128, num_updates=11992, lr=9.99121e-05, gnorm=2.041, loss_scale=4, train_wall=11, gb_free=2.8, wall=138444 2021-06-20 09:06:21 | INFO | train_inner | epoch 005: 56 / 3002 loss=2.456, ppl=5.49, wps=5957.3, ups=0.09, wpb=64785, bsz=128, num_updates=11993, lr=9.9912e-05, gnorm=2.012, loss_scale=4, train_wall=10, gb_free=2.8, wall=138455 2021-06-20 09:06:32 | INFO | train_inner | epoch 005: 57 / 3002 loss=2.352, ppl=5.11, wps=5866.4, ups=0.09, wpb=64828, bsz=128, num_updates=11994, lr=9.9912e-05, gnorm=1.98, loss_scale=4, train_wall=11, gb_free=2.8, wall=138466 2021-06-20 09:06:43 | INFO | train_inner | epoch 005: 58 / 3002 loss=2.552, ppl=5.86, wps=5939.1, ups=0.09, wpb=64777, bsz=128, num_updates=11995, lr=9.9912e-05, gnorm=1.943, loss_scale=4, train_wall=10, gb_free=2.8, wall=138477 2021-06-20 09:06:54 | INFO | train_inner | epoch 005: 59 / 3002 loss=2.609, ppl=6.1, wps=5775.3, ups=0.09, wpb=64826, bsz=128, num_updates=11996, lr=9.9912e-05, gnorm=2.016, loss_scale=4, train_wall=11, gb_free=2.8, wall=138489 2021-06-20 09:07:05 | INFO | train_inner | epoch 005: 60 / 3002 loss=2.733, ppl=6.65, wps=5926.2, ups=0.09, wpb=64866, bsz=128, num_updates=11997, lr=9.9912e-05, gnorm=2.064, loss_scale=4, train_wall=11, gb_free=2.8, wall=138499 2021-06-20 09:07:16 | INFO | train_inner | epoch 005: 61 / 3002 loss=2.557, ppl=5.89, wps=5981.4, ups=0.09, wpb=64842, bsz=128, num_updates=11998, lr=9.9912e-05, gnorm=2.001, loss_scale=4, train_wall=10, gb_free=2.8, wall=138510 2021-06-20 09:07:27 | INFO | train_inner | epoch 005: 62 / 3002 loss=2.493, ppl=5.63, wps=5886.4, ups=0.09, wpb=64850, bsz=128, num_updates=11999, lr=9.9912e-05, gnorm=1.956, loss_scale=4, train_wall=11, gb_free=2.8, wall=138521 2021-06-20 09:07:38 | INFO | train_inner | epoch 005: 63 / 3002 loss=2.563, ppl=5.91, wps=5913.9, ups=0.09, wpb=64812, bsz=128, num_updates=12000, lr=9.9912e-05, gnorm=2.012, loss_scale=4, train_wall=11, gb_free=2.8, wall=138532 2021-06-20 09:07:49 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-20 09:08:00 | INFO | train_inner | epoch 005: 65 / 3002 loss=2.482, ppl=5.59, wps=2927.1, ups=0.05, wpb=64859, bsz=128, num_updates=12001, lr=9.9912e-05, gnorm=2.001, loss_scale=2, train_wall=21, gb_free=2.8, wall=138554 2021-06-20 09:08:11 | INFO | train_inner | epoch 005: 66 / 3002 loss=2.491, ppl=5.62, wps=5838, ups=0.09, wpb=64806, bsz=128, num_updates=12002, lr=9.9912e-05, gnorm=1.924, loss_scale=2, train_wall=11, gb_free=2.8, wall=138566 2021-06-20 09:08:22 | INFO | train_inner | epoch 005: 67 / 3002 loss=2.548, ppl=5.85, wps=6005.4, ups=0.09, wpb=64828, bsz=128, num_updates=12003, lr=9.9912e-05, gnorm=2.048, loss_scale=2, train_wall=10, gb_free=2.8, wall=138576 2021-06-20 09:08:33 | INFO | train_inner | epoch 005: 68 / 3002 loss=2.617, ppl=6.13, wps=5785.6, ups=0.09, wpb=64788, bsz=128, num_updates=12004, lr=9.9912e-05, gnorm=2.035, loss_scale=2, train_wall=11, gb_free=2.8, wall=138588 2021-06-20 09:08:44 | INFO | train_inner | epoch 005: 69 / 3002 loss=2.442, ppl=5.43, wps=5810.7, ups=0.09, wpb=64758, bsz=128, num_updates=12005, lr=9.9912e-05, gnorm=1.96, loss_scale=2, train_wall=11, gb_free=2.8, wall=138599 2021-06-20 09:08:55 | INFO | train_inner | epoch 005: 70 / 3002 loss=2.461, ppl=5.51, wps=5896.5, ups=0.09, wpb=64870, bsz=128, num_updates=12006, lr=9.99119e-05, gnorm=1.929, loss_scale=2, train_wall=11, gb_free=2.8, wall=138610 2021-06-20 09:09:06 | INFO | train_inner | epoch 005: 71 / 3002 loss=2.589, ppl=6.02, wps=5857.7, ups=0.09, wpb=64860, bsz=128, num_updates=12007, lr=9.99119e-05, gnorm=2.023, loss_scale=2, train_wall=11, gb_free=2.8, wall=138621 2021-06-20 09:09:18 | INFO | train_inner | epoch 005: 72 / 3002 loss=2.543, ppl=5.83, wps=5804.8, ups=0.09, wpb=64935, bsz=128, num_updates=12008, lr=9.99119e-05, gnorm=1.966, loss_scale=2, train_wall=11, gb_free=2.8, wall=138632 2021-06-20 09:09:29 | INFO | train_inner | epoch 005: 73 / 3002 loss=2.502, ppl=5.66, wps=5869.8, ups=0.09, wpb=64869, bsz=128, num_updates=12009, lr=9.99119e-05, gnorm=1.995, loss_scale=2, train_wall=11, gb_free=2.8, wall=138643 2021-06-20 09:09:40 | INFO | train_inner | epoch 005: 74 / 3002 loss=2.555, ppl=5.88, wps=5864.5, ups=0.09, wpb=64760, bsz=128, num_updates=12010, lr=9.99119e-05, gnorm=2.198, loss_scale=2, train_wall=11, gb_free=2.8, wall=138654 2021-06-20 09:09:51 | INFO | train_inner | epoch 005: 75 / 3002 loss=2.514, ppl=5.71, wps=5963.4, ups=0.09, wpb=64866, bsz=128, num_updates=12011, lr=9.99119e-05, gnorm=1.985, loss_scale=2, train_wall=10, gb_free=2.8, wall=138665 2021-06-20 09:10:02 | INFO | train_inner | epoch 005: 76 / 3002 loss=2.697, ppl=6.48, wps=5891.5, ups=0.09, wpb=64835, bsz=128, num_updates=12012, lr=9.99119e-05, gnorm=2.048, loss_scale=2, train_wall=11, gb_free=2.8, wall=138676 2021-06-20 09:10:13 | INFO | train_inner | epoch 005: 77 / 3002 loss=2.614, ppl=6.12, wps=5819.3, ups=0.09, wpb=64850, bsz=128, num_updates=12013, lr=9.99119e-05, gnorm=2.062, loss_scale=2, train_wall=11, gb_free=2.8, wall=138687 2021-06-20 09:10:24 | INFO | train_inner | epoch 005: 78 / 3002 loss=2.7, ppl=6.5, wps=5925.9, ups=0.09, wpb=64743, bsz=128, num_updates=12014, lr=9.99119e-05, gnorm=2.022, loss_scale=2, train_wall=10, gb_free=2.8, wall=138698 2021-06-20 09:10:35 | INFO | train_inner | epoch 005: 79 / 3002 loss=2.467, ppl=5.53, wps=5828.1, ups=0.09, wpb=64744, bsz=128, num_updates=12015, lr=9.99119e-05, gnorm=2.04, loss_scale=2, train_wall=11, gb_free=2.8, wall=138709 2021-06-20 09:10:46 | INFO | train_inner | epoch 005: 80 / 3002 loss=2.431, ppl=5.39, wps=5837.5, ups=0.09, wpb=64817, bsz=128, num_updates=12016, lr=9.99119e-05, gnorm=1.984, loss_scale=2, train_wall=11, gb_free=2.8, wall=138720 2021-06-20 09:10:57 | INFO | train_inner | epoch 005: 81 / 3002 loss=2.509, ppl=5.69, wps=5903.5, ups=0.09, wpb=64821, bsz=128, num_updates=12017, lr=9.99119e-05, gnorm=1.997, loss_scale=2, train_wall=11, gb_free=2.8, wall=138731 2021-06-20 09:11:08 | INFO | train_inner | epoch 005: 82 / 3002 loss=2.583, ppl=5.99, wps=5869.5, ups=0.09, wpb=64839, bsz=128, num_updates=12018, lr=9.99118e-05, gnorm=1.967, loss_scale=2, train_wall=11, gb_free=2.8, wall=138742 2021-06-20 09:11:19 | INFO | train_inner | epoch 005: 83 / 3002 loss=2.374, ppl=5.19, wps=5849.1, ups=0.09, wpb=64828, bsz=128, num_updates=12019, lr=9.99118e-05, gnorm=1.992, loss_scale=2, train_wall=11, gb_free=2.8, wall=138753 2021-06-20 09:11:30 | INFO | train_inner | epoch 005: 84 / 3002 loss=2.539, ppl=5.81, wps=5848.5, ups=0.09, wpb=64939, bsz=128, num_updates=12020, lr=9.99118e-05, gnorm=2.118, loss_scale=2, train_wall=11, gb_free=2.8, wall=138764 2021-06-20 09:11:41 | INFO | train_inner | epoch 005: 85 / 3002 loss=2.618, ppl=6.14, wps=5851.8, ups=0.09, wpb=64869, bsz=128, num_updates=12021, lr=9.99118e-05, gnorm=4.312, loss_scale=2, train_wall=11, gb_free=2.8, wall=138776 2021-06-20 09:11:52 | INFO | train_inner | epoch 005: 86 / 3002 loss=2.576, ppl=5.96, wps=5800.7, ups=0.09, wpb=64845, bsz=128, num_updates=12022, lr=9.99118e-05, gnorm=1.967, loss_scale=2, train_wall=11, gb_free=2.8, wall=138787 2021-06-20 09:12:04 | INFO | train_inner | epoch 005: 87 / 3002 loss=2.479, ppl=5.58, wps=5798.3, ups=0.09, wpb=64853, bsz=128, num_updates=12023, lr=9.99118e-05, gnorm=1.919, loss_scale=2, train_wall=11, gb_free=2.8, wall=138798 2021-06-20 09:12:15 | INFO | train_inner | epoch 005: 88 / 3002 loss=2.545, ppl=5.84, wps=5779, ups=0.09, wpb=64792, bsz=128, num_updates=12024, lr=9.99118e-05, gnorm=2.052, loss_scale=2, train_wall=11, gb_free=2.8, wall=138809 2021-06-20 09:12:26 | INFO | train_inner | epoch 005: 89 / 3002 loss=2.624, ppl=6.17, wps=5908.3, ups=0.09, wpb=64817, bsz=128, num_updates=12025, lr=9.99118e-05, gnorm=2.763, loss_scale=2, train_wall=10, gb_free=2.8, wall=138820 2021-06-20 09:12:37 | INFO | train_inner | epoch 005: 90 / 3002 loss=2.479, ppl=5.58, wps=5901, ups=0.09, wpb=64842, bsz=128, num_updates=12026, lr=9.99118e-05, gnorm=2.02, loss_scale=2, train_wall=11, gb_free=2.8, wall=138831 2021-06-20 09:12:48 | INFO | train_inner | epoch 005: 91 / 3002 loss=2.641, ppl=6.24, wps=5719.6, ups=0.09, wpb=64748, bsz=128, num_updates=12027, lr=9.99118e-05, gnorm=2.119, loss_scale=2, train_wall=11, gb_free=2.8, wall=138842 2021-06-20 09:12:59 | INFO | train_inner | epoch 005: 92 / 3002 loss=2.594, ppl=6.04, wps=5865.5, ups=0.09, wpb=64753, bsz=128, num_updates=12028, lr=9.99118e-05, gnorm=2.005, loss_scale=2, train_wall=11, gb_free=2.8, wall=138853 2021-06-20 09:13:10 | INFO | train_inner | epoch 005: 93 / 3002 loss=2.527, ppl=5.76, wps=5970.4, ups=0.09, wpb=64871, bsz=128, num_updates=12029, lr=9.99118e-05, gnorm=2.017, loss_scale=2, train_wall=10, gb_free=2.8, wall=138864 2021-06-20 09:13:21 | INFO | train_inner | epoch 005: 94 / 3002 loss=2.51, ppl=5.7, wps=5768.1, ups=0.09, wpb=64833, bsz=128, num_updates=12030, lr=9.99118e-05, gnorm=1.952, loss_scale=2, train_wall=11, gb_free=2.8, wall=138876 2021-06-20 09:13:32 | INFO | train_inner | epoch 005: 95 / 3002 loss=2.575, ppl=5.96, wps=5897.5, ups=0.09, wpb=64824, bsz=128, num_updates=12031, lr=9.99117e-05, gnorm=2.055, loss_scale=2, train_wall=11, gb_free=2.8, wall=138887 2021-06-20 09:13:43 | INFO | train_inner | epoch 005: 96 / 3002 loss=2.537, ppl=5.8, wps=5811.9, ups=0.09, wpb=64753, bsz=128, num_updates=12032, lr=9.99117e-05, gnorm=2.029, loss_scale=2, train_wall=11, gb_free=2.8, wall=138898 2021-06-20 09:13:55 | INFO | train_inner | epoch 005: 97 / 3002 loss=2.425, ppl=5.37, wps=5800.7, ups=0.09, wpb=64866, bsz=128, num_updates=12033, lr=9.99117e-05, gnorm=1.94, loss_scale=2, train_wall=11, gb_free=2.8, wall=138909 2021-06-20 09:14:06 | INFO | train_inner | epoch 005: 98 / 3002 loss=2.489, ppl=5.61, wps=5851.6, ups=0.09, wpb=64892, bsz=128, num_updates=12034, lr=9.99117e-05, gnorm=1.912, loss_scale=2, train_wall=11, gb_free=2.8, wall=138920 2021-06-20 09:14:17 | INFO | train_inner | epoch 005: 99 / 3002 loss=2.629, ppl=6.19, wps=5814.7, ups=0.09, wpb=64828, bsz=128, num_updates=12035, lr=9.99117e-05, gnorm=1.978, loss_scale=2, train_wall=11, gb_free=2.8, wall=138931 2021-06-20 09:14:28 | INFO | train_inner | epoch 005: 100 / 3002 loss=2.485, ppl=5.6, wps=5839.6, ups=0.09, wpb=64850, bsz=128, num_updates=12036, lr=9.99117e-05, gnorm=1.998, loss_scale=2, train_wall=11, gb_free=2.8, wall=138942 2021-06-20 09:14:39 | INFO | train_inner | epoch 005: 101 / 3002 loss=2.397, ppl=5.27, wps=5844, ups=0.09, wpb=64846, bsz=128, num_updates=12037, lr=9.99117e-05, gnorm=1.996, loss_scale=2, train_wall=11, gb_free=2.8, wall=138953 2021-06-20 09:14:50 | INFO | train_inner | epoch 005: 102 / 3002 loss=2.571, ppl=5.94, wps=5885.3, ups=0.09, wpb=64771, bsz=128, num_updates=12038, lr=9.99117e-05, gnorm=2.009, loss_scale=2, train_wall=11, gb_free=2.8, wall=138964 2021-06-20 09:15:01 | INFO | train_inner | epoch 005: 103 / 3002 loss=2.416, ppl=5.34, wps=5861.4, ups=0.09, wpb=64808, bsz=128, num_updates=12039, lr=9.99117e-05, gnorm=1.972, loss_scale=2, train_wall=11, gb_free=2.8, wall=138975 2021-06-20 09:15:12 | INFO | train_inner | epoch 005: 104 / 3002 loss=2.619, ppl=6.14, wps=5884, ups=0.09, wpb=64911, bsz=128, num_updates=12040, lr=9.99117e-05, gnorm=2.076, loss_scale=2, train_wall=11, gb_free=2.8, wall=138986 2021-06-20 09:15:23 | INFO | train_inner | epoch 005: 105 / 3002 loss=2.477, ppl=5.57, wps=5794.8, ups=0.09, wpb=64839, bsz=128, num_updates=12041, lr=9.99117e-05, gnorm=1.973, loss_scale=2, train_wall=11, gb_free=2.8, wall=138998 2021-06-20 09:15:35 | INFO | train_inner | epoch 005: 106 / 3002 loss=2.675, ppl=6.39, wps=5699.6, ups=0.09, wpb=64761, bsz=128, num_updates=12042, lr=9.99117e-05, gnorm=2.06, loss_scale=2, train_wall=11, gb_free=2.8, wall=139009 2021-06-20 09:15:46 | INFO | train_inner | epoch 005: 107 / 3002 loss=2.591, ppl=6.03, wps=5866.4, ups=0.09, wpb=64770, bsz=128, num_updates=12043, lr=9.99116e-05, gnorm=1.987, loss_scale=2, train_wall=11, gb_free=2.8, wall=139020 2021-06-20 09:15:57 | INFO | train_inner | epoch 005: 108 / 3002 loss=2.482, ppl=5.59, wps=5807, ups=0.09, wpb=64825, bsz=128, num_updates=12044, lr=9.99116e-05, gnorm=1.937, loss_scale=2, train_wall=11, gb_free=2.8, wall=139031 2021-06-20 09:16:08 | INFO | train_inner | epoch 005: 109 / 3002 loss=2.491, ppl=5.62, wps=5806.2, ups=0.09, wpb=64809, bsz=128, num_updates=12045, lr=9.99116e-05, gnorm=1.949, loss_scale=2, train_wall=11, gb_free=2.8, wall=139042 2021-06-20 09:16:19 | INFO | train_inner | epoch 005: 110 / 3002 loss=2.558, ppl=5.89, wps=5831.7, ups=0.09, wpb=64812, bsz=128, num_updates=12046, lr=9.99116e-05, gnorm=2.012, loss_scale=2, train_wall=11, gb_free=2.8, wall=139053 2021-06-20 09:16:30 | INFO | train_inner | epoch 005: 111 / 3002 loss=2.509, ppl=5.69, wps=5935.5, ups=0.09, wpb=64796, bsz=128, num_updates=12047, lr=9.99116e-05, gnorm=2.013, loss_scale=2, train_wall=10, gb_free=2.8, wall=139064 2021-06-20 09:16:41 | INFO | train_inner | epoch 005: 112 / 3002 loss=2.449, ppl=5.46, wps=5903.1, ups=0.09, wpb=64852, bsz=128, num_updates=12048, lr=9.99116e-05, gnorm=1.997, loss_scale=2, train_wall=11, gb_free=2.8, wall=139075 2021-06-20 09:16:52 | INFO | train_inner | epoch 005: 113 / 3002 loss=2.44, ppl=5.43, wps=5856.3, ups=0.09, wpb=64813, bsz=128, num_updates=12049, lr=9.99116e-05, gnorm=2.015, loss_scale=2, train_wall=11, gb_free=2.8, wall=139086 2021-06-20 09:17:03 | INFO | train_inner | epoch 005: 114 / 3002 loss=2.541, ppl=5.82, wps=5877.7, ups=0.09, wpb=64913, bsz=128, num_updates=12050, lr=9.99116e-05, gnorm=1.969, loss_scale=2, train_wall=11, gb_free=2.8, wall=139097 2021-06-20 09:17:14 | INFO | train_inner | epoch 005: 115 / 3002 loss=2.397, ppl=5.27, wps=5899.1, ups=0.09, wpb=64892, bsz=128, num_updates=12051, lr=9.99116e-05, gnorm=5.467, loss_scale=2, train_wall=11, gb_free=2.8, wall=139108 2021-06-20 09:17:25 | INFO | train_inner | epoch 005: 116 / 3002 loss=2.452, ppl=5.47, wps=5898.7, ups=0.09, wpb=64886, bsz=128, num_updates=12052, lr=9.99116e-05, gnorm=1.994, loss_scale=2, train_wall=11, gb_free=2.8, wall=139119 2021-06-20 09:17:36 | INFO | train_inner | epoch 005: 117 / 3002 loss=2.485, ppl=5.6, wps=5740.7, ups=0.09, wpb=64907, bsz=128, num_updates=12053, lr=9.99116e-05, gnorm=1.969, loss_scale=2, train_wall=11, gb_free=2.8, wall=139131 2021-06-20 09:17:47 | INFO | train_inner | epoch 005: 118 / 3002 loss=2.406, ppl=5.3, wps=5859.6, ups=0.09, wpb=64822, bsz=128, num_updates=12054, lr=9.99116e-05, gnorm=2.056, loss_scale=2, train_wall=11, gb_free=2.8, wall=139142 2021-06-20 09:17:59 | INFO | train_inner | epoch 005: 119 / 3002 loss=2.601, ppl=6.07, wps=5761.1, ups=0.09, wpb=64712, bsz=128, num_updates=12055, lr=9.99116e-05, gnorm=2.058, loss_scale=2, train_wall=11, gb_free=2.8, wall=139153 2021-06-20 09:18:10 | INFO | train_inner | epoch 005: 120 / 3002 loss=2.59, ppl=6.02, wps=5749.2, ups=0.09, wpb=64795, bsz=128, num_updates=12056, lr=9.99115e-05, gnorm=2.144, loss_scale=2, train_wall=11, gb_free=2.8, wall=139164 2021-06-20 09:18:21 | INFO | train_inner | epoch 005: 121 / 3002 loss=2.625, ppl=6.17, wps=5857.5, ups=0.09, wpb=64742, bsz=128, num_updates=12057, lr=9.99115e-05, gnorm=2.06, loss_scale=2, train_wall=11, gb_free=2.8, wall=139175 2021-06-20 09:18:32 | INFO | train_inner | epoch 005: 122 / 3002 loss=2.456, ppl=5.49, wps=5769.5, ups=0.09, wpb=64794, bsz=128, num_updates=12058, lr=9.99115e-05, gnorm=1.867, loss_scale=2, train_wall=11, gb_free=2.8, wall=139187 2021-06-20 09:18:43 | INFO | train_inner | epoch 005: 123 / 3002 loss=2.636, ppl=6.22, wps=5910.6, ups=0.09, wpb=64835, bsz=128, num_updates=12059, lr=9.99115e-05, gnorm=1.974, loss_scale=2, train_wall=11, gb_free=2.8, wall=139198 2021-06-20 09:18:55 | INFO | train_inner | epoch 005: 124 / 3002 loss=2.624, ppl=6.16, wps=5715.7, ups=0.09, wpb=64790, bsz=128, num_updates=12060, lr=9.99115e-05, gnorm=15.192, loss_scale=2, train_wall=11, gb_free=2.8, wall=139209 2021-06-20 09:19:06 | INFO | train_inner | epoch 005: 125 / 3002 loss=2.529, ppl=5.77, wps=5900, ups=0.09, wpb=64846, bsz=128, num_updates=12061, lr=9.99115e-05, gnorm=1.967, loss_scale=2, train_wall=11, gb_free=2.8, wall=139220 2021-06-20 09:19:17 | INFO | train_inner | epoch 005: 126 / 3002 loss=2.507, ppl=5.68, wps=5809.1, ups=0.09, wpb=64851, bsz=128, num_updates=12062, lr=9.99115e-05, gnorm=2.019, loss_scale=2, train_wall=11, gb_free=2.8, wall=139231 2021-06-20 09:19:28 | INFO | train_inner | epoch 005: 127 / 3002 loss=2.502, ppl=5.66, wps=5817.8, ups=0.09, wpb=64802, bsz=128, num_updates=12063, lr=9.99115e-05, gnorm=2.034, loss_scale=2, train_wall=11, gb_free=2.8, wall=139242 2021-06-20 09:19:39 | INFO | train_inner | epoch 005: 128 / 3002 loss=2.595, ppl=6.04, wps=5967.6, ups=0.09, wpb=64829, bsz=128, num_updates=12064, lr=9.99115e-05, gnorm=2.085, loss_scale=2, train_wall=10, gb_free=2.8, wall=139253 2021-06-20 09:19:50 | INFO | train_inner | epoch 005: 129 / 3002 loss=2.518, ppl=5.73, wps=5724.8, ups=0.09, wpb=64831, bsz=128, num_updates=12065, lr=9.99115e-05, gnorm=2.22, loss_scale=2, train_wall=11, gb_free=2.8, wall=139264 2021-06-20 09:20:01 | INFO | train_inner | epoch 005: 130 / 3002 loss=2.427, ppl=5.38, wps=5807.5, ups=0.09, wpb=64819, bsz=128, num_updates=12066, lr=9.99115e-05, gnorm=2.05, loss_scale=2, train_wall=11, gb_free=2.8, wall=139276 2021-06-20 09:20:12 | INFO | train_inner | epoch 005: 131 / 3002 loss=2.478, ppl=5.57, wps=5965.3, ups=0.09, wpb=64822, bsz=128, num_updates=12067, lr=9.99115e-05, gnorm=2.137, loss_scale=2, train_wall=10, gb_free=2.8, wall=139286 2021-06-20 09:20:23 | INFO | train_inner | epoch 005: 132 / 3002 loss=2.449, ppl=5.46, wps=5831.9, ups=0.09, wpb=64890, bsz=128, num_updates=12068, lr=9.99114e-05, gnorm=2.026, loss_scale=2, train_wall=11, gb_free=2.8, wall=139298 2021-06-20 09:20:34 | INFO | train_inner | epoch 005: 133 / 3002 loss=2.532, ppl=5.78, wps=5803.2, ups=0.09, wpb=64784, bsz=128, num_updates=12069, lr=9.99114e-05, gnorm=2.012, loss_scale=2, train_wall=11, gb_free=2.8, wall=139309 2021-06-20 09:20:45 | INFO | train_inner | epoch 005: 134 / 3002 loss=2.537, ppl=5.8, wps=5849.3, ups=0.09, wpb=64876, bsz=128, num_updates=12070, lr=9.99114e-05, gnorm=1.97, loss_scale=2, train_wall=11, gb_free=2.8, wall=139320 2021-06-20 09:20:57 | INFO | train_inner | epoch 005: 135 / 3002 loss=2.448, ppl=5.46, wps=5856.5, ups=0.09, wpb=64865, bsz=128, num_updates=12071, lr=9.99114e-05, gnorm=1.996, loss_scale=2, train_wall=11, gb_free=2.8, wall=139331 2021-06-20 09:21:08 | INFO | train_inner | epoch 005: 136 / 3002 loss=2.399, ppl=5.27, wps=5886.5, ups=0.09, wpb=64846, bsz=128, num_updates=12072, lr=9.99114e-05, gnorm=1.936, loss_scale=2, train_wall=11, gb_free=2.8, wall=139342 2021-06-20 09:21:19 | INFO | train_inner | epoch 005: 137 / 3002 loss=2.48, ppl=5.58, wps=5924.4, ups=0.09, wpb=64858, bsz=128, num_updates=12073, lr=9.99114e-05, gnorm=1.969, loss_scale=2, train_wall=11, gb_free=2.8, wall=139353 2021-06-20 09:21:29 | INFO | train_inner | epoch 005: 138 / 3002 loss=2.467, ppl=5.53, wps=5931.7, ups=0.09, wpb=64859, bsz=128, num_updates=12074, lr=9.99114e-05, gnorm=1.945, loss_scale=2, train_wall=11, gb_free=2.8, wall=139364 2021-06-20 09:21:41 | INFO | train_inner | epoch 005: 139 / 3002 loss=2.462, ppl=5.51, wps=5826.6, ups=0.09, wpb=64788, bsz=128, num_updates=12075, lr=9.99114e-05, gnorm=1.955, loss_scale=2, train_wall=11, gb_free=2.8, wall=139375 2021-06-20 09:21:52 | INFO | train_inner | epoch 005: 140 / 3002 loss=2.509, ppl=5.69, wps=5755.2, ups=0.09, wpb=64833, bsz=128, num_updates=12076, lr=9.99114e-05, gnorm=2.363, loss_scale=2, train_wall=11, gb_free=2.8, wall=139386 2021-06-20 09:22:03 | INFO | train_inner | epoch 005: 141 / 3002 loss=2.447, ppl=5.45, wps=5843.8, ups=0.09, wpb=64869, bsz=128, num_updates=12077, lr=9.99114e-05, gnorm=1.988, loss_scale=2, train_wall=11, gb_free=2.8, wall=139397 2021-06-20 09:22:14 | INFO | train_inner | epoch 005: 142 / 3002 loss=2.611, ppl=6.11, wps=5882.6, ups=0.09, wpb=64840, bsz=128, num_updates=12078, lr=9.99114e-05, gnorm=2.089, loss_scale=2, train_wall=11, gb_free=2.8, wall=139408 2021-06-20 09:22:25 | INFO | train_inner | epoch 005: 143 / 3002 loss=2.549, ppl=5.85, wps=5824.7, ups=0.09, wpb=64853, bsz=128, num_updates=12079, lr=9.99114e-05, gnorm=2.025, loss_scale=2, train_wall=11, gb_free=2.8, wall=139419 2021-06-20 09:22:36 | INFO | train_inner | epoch 005: 144 / 3002 loss=2.457, ppl=5.49, wps=5823.6, ups=0.09, wpb=64769, bsz=128, num_updates=12080, lr=9.99114e-05, gnorm=2.027, loss_scale=2, train_wall=11, gb_free=2.8, wall=139431 2021-06-20 09:22:47 | INFO | train_inner | epoch 005: 145 / 3002 loss=2.656, ppl=6.3, wps=5834.2, ups=0.09, wpb=64820, bsz=128, num_updates=12081, lr=9.99113e-05, gnorm=2.179, loss_scale=2, train_wall=11, gb_free=2.8, wall=139442 2021-06-20 09:22:58 | INFO | train_inner | epoch 005: 146 / 3002 loss=2.597, ppl=6.05, wps=5901.1, ups=0.09, wpb=64849, bsz=128, num_updates=12082, lr=9.99113e-05, gnorm=2.418, loss_scale=2, train_wall=11, gb_free=2.8, wall=139453 2021-06-20 09:23:09 | INFO | train_inner | epoch 005: 147 / 3002 loss=2.516, ppl=5.72, wps=6007.2, ups=0.09, wpb=64850, bsz=128, num_updates=12083, lr=9.99113e-05, gnorm=2.009, loss_scale=2, train_wall=10, gb_free=2.8, wall=139463 2021-06-20 09:23:20 | INFO | train_inner | epoch 005: 148 / 3002 loss=2.427, ppl=5.38, wps=5928.6, ups=0.09, wpb=64860, bsz=128, num_updates=12084, lr=9.99113e-05, gnorm=2.007, loss_scale=2, train_wall=10, gb_free=2.8, wall=139474 2021-06-20 09:23:31 | INFO | train_inner | epoch 005: 149 / 3002 loss=2.513, ppl=5.71, wps=5895.3, ups=0.09, wpb=64902, bsz=128, num_updates=12085, lr=9.99113e-05, gnorm=1.978, loss_scale=2, train_wall=11, gb_free=2.8, wall=139485 2021-06-20 09:23:42 | INFO | train_inner | epoch 005: 150 / 3002 loss=2.494, ppl=5.63, wps=5720.6, ups=0.09, wpb=64775, bsz=128, num_updates=12086, lr=9.99113e-05, gnorm=1.988, loss_scale=2, train_wall=11, gb_free=2.8, wall=139497 2021-06-20 09:23:53 | INFO | train_inner | epoch 005: 151 / 3002 loss=2.684, ppl=6.43, wps=5857.9, ups=0.09, wpb=64813, bsz=128, num_updates=12087, lr=9.99113e-05, gnorm=2.106, loss_scale=2, train_wall=11, gb_free=2.8, wall=139508 2021-06-20 09:24:05 | INFO | train_inner | epoch 005: 152 / 3002 loss=2.472, ppl=5.55, wps=5758.7, ups=0.09, wpb=64840, bsz=128, num_updates=12088, lr=9.99113e-05, gnorm=2.038, loss_scale=2, train_wall=11, gb_free=2.8, wall=139519 2021-06-20 09:24:16 | INFO | train_inner | epoch 005: 153 / 3002 loss=2.286, ppl=4.88, wps=5677.2, ups=0.09, wpb=64780, bsz=128, num_updates=12089, lr=9.99113e-05, gnorm=1.956, loss_scale=2, train_wall=11, gb_free=2.8, wall=139530 2021-06-20 09:24:27 | INFO | train_inner | epoch 005: 154 / 3002 loss=2.529, ppl=5.77, wps=5773, ups=0.09, wpb=64831, bsz=128, num_updates=12090, lr=9.99113e-05, gnorm=2.01, loss_scale=2, train_wall=11, gb_free=2.8, wall=139542 2021-06-20 09:24:38 | INFO | train_inner | epoch 005: 155 / 3002 loss=2.568, ppl=5.93, wps=5820.3, ups=0.09, wpb=64857, bsz=128, num_updates=12091, lr=9.99113e-05, gnorm=2.102, loss_scale=2, train_wall=11, gb_free=2.8, wall=139553 2021-06-20 09:24:49 | INFO | train_inner | epoch 005: 156 / 3002 loss=2.593, ppl=6.03, wps=5938.1, ups=0.09, wpb=64852, bsz=128, num_updates=12092, lr=9.99113e-05, gnorm=2.198, loss_scale=2, train_wall=10, gb_free=2.8, wall=139564 2021-06-20 09:25:01 | INFO | train_inner | epoch 005: 157 / 3002 loss=2.483, ppl=5.59, wps=5794.6, ups=0.09, wpb=64820, bsz=128, num_updates=12093, lr=9.99112e-05, gnorm=2.177, loss_scale=2, train_wall=11, gb_free=2.8, wall=139575 2021-06-20 09:25:12 | INFO | train_inner | epoch 005: 158 / 3002 loss=2.647, ppl=6.26, wps=5751.5, ups=0.09, wpb=64860, bsz=128, num_updates=12094, lr=9.99112e-05, gnorm=5.268, loss_scale=2, train_wall=11, gb_free=2.8, wall=139586 2021-06-20 09:25:23 | INFO | train_inner | epoch 005: 159 / 3002 loss=2.615, ppl=6.12, wps=5800.9, ups=0.09, wpb=64810, bsz=128, num_updates=12095, lr=9.99112e-05, gnorm=2.104, loss_scale=2, train_wall=11, gb_free=2.8, wall=139597 2021-06-20 09:25:34 | INFO | train_inner | epoch 005: 160 / 3002 loss=2.491, ppl=5.62, wps=5779.5, ups=0.09, wpb=64783, bsz=128, num_updates=12096, lr=9.99112e-05, gnorm=1.998, loss_scale=2, train_wall=11, gb_free=2.8, wall=139609 2021-06-20 09:25:45 | INFO | train_inner | epoch 005: 161 / 3002 loss=2.417, ppl=5.34, wps=5844.8, ups=0.09, wpb=64776, bsz=128, num_updates=12097, lr=9.99112e-05, gnorm=2.038, loss_scale=2, train_wall=11, gb_free=2.8, wall=139620 2021-06-20 09:25:56 | INFO | train_inner | epoch 005: 162 / 3002 loss=2.624, ppl=6.16, wps=5823.7, ups=0.09, wpb=64777, bsz=128, num_updates=12098, lr=9.99112e-05, gnorm=2.159, loss_scale=2, train_wall=11, gb_free=2.8, wall=139631 2021-06-20 09:26:08 | INFO | train_inner | epoch 005: 163 / 3002 loss=2.448, ppl=5.46, wps=5794.7, ups=0.09, wpb=64791, bsz=128, num_updates=12099, lr=9.99112e-05, gnorm=2.013, loss_scale=2, train_wall=11, gb_free=2.8, wall=139642 2021-06-20 09:26:19 | INFO | train_inner | epoch 005: 164 / 3002 loss=2.59, ppl=6.02, wps=5899.5, ups=0.09, wpb=64875, bsz=128, num_updates=12100, lr=9.99112e-05, gnorm=2.177, loss_scale=2, train_wall=11, gb_free=2.8, wall=139653 2021-06-20 09:26:30 | INFO | train_inner | epoch 005: 165 / 3002 loss=2.44, ppl=5.43, wps=5841, ups=0.09, wpb=64758, bsz=128, num_updates=12101, lr=9.99112e-05, gnorm=2.12, loss_scale=2, train_wall=11, gb_free=2.8, wall=139664 2021-06-20 09:26:41 | INFO | train_inner | epoch 005: 166 / 3002 loss=2.462, ppl=5.51, wps=5750, ups=0.09, wpb=64868, bsz=128, num_updates=12102, lr=9.99112e-05, gnorm=2.286, loss_scale=2, train_wall=11, gb_free=2.8, wall=139675 2021-06-20 09:26:52 | INFO | train_inner | epoch 005: 167 / 3002 loss=2.477, ppl=5.57, wps=5808.6, ups=0.09, wpb=64874, bsz=128, num_updates=12103, lr=9.99112e-05, gnorm=2.101, loss_scale=2, train_wall=11, gb_free=2.8, wall=139687 2021-06-20 09:27:03 | INFO | train_inner | epoch 005: 168 / 3002 loss=2.541, ppl=5.82, wps=5817.6, ups=0.09, wpb=64827, bsz=128, num_updates=12104, lr=9.99112e-05, gnorm=2.04, loss_scale=2, train_wall=11, gb_free=2.8, wall=139698 2021-06-20 09:27:14 | INFO | train_inner | epoch 005: 169 / 3002 loss=2.541, ppl=5.82, wps=5825.7, ups=0.09, wpb=64760, bsz=128, num_updates=12105, lr=9.99112e-05, gnorm=2.038, loss_scale=2, train_wall=11, gb_free=2.8, wall=139709 2021-06-20 09:27:26 | INFO | train_inner | epoch 005: 170 / 3002 loss=2.709, ppl=6.54, wps=5784, ups=0.09, wpb=64790, bsz=128, num_updates=12106, lr=9.99111e-05, gnorm=2.143, loss_scale=2, train_wall=11, gb_free=2.8, wall=139720 2021-06-20 09:27:37 | INFO | train_inner | epoch 005: 171 / 3002 loss=2.654, ppl=6.29, wps=5845.9, ups=0.09, wpb=64827, bsz=128, num_updates=12107, lr=9.99111e-05, gnorm=3.846, loss_scale=2, train_wall=11, gb_free=2.8, wall=139731 2021-06-20 09:27:48 | INFO | train_inner | epoch 005: 172 / 3002 loss=2.58, ppl=5.98, wps=5849.7, ups=0.09, wpb=64866, bsz=128, num_updates=12108, lr=9.99111e-05, gnorm=1.97, loss_scale=2, train_wall=11, gb_free=2.8, wall=139742 2021-06-20 09:27:59 | INFO | train_inner | epoch 005: 173 / 3002 loss=2.482, ppl=5.59, wps=5794.1, ups=0.09, wpb=64795, bsz=128, num_updates=12109, lr=9.99111e-05, gnorm=2.039, loss_scale=2, train_wall=11, gb_free=2.8, wall=139753 2021-06-20 09:28:10 | INFO | train_inner | epoch 005: 174 / 3002 loss=2.527, ppl=5.76, wps=5894.6, ups=0.09, wpb=64764, bsz=128, num_updates=12110, lr=9.99111e-05, gnorm=2.113, loss_scale=2, train_wall=11, gb_free=2.8, wall=139764 2021-06-20 09:28:21 | INFO | train_inner | epoch 005: 175 / 3002 loss=2.607, ppl=6.09, wps=5939, ups=0.09, wpb=64864, bsz=128, num_updates=12111, lr=9.99111e-05, gnorm=2.008, loss_scale=2, train_wall=10, gb_free=2.8, wall=139775 2021-06-20 09:28:32 | INFO | train_inner | epoch 005: 176 / 3002 loss=2.52, ppl=5.73, wps=5736.4, ups=0.09, wpb=64813, bsz=128, num_updates=12112, lr=9.99111e-05, gnorm=2.054, loss_scale=2, train_wall=11, gb_free=2.8, wall=139787 2021-06-20 09:28:43 | INFO | train_inner | epoch 005: 177 / 3002 loss=2.498, ppl=5.65, wps=5806.5, ups=0.09, wpb=64828, bsz=128, num_updates=12113, lr=9.99111e-05, gnorm=2.266, loss_scale=2, train_wall=11, gb_free=2.8, wall=139798 2021-06-20 09:28:54 | INFO | train_inner | epoch 005: 178 / 3002 loss=2.459, ppl=5.5, wps=5872.2, ups=0.09, wpb=64828, bsz=128, num_updates=12114, lr=9.99111e-05, gnorm=2.275, loss_scale=2, train_wall=11, gb_free=2.8, wall=139809 2021-06-20 09:29:06 | INFO | train_inner | epoch 005: 179 / 3002 loss=2.544, ppl=5.83, wps=5719.6, ups=0.09, wpb=64844, bsz=128, num_updates=12115, lr=9.99111e-05, gnorm=2.024, loss_scale=2, train_wall=11, gb_free=2.8, wall=139820 2021-06-20 09:29:17 | INFO | train_inner | epoch 005: 180 / 3002 loss=2.577, ppl=5.97, wps=5821.2, ups=0.09, wpb=64730, bsz=128, num_updates=12116, lr=9.99111e-05, gnorm=2.021, loss_scale=2, train_wall=11, gb_free=2.8, wall=139831 2021-06-20 09:29:28 | INFO | train_inner | epoch 005: 181 / 3002 loss=2.423, ppl=5.36, wps=5843.3, ups=0.09, wpb=64787, bsz=128, num_updates=12117, lr=9.99111e-05, gnorm=2.065, loss_scale=2, train_wall=11, gb_free=2.8, wall=139842 2021-06-20 09:29:39 | INFO | train_inner | epoch 005: 182 / 3002 loss=2.619, ppl=6.14, wps=5838.6, ups=0.09, wpb=64831, bsz=128, num_updates=12118, lr=9.9911e-05, gnorm=2.041, loss_scale=2, train_wall=11, gb_free=2.8, wall=139853 2021-06-20 09:29:50 | INFO | train_inner | epoch 005: 183 / 3002 loss=2.455, ppl=5.48, wps=5874, ups=0.09, wpb=64898, bsz=128, num_updates=12119, lr=9.9911e-05, gnorm=1.978, loss_scale=2, train_wall=11, gb_free=2.8, wall=139864 2021-06-20 09:30:01 | INFO | train_inner | epoch 005: 184 / 3002 loss=2.568, ppl=5.93, wps=5691.7, ups=0.09, wpb=64803, bsz=128, num_updates=12120, lr=9.9911e-05, gnorm=2.086, loss_scale=2, train_wall=11, gb_free=2.8, wall=139876 2021-06-20 09:30:13 | INFO | train_inner | epoch 005: 185 / 3002 loss=2.437, ppl=5.42, wps=5837, ups=0.09, wpb=64916, bsz=128, num_updates=12121, lr=9.9911e-05, gnorm=2.008, loss_scale=2, train_wall=11, gb_free=2.8, wall=139887 2021-06-20 09:30:24 | INFO | train_inner | epoch 005: 186 / 3002 loss=2.612, ppl=6.11, wps=5804.5, ups=0.09, wpb=64816, bsz=128, num_updates=12122, lr=9.9911e-05, gnorm=2.003, loss_scale=2, train_wall=11, gb_free=2.8, wall=139898 2021-06-20 09:30:35 | INFO | train_inner | epoch 005: 187 / 3002 loss=2.457, ppl=5.49, wps=5795.5, ups=0.09, wpb=64801, bsz=128, num_updates=12123, lr=9.9911e-05, gnorm=2.119, loss_scale=2, train_wall=11, gb_free=2.8, wall=139909 2021-06-20 09:30:46 | INFO | train_inner | epoch 005: 188 / 3002 loss=2.66, ppl=6.32, wps=5820.2, ups=0.09, wpb=64777, bsz=128, num_updates=12124, lr=9.9911e-05, gnorm=2.112, loss_scale=2, train_wall=11, gb_free=2.8, wall=139920 2021-06-20 09:30:57 | INFO | train_inner | epoch 005: 189 / 3002 loss=2.456, ppl=5.49, wps=5826.7, ups=0.09, wpb=64879, bsz=128, num_updates=12125, lr=9.9911e-05, gnorm=2.053, loss_scale=2, train_wall=11, gb_free=2.8, wall=139932 2021-06-20 09:31:08 | INFO | train_inner | epoch 005: 190 / 3002 loss=2.589, ppl=6.02, wps=5796.3, ups=0.09, wpb=64739, bsz=128, num_updates=12126, lr=9.9911e-05, gnorm=2.275, loss_scale=2, train_wall=11, gb_free=2.8, wall=139943 2021-06-20 09:31:19 | INFO | train_inner | epoch 005: 191 / 3002 loss=2.616, ppl=6.13, wps=5907.9, ups=0.09, wpb=64816, bsz=128, num_updates=12127, lr=9.9911e-05, gnorm=2.094, loss_scale=2, train_wall=11, gb_free=2.8, wall=139954 2021-06-20 09:31:31 | INFO | train_inner | epoch 005: 192 / 3002 loss=2.502, ppl=5.67, wps=5811.8, ups=0.09, wpb=64826, bsz=128, num_updates=12128, lr=9.9911e-05, gnorm=2.053, loss_scale=4, train_wall=11, gb_free=2.8, wall=139965 2021-06-20 09:31:42 | INFO | train_inner | epoch 005: 193 / 3002 loss=2.532, ppl=5.78, wps=5855.8, ups=0.09, wpb=64839, bsz=128, num_updates=12129, lr=9.9911e-05, gnorm=2.017, loss_scale=4, train_wall=11, gb_free=2.8, wall=139976 2021-06-20 09:31:53 | INFO | train_inner | epoch 005: 194 / 3002 loss=2.49, ppl=5.62, wps=5941.8, ups=0.09, wpb=64901, bsz=128, num_updates=12130, lr=9.9911e-05, gnorm=1.971, loss_scale=4, train_wall=10, gb_free=2.8, wall=139987 2021-06-20 09:32:04 | INFO | train_inner | epoch 005: 195 / 3002 loss=2.67, ppl=6.36, wps=5847.3, ups=0.09, wpb=64820, bsz=128, num_updates=12131, lr=9.99109e-05, gnorm=2.048, loss_scale=4, train_wall=11, gb_free=2.8, wall=139998 2021-06-20 09:32:15 | INFO | train_inner | epoch 005: 196 / 3002 loss=2.529, ppl=5.77, wps=5870.2, ups=0.09, wpb=64892, bsz=128, num_updates=12132, lr=9.99109e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=140009 2021-06-20 09:32:26 | INFO | train_inner | epoch 005: 197 / 3002 loss=2.659, ppl=6.32, wps=5869.5, ups=0.09, wpb=64781, bsz=128, num_updates=12133, lr=9.99109e-05, gnorm=2.008, loss_scale=4, train_wall=11, gb_free=2.8, wall=140020 2021-06-20 09:32:37 | INFO | train_inner | epoch 005: 198 / 3002 loss=2.423, ppl=5.36, wps=5876, ups=0.09, wpb=64806, bsz=128, num_updates=12134, lr=9.99109e-05, gnorm=2.03, loss_scale=4, train_wall=11, gb_free=2.8, wall=140031 2021-06-20 09:32:48 | INFO | train_inner | epoch 005: 199 / 3002 loss=2.58, ppl=5.98, wps=5854.7, ups=0.09, wpb=64866, bsz=128, num_updates=12135, lr=9.99109e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=140042 2021-06-20 09:32:59 | INFO | train_inner | epoch 005: 200 / 3002 loss=2.507, ppl=5.68, wps=5936.1, ups=0.09, wpb=64813, bsz=128, num_updates=12136, lr=9.99109e-05, gnorm=1.906, loss_scale=4, train_wall=10, gb_free=2.8, wall=140053 2021-06-20 09:33:10 | INFO | train_inner | epoch 005: 201 / 3002 loss=2.483, ppl=5.59, wps=5862.8, ups=0.09, wpb=64857, bsz=128, num_updates=12137, lr=9.99109e-05, gnorm=4.701, loss_scale=4, train_wall=11, gb_free=2.8, wall=140064 2021-06-20 09:33:21 | INFO | train_inner | epoch 005: 202 / 3002 loss=2.53, ppl=5.77, wps=5856.8, ups=0.09, wpb=64822, bsz=128, num_updates=12138, lr=9.99109e-05, gnorm=1.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=140075 2021-06-20 09:33:32 | INFO | train_inner | epoch 005: 203 / 3002 loss=2.531, ppl=5.78, wps=5739.5, ups=0.09, wpb=64906, bsz=128, num_updates=12139, lr=9.99109e-05, gnorm=1.987, loss_scale=4, train_wall=11, gb_free=2.8, wall=140086 2021-06-20 09:33:43 | INFO | train_inner | epoch 005: 204 / 3002 loss=2.606, ppl=6.09, wps=5840.2, ups=0.09, wpb=64833, bsz=128, num_updates=12140, lr=9.99109e-05, gnorm=1.985, loss_scale=4, train_wall=11, gb_free=2.8, wall=140098 2021-06-20 09:33:54 | INFO | train_inner | epoch 005: 205 / 3002 loss=2.523, ppl=5.75, wps=5924.6, ups=0.09, wpb=64915, bsz=128, num_updates=12141, lr=9.99109e-05, gnorm=2.062, loss_scale=4, train_wall=10, gb_free=2.8, wall=140109 2021-06-20 09:34:05 | INFO | train_inner | epoch 005: 206 / 3002 loss=2.556, ppl=5.88, wps=5766.8, ups=0.09, wpb=64739, bsz=128, num_updates=12142, lr=9.99109e-05, gnorm=2.041, loss_scale=4, train_wall=11, gb_free=2.8, wall=140120 2021-06-20 09:34:17 | INFO | train_inner | epoch 005: 207 / 3002 loss=2.441, ppl=5.43, wps=5808.9, ups=0.09, wpb=64807, bsz=128, num_updates=12143, lr=9.99108e-05, gnorm=2.013, loss_scale=4, train_wall=11, gb_free=2.8, wall=140131 2021-06-20 09:34:28 | INFO | train_inner | epoch 005: 208 / 3002 loss=2.593, ppl=6.03, wps=5734.1, ups=0.09, wpb=64813, bsz=128, num_updates=12144, lr=9.99108e-05, gnorm=2.088, loss_scale=4, train_wall=11, gb_free=2.8, wall=140142 2021-06-20 09:34:39 | INFO | train_inner | epoch 005: 209 / 3002 loss=2.589, ppl=6.02, wps=5902.4, ups=0.09, wpb=64830, bsz=128, num_updates=12145, lr=9.99108e-05, gnorm=2.085, loss_scale=4, train_wall=11, gb_free=2.8, wall=140153 2021-06-20 09:34:50 | INFO | train_inner | epoch 005: 210 / 3002 loss=2.534, ppl=5.79, wps=5964.6, ups=0.09, wpb=64853, bsz=128, num_updates=12146, lr=9.99108e-05, gnorm=1.942, loss_scale=4, train_wall=10, gb_free=2.8, wall=140164 2021-06-20 09:35:01 | INFO | train_inner | epoch 005: 211 / 3002 loss=2.524, ppl=5.75, wps=5830.3, ups=0.09, wpb=64753, bsz=128, num_updates=12147, lr=9.99108e-05, gnorm=1.968, loss_scale=4, train_wall=11, gb_free=2.8, wall=140175 2021-06-20 09:35:12 | INFO | train_inner | epoch 005: 212 / 3002 loss=2.387, ppl=5.23, wps=5910.7, ups=0.09, wpb=64777, bsz=128, num_updates=12148, lr=9.99108e-05, gnorm=2.698, loss_scale=4, train_wall=11, gb_free=2.8, wall=140186 2021-06-20 09:35:23 | INFO | train_inner | epoch 005: 213 / 3002 loss=2.626, ppl=6.17, wps=5869.3, ups=0.09, wpb=64860, bsz=128, num_updates=12149, lr=9.99108e-05, gnorm=2.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=140197 2021-06-20 09:35:34 | INFO | train_inner | epoch 005: 214 / 3002 loss=2.581, ppl=5.98, wps=5852.6, ups=0.09, wpb=64805, bsz=128, num_updates=12150, lr=9.99108e-05, gnorm=2.045, loss_scale=4, train_wall=11, gb_free=2.8, wall=140208 2021-06-20 09:35:45 | INFO | train_inner | epoch 005: 215 / 3002 loss=2.513, ppl=5.71, wps=5701.3, ups=0.09, wpb=64808, bsz=128, num_updates=12151, lr=9.99108e-05, gnorm=2.085, loss_scale=4, train_wall=11, gb_free=2.8, wall=140220 2021-06-20 09:35:57 | INFO | train_inner | epoch 005: 216 / 3002 loss=2.521, ppl=5.74, wps=5799.5, ups=0.09, wpb=64819, bsz=128, num_updates=12152, lr=9.99108e-05, gnorm=2.138, loss_scale=4, train_wall=11, gb_free=2.8, wall=140231 2021-06-20 09:36:07 | INFO | train_inner | epoch 005: 217 / 3002 loss=2.582, ppl=5.99, wps=5933.5, ups=0.09, wpb=64731, bsz=128, num_updates=12153, lr=9.99108e-05, gnorm=2.04, loss_scale=4, train_wall=10, gb_free=2.8, wall=140242 2021-06-20 09:36:18 | INFO | train_inner | epoch 005: 218 / 3002 loss=2.433, ppl=5.4, wps=5850.9, ups=0.09, wpb=64875, bsz=128, num_updates=12154, lr=9.99108e-05, gnorm=1.954, loss_scale=4, train_wall=11, gb_free=2.8, wall=140253 2021-06-20 09:36:30 | INFO | train_inner | epoch 005: 219 / 3002 loss=2.544, ppl=5.83, wps=5893.6, ups=0.09, wpb=64963, bsz=128, num_updates=12155, lr=9.99108e-05, gnorm=2.109, loss_scale=4, train_wall=11, gb_free=2.8, wall=140264 2021-06-20 09:36:41 | INFO | train_inner | epoch 005: 220 / 3002 loss=2.561, ppl=5.9, wps=5904, ups=0.09, wpb=64898, bsz=128, num_updates=12156, lr=9.99107e-05, gnorm=1.981, loss_scale=4, train_wall=11, gb_free=2.8, wall=140275 2021-06-20 09:36:51 | INFO | train_inner | epoch 005: 221 / 3002 loss=2.509, ppl=5.69, wps=5912.8, ups=0.09, wpb=64769, bsz=128, num_updates=12157, lr=9.99107e-05, gnorm=2.001, loss_scale=4, train_wall=10, gb_free=2.8, wall=140286 2021-06-20 09:37:03 | INFO | train_inner | epoch 005: 222 / 3002 loss=2.401, ppl=5.28, wps=5776.2, ups=0.09, wpb=64860, bsz=128, num_updates=12158, lr=9.99107e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=140297 2021-06-20 09:37:14 | INFO | train_inner | epoch 005: 223 / 3002 loss=2.634, ppl=6.21, wps=5749.8, ups=0.09, wpb=64866, bsz=128, num_updates=12159, lr=9.99107e-05, gnorm=2.031, loss_scale=4, train_wall=11, gb_free=2.8, wall=140308 2021-06-20 09:37:25 | INFO | train_inner | epoch 005: 224 / 3002 loss=2.465, ppl=5.52, wps=5892.1, ups=0.09, wpb=64856, bsz=128, num_updates=12160, lr=9.99107e-05, gnorm=2.08, loss_scale=4, train_wall=11, gb_free=2.8, wall=140319 2021-06-20 09:37:36 | INFO | train_inner | epoch 005: 225 / 3002 loss=2.488, ppl=5.61, wps=5788.2, ups=0.09, wpb=64802, bsz=128, num_updates=12161, lr=9.99107e-05, gnorm=1.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=140331 2021-06-20 09:37:47 | INFO | train_inner | epoch 005: 226 / 3002 loss=2.551, ppl=5.86, wps=5811.2, ups=0.09, wpb=64835, bsz=128, num_updates=12162, lr=9.99107e-05, gnorm=2.015, loss_scale=4, train_wall=11, gb_free=2.8, wall=140342 2021-06-20 09:37:59 | INFO | train_inner | epoch 005: 227 / 3002 loss=2.56, ppl=5.9, wps=5777.7, ups=0.09, wpb=64812, bsz=128, num_updates=12163, lr=9.99107e-05, gnorm=2.014, loss_scale=4, train_wall=11, gb_free=2.8, wall=140353 2021-06-20 09:38:09 | INFO | train_inner | epoch 005: 228 / 3002 loss=2.526, ppl=5.76, wps=5943.8, ups=0.09, wpb=64835, bsz=128, num_updates=12164, lr=9.99107e-05, gnorm=2.112, loss_scale=4, train_wall=10, gb_free=2.8, wall=140364 2021-06-20 09:38:21 | INFO | train_inner | epoch 005: 229 / 3002 loss=2.681, ppl=6.41, wps=5747.5, ups=0.09, wpb=64834, bsz=128, num_updates=12165, lr=9.99107e-05, gnorm=2.036, loss_scale=4, train_wall=11, gb_free=2.8, wall=140375 2021-06-20 09:38:32 | INFO | train_inner | epoch 005: 230 / 3002 loss=2.578, ppl=5.97, wps=5898.5, ups=0.09, wpb=64857, bsz=128, num_updates=12166, lr=9.99107e-05, gnorm=2.065, loss_scale=4, train_wall=11, gb_free=2.8, wall=140386 2021-06-20 09:38:43 | INFO | train_inner | epoch 005: 231 / 3002 loss=2.416, ppl=5.34, wps=5854.5, ups=0.09, wpb=64794, bsz=128, num_updates=12167, lr=9.99107e-05, gnorm=1.906, loss_scale=4, train_wall=11, gb_free=2.8, wall=140397 2021-06-20 09:38:54 | INFO | train_inner | epoch 005: 232 / 3002 loss=2.635, ppl=6.21, wps=5836.9, ups=0.09, wpb=64759, bsz=128, num_updates=12168, lr=9.99106e-05, gnorm=2.226, loss_scale=4, train_wall=11, gb_free=2.8, wall=140408 2021-06-20 09:39:05 | INFO | train_inner | epoch 005: 233 / 3002 loss=2.418, ppl=5.34, wps=5857.8, ups=0.09, wpb=64936, bsz=128, num_updates=12169, lr=9.99106e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=140419 2021-06-20 09:39:16 | INFO | train_inner | epoch 005: 234 / 3002 loss=2.561, ppl=5.9, wps=5896.4, ups=0.09, wpb=64787, bsz=128, num_updates=12170, lr=9.99106e-05, gnorm=2.096, loss_scale=4, train_wall=11, gb_free=2.8, wall=140430 2021-06-20 09:39:27 | INFO | train_inner | epoch 005: 235 / 3002 loss=2.597, ppl=6.05, wps=5903.6, ups=0.09, wpb=64813, bsz=128, num_updates=12171, lr=9.99106e-05, gnorm=2.059, loss_scale=4, train_wall=10, gb_free=2.8, wall=140441 2021-06-20 09:39:38 | INFO | train_inner | epoch 005: 236 / 3002 loss=2.509, ppl=5.69, wps=5872.2, ups=0.09, wpb=64835, bsz=128, num_updates=12172, lr=9.99106e-05, gnorm=2.046, loss_scale=4, train_wall=11, gb_free=2.8, wall=140452 2021-06-20 09:39:49 | INFO | train_inner | epoch 005: 237 / 3002 loss=2.487, ppl=5.61, wps=5793.3, ups=0.09, wpb=64814, bsz=128, num_updates=12173, lr=9.99106e-05, gnorm=2.014, loss_scale=4, train_wall=11, gb_free=2.8, wall=140464 2021-06-20 09:40:00 | INFO | train_inner | epoch 005: 238 / 3002 loss=2.412, ppl=5.32, wps=6040.2, ups=0.09, wpb=64923, bsz=128, num_updates=12174, lr=9.99106e-05, gnorm=2.145, loss_scale=4, train_wall=10, gb_free=2.8, wall=140474 2021-06-20 09:40:11 | INFO | train_inner | epoch 005: 239 / 3002 loss=2.539, ppl=5.81, wps=5874.7, ups=0.09, wpb=64893, bsz=128, num_updates=12175, lr=9.99106e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=140485 2021-06-20 09:40:22 | INFO | train_inner | epoch 005: 240 / 3002 loss=2.428, ppl=5.38, wps=5955.9, ups=0.09, wpb=64869, bsz=128, num_updates=12176, lr=9.99106e-05, gnorm=2.217, loss_scale=4, train_wall=10, gb_free=2.8, wall=140496 2021-06-20 09:40:33 | INFO | train_inner | epoch 005: 241 / 3002 loss=2.477, ppl=5.57, wps=5765.7, ups=0.09, wpb=64708, bsz=128, num_updates=12177, lr=9.99106e-05, gnorm=2.089, loss_scale=4, train_wall=11, gb_free=2.8, wall=140507 2021-06-20 09:40:44 | INFO | train_inner | epoch 005: 242 / 3002 loss=2.598, ppl=6.06, wps=5943, ups=0.09, wpb=64792, bsz=128, num_updates=12178, lr=9.99106e-05, gnorm=1.935, loss_scale=4, train_wall=10, gb_free=2.8, wall=140518 2021-06-20 09:40:55 | INFO | train_inner | epoch 005: 243 / 3002 loss=2.512, ppl=5.7, wps=5899.4, ups=0.09, wpb=64822, bsz=128, num_updates=12179, lr=9.99106e-05, gnorm=5.656, loss_scale=4, train_wall=11, gb_free=2.8, wall=140529 2021-06-20 09:41:06 | INFO | train_inner | epoch 005: 244 / 3002 loss=2.459, ppl=5.5, wps=5778, ups=0.09, wpb=64836, bsz=128, num_updates=12180, lr=9.99106e-05, gnorm=1.934, loss_scale=4, train_wall=11, gb_free=2.8, wall=140541 2021-06-20 09:41:17 | INFO | train_inner | epoch 005: 245 / 3002 loss=2.642, ppl=6.24, wps=5851.5, ups=0.09, wpb=64886, bsz=128, num_updates=12181, lr=9.99105e-05, gnorm=2.025, loss_scale=4, train_wall=11, gb_free=2.8, wall=140552 2021-06-20 09:41:28 | INFO | train_inner | epoch 005: 246 / 3002 loss=2.678, ppl=6.4, wps=5833.7, ups=0.09, wpb=64827, bsz=128, num_updates=12182, lr=9.99105e-05, gnorm=2.064, loss_scale=4, train_wall=11, gb_free=2.8, wall=140563 2021-06-20 09:41:39 | INFO | train_inner | epoch 005: 247 / 3002 loss=2.638, ppl=6.23, wps=5884.1, ups=0.09, wpb=64794, bsz=128, num_updates=12183, lr=9.99105e-05, gnorm=1.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=140574 2021-06-20 09:41:50 | INFO | train_inner | epoch 005: 248 / 3002 loss=2.707, ppl=6.53, wps=5964.6, ups=0.09, wpb=64768, bsz=128, num_updates=12184, lr=9.99105e-05, gnorm=1.994, loss_scale=4, train_wall=10, gb_free=2.8, wall=140585 2021-06-20 09:42:01 | INFO | train_inner | epoch 005: 249 / 3002 loss=2.496, ppl=5.64, wps=5897.5, ups=0.09, wpb=64905, bsz=128, num_updates=12185, lr=9.99105e-05, gnorm=2.015, loss_scale=4, train_wall=11, gb_free=2.8, wall=140596 2021-06-20 09:42:13 | INFO | train_inner | epoch 005: 250 / 3002 loss=2.451, ppl=5.47, wps=5731.7, ups=0.09, wpb=64793, bsz=128, num_updates=12186, lr=9.99105e-05, gnorm=2.043, loss_scale=4, train_wall=11, gb_free=2.8, wall=140607 2021-06-20 09:42:23 | INFO | train_inner | epoch 005: 251 / 3002 loss=2.555, ppl=5.88, wps=5978.8, ups=0.09, wpb=64874, bsz=128, num_updates=12187, lr=9.99105e-05, gnorm=2.128, loss_scale=4, train_wall=10, gb_free=2.8, wall=140618 2021-06-20 09:42:35 | INFO | train_inner | epoch 005: 252 / 3002 loss=2.518, ppl=5.73, wps=5825.8, ups=0.09, wpb=64798, bsz=128, num_updates=12188, lr=9.99105e-05, gnorm=2.039, loss_scale=4, train_wall=11, gb_free=2.8, wall=140629 2021-06-20 09:42:46 | INFO | train_inner | epoch 005: 253 / 3002 loss=2.723, ppl=6.6, wps=5878.8, ups=0.09, wpb=64858, bsz=128, num_updates=12189, lr=9.99105e-05, gnorm=1.952, loss_scale=4, train_wall=11, gb_free=2.8, wall=140640 2021-06-20 09:42:57 | INFO | train_inner | epoch 005: 254 / 3002 loss=2.646, ppl=6.26, wps=5913.5, ups=0.09, wpb=64750, bsz=128, num_updates=12190, lr=9.99105e-05, gnorm=1.994, loss_scale=4, train_wall=11, gb_free=2.8, wall=140651 2021-06-20 09:43:08 | INFO | train_inner | epoch 005: 255 / 3002 loss=2.504, ppl=5.67, wps=5849.2, ups=0.09, wpb=64794, bsz=128, num_updates=12191, lr=9.99105e-05, gnorm=1.936, loss_scale=4, train_wall=11, gb_free=2.8, wall=140662 2021-06-20 09:43:19 | INFO | train_inner | epoch 005: 256 / 3002 loss=2.473, ppl=5.55, wps=5811.8, ups=0.09, wpb=64840, bsz=128, num_updates=12192, lr=9.99105e-05, gnorm=1.971, loss_scale=4, train_wall=11, gb_free=2.8, wall=140673 2021-06-20 09:43:30 | INFO | train_inner | epoch 005: 257 / 3002 loss=2.701, ppl=6.5, wps=5814.6, ups=0.09, wpb=64873, bsz=128, num_updates=12193, lr=9.99104e-05, gnorm=2.067, loss_scale=4, train_wall=11, gb_free=2.8, wall=140684 2021-06-20 09:43:41 | INFO | train_inner | epoch 005: 258 / 3002 loss=2.671, ppl=6.37, wps=5780.6, ups=0.09, wpb=64779, bsz=128, num_updates=12194, lr=9.99104e-05, gnorm=2.006, loss_scale=4, train_wall=11, gb_free=2.8, wall=140695 2021-06-20 09:43:52 | INFO | train_inner | epoch 005: 259 / 3002 loss=2.536, ppl=5.8, wps=5930.4, ups=0.09, wpb=64847, bsz=128, num_updates=12195, lr=9.99104e-05, gnorm=1.99, loss_scale=4, train_wall=10, gb_free=2.8, wall=140706 2021-06-20 09:44:03 | INFO | train_inner | epoch 005: 260 / 3002 loss=2.462, ppl=5.51, wps=5782.4, ups=0.09, wpb=64819, bsz=128, num_updates=12196, lr=9.99104e-05, gnorm=2.046, loss_scale=4, train_wall=11, gb_free=2.8, wall=140718 2021-06-20 09:44:14 | INFO | train_inner | epoch 005: 261 / 3002 loss=2.428, ppl=5.38, wps=5821.9, ups=0.09, wpb=64790, bsz=128, num_updates=12197, lr=9.99104e-05, gnorm=2.006, loss_scale=4, train_wall=11, gb_free=2.8, wall=140729 2021-06-20 09:44:25 | INFO | train_inner | epoch 005: 262 / 3002 loss=2.479, ppl=5.57, wps=5860.1, ups=0.09, wpb=64788, bsz=128, num_updates=12198, lr=9.99104e-05, gnorm=2.021, loss_scale=4, train_wall=11, gb_free=2.8, wall=140740 2021-06-20 09:44:37 | INFO | train_inner | epoch 005: 263 / 3002 loss=2.664, ppl=6.34, wps=5844.2, ups=0.09, wpb=64792, bsz=128, num_updates=12199, lr=9.99104e-05, gnorm=2.037, loss_scale=4, train_wall=11, gb_free=2.8, wall=140751 2021-06-20 09:44:48 | INFO | train_inner | epoch 005: 264 / 3002 loss=2.505, ppl=5.68, wps=5851.4, ups=0.09, wpb=64823, bsz=128, num_updates=12200, lr=9.99104e-05, gnorm=2.386, loss_scale=4, train_wall=11, gb_free=2.8, wall=140762 2021-06-20 09:44:59 | INFO | train_inner | epoch 005: 265 / 3002 loss=2.511, ppl=5.7, wps=5884.3, ups=0.09, wpb=64735, bsz=128, num_updates=12201, lr=9.99104e-05, gnorm=2.072, loss_scale=4, train_wall=11, gb_free=2.8, wall=140773 2021-06-20 09:45:10 | INFO | train_inner | epoch 005: 266 / 3002 loss=2.476, ppl=5.56, wps=5953, ups=0.09, wpb=64883, bsz=128, num_updates=12202, lr=9.99104e-05, gnorm=1.962, loss_scale=4, train_wall=10, gb_free=2.8, wall=140784 2021-06-20 09:45:21 | INFO | train_inner | epoch 005: 267 / 3002 loss=2.485, ppl=5.6, wps=5771.9, ups=0.09, wpb=64750, bsz=128, num_updates=12203, lr=9.99104e-05, gnorm=2.821, loss_scale=4, train_wall=11, gb_free=2.8, wall=140795 2021-06-20 09:45:32 | INFO | train_inner | epoch 005: 268 / 3002 loss=2.532, ppl=5.79, wps=5999, ups=0.09, wpb=64842, bsz=128, num_updates=12204, lr=9.99104e-05, gnorm=2.097, loss_scale=4, train_wall=10, gb_free=2.8, wall=140806 2021-06-20 09:45:43 | INFO | train_inner | epoch 005: 269 / 3002 loss=2.48, ppl=5.58, wps=5838.6, ups=0.09, wpb=64882, bsz=128, num_updates=12205, lr=9.99104e-05, gnorm=2.006, loss_scale=4, train_wall=11, gb_free=2.8, wall=140817 2021-06-20 09:45:54 | INFO | train_inner | epoch 005: 270 / 3002 loss=2.576, ppl=5.96, wps=5775.7, ups=0.09, wpb=64795, bsz=128, num_updates=12206, lr=9.99103e-05, gnorm=2.183, loss_scale=4, train_wall=11, gb_free=2.8, wall=140828 2021-06-20 09:46:05 | INFO | train_inner | epoch 005: 271 / 3002 loss=2.537, ppl=5.8, wps=5885.5, ups=0.09, wpb=64851, bsz=128, num_updates=12207, lr=9.99103e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=140839 2021-06-20 09:46:16 | INFO | train_inner | epoch 005: 272 / 3002 loss=2.67, ppl=6.37, wps=5801.7, ups=0.09, wpb=64794, bsz=128, num_updates=12208, lr=9.99103e-05, gnorm=3.224, loss_scale=4, train_wall=11, gb_free=2.8, wall=140850 2021-06-20 09:46:27 | INFO | train_inner | epoch 005: 273 / 3002 loss=2.524, ppl=5.75, wps=5820, ups=0.09, wpb=64878, bsz=128, num_updates=12209, lr=9.99103e-05, gnorm=2.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=140862 2021-06-20 09:46:38 | INFO | train_inner | epoch 005: 274 / 3002 loss=2.601, ppl=6.07, wps=5889.8, ups=0.09, wpb=64910, bsz=128, num_updates=12210, lr=9.99103e-05, gnorm=1.992, loss_scale=4, train_wall=11, gb_free=2.8, wall=140873 2021-06-20 09:46:49 | INFO | train_inner | epoch 005: 275 / 3002 loss=2.432, ppl=5.4, wps=5894.6, ups=0.09, wpb=64738, bsz=128, num_updates=12211, lr=9.99103e-05, gnorm=2.124, loss_scale=4, train_wall=11, gb_free=2.8, wall=140884 2021-06-20 09:47:00 | INFO | train_inner | epoch 005: 276 / 3002 loss=2.618, ppl=6.14, wps=6022.3, ups=0.09, wpb=64925, bsz=128, num_updates=12212, lr=9.99103e-05, gnorm=2.018, loss_scale=4, train_wall=10, gb_free=2.8, wall=140894 2021-06-20 09:47:11 | INFO | train_inner | epoch 005: 277 / 3002 loss=2.688, ppl=6.44, wps=5726.8, ups=0.09, wpb=64734, bsz=128, num_updates=12213, lr=9.99103e-05, gnorm=2.601, loss_scale=4, train_wall=11, gb_free=2.8, wall=140906 2021-06-20 09:47:23 | INFO | train_inner | epoch 005: 278 / 3002 loss=2.61, ppl=6.1, wps=5781.1, ups=0.09, wpb=64805, bsz=128, num_updates=12214, lr=9.99103e-05, gnorm=2.054, loss_scale=4, train_wall=11, gb_free=2.8, wall=140917 2021-06-20 09:47:34 | INFO | train_inner | epoch 005: 279 / 3002 loss=2.616, ppl=6.13, wps=5713.1, ups=0.09, wpb=64854, bsz=128, num_updates=12215, lr=9.99103e-05, gnorm=1.992, loss_scale=4, train_wall=11, gb_free=2.8, wall=140928 2021-06-20 09:47:45 | INFO | train_inner | epoch 005: 280 / 3002 loss=2.435, ppl=5.41, wps=5781, ups=0.09, wpb=64807, bsz=128, num_updates=12216, lr=9.99103e-05, gnorm=2.012, loss_scale=4, train_wall=11, gb_free=2.8, wall=140939 2021-06-20 09:47:56 | INFO | train_inner | epoch 005: 281 / 3002 loss=2.59, ppl=6.02, wps=5868.9, ups=0.09, wpb=64827, bsz=128, num_updates=12217, lr=9.99103e-05, gnorm=2.669, loss_scale=4, train_wall=11, gb_free=2.8, wall=140950 2021-06-20 09:48:07 | INFO | train_inner | epoch 005: 282 / 3002 loss=2.547, ppl=5.84, wps=5891.5, ups=0.09, wpb=64813, bsz=128, num_updates=12218, lr=9.99102e-05, gnorm=2.114, loss_scale=4, train_wall=11, gb_free=2.8, wall=140961 2021-06-20 09:48:18 | INFO | train_inner | epoch 005: 283 / 3002 loss=2.642, ppl=6.24, wps=5855.6, ups=0.09, wpb=64809, bsz=128, num_updates=12219, lr=9.99102e-05, gnorm=2.1, loss_scale=4, train_wall=11, gb_free=2.8, wall=140973 2021-06-20 09:48:29 | INFO | train_inner | epoch 005: 284 / 3002 loss=2.624, ppl=6.17, wps=5877.7, ups=0.09, wpb=64831, bsz=128, num_updates=12220, lr=9.99102e-05, gnorm=1.985, loss_scale=4, train_wall=11, gb_free=2.8, wall=140984 2021-06-20 09:48:40 | INFO | train_inner | epoch 005: 285 / 3002 loss=2.469, ppl=5.54, wps=5909.2, ups=0.09, wpb=64928, bsz=128, num_updates=12221, lr=9.99102e-05, gnorm=2.057, loss_scale=4, train_wall=11, gb_free=2.8, wall=140995 2021-06-20 09:48:51 | INFO | train_inner | epoch 005: 286 / 3002 loss=2.568, ppl=5.93, wps=5889.6, ups=0.09, wpb=64794, bsz=128, num_updates=12222, lr=9.99102e-05, gnorm=1.972, loss_scale=4, train_wall=11, gb_free=2.8, wall=141006 2021-06-20 09:49:02 | INFO | train_inner | epoch 005: 287 / 3002 loss=2.341, ppl=5.07, wps=5827.4, ups=0.09, wpb=64835, bsz=128, num_updates=12223, lr=9.99102e-05, gnorm=1.946, loss_scale=4, train_wall=11, gb_free=2.8, wall=141017 2021-06-20 09:49:13 | INFO | train_inner | epoch 005: 288 / 3002 loss=2.372, ppl=5.17, wps=5829, ups=0.09, wpb=64811, bsz=128, num_updates=12224, lr=9.99102e-05, gnorm=1.991, loss_scale=4, train_wall=11, gb_free=2.8, wall=141028 2021-06-20 09:49:25 | INFO | train_inner | epoch 005: 289 / 3002 loss=2.669, ppl=6.36, wps=5783.9, ups=0.09, wpb=64813, bsz=128, num_updates=12225, lr=9.99102e-05, gnorm=2.082, loss_scale=4, train_wall=11, gb_free=2.8, wall=141039 2021-06-20 09:49:36 | INFO | train_inner | epoch 005: 290 / 3002 loss=2.505, ppl=5.68, wps=5968.6, ups=0.09, wpb=64839, bsz=128, num_updates=12226, lr=9.99102e-05, gnorm=1.979, loss_scale=4, train_wall=10, gb_free=2.8, wall=141050 2021-06-20 09:49:47 | INFO | train_inner | epoch 005: 291 / 3002 loss=2.482, ppl=5.59, wps=5883.2, ups=0.09, wpb=64837, bsz=128, num_updates=12227, lr=9.99102e-05, gnorm=2.085, loss_scale=4, train_wall=11, gb_free=2.8, wall=141061 2021-06-20 09:49:58 | INFO | train_inner | epoch 005: 292 / 3002 loss=2.572, ppl=5.95, wps=5824, ups=0.09, wpb=64835, bsz=128, num_updates=12228, lr=9.99102e-05, gnorm=1.996, loss_scale=4, train_wall=11, gb_free=2.8, wall=141072 2021-06-20 09:50:08 | INFO | train_inner | epoch 005: 293 / 3002 loss=2.563, ppl=5.91, wps=6025.8, ups=0.09, wpb=64843, bsz=128, num_updates=12229, lr=9.99102e-05, gnorm=1.973, loss_scale=4, train_wall=10, gb_free=2.8, wall=141083 2021-06-20 09:50:20 | INFO | train_inner | epoch 005: 294 / 3002 loss=2.419, ppl=5.35, wps=5841.6, ups=0.09, wpb=64767, bsz=128, num_updates=12230, lr=9.99102e-05, gnorm=1.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=141094 2021-06-20 09:50:31 | INFO | train_inner | epoch 005: 295 / 3002 loss=2.651, ppl=6.28, wps=5792.1, ups=0.09, wpb=64839, bsz=128, num_updates=12231, lr=9.99101e-05, gnorm=1.997, loss_scale=4, train_wall=11, gb_free=2.8, wall=141105 2021-06-20 09:50:42 | INFO | train_inner | epoch 005: 296 / 3002 loss=2.417, ppl=5.34, wps=5898.3, ups=0.09, wpb=64892, bsz=128, num_updates=12232, lr=9.99101e-05, gnorm=1.886, loss_scale=4, train_wall=11, gb_free=2.8, wall=141116 2021-06-20 09:50:53 | INFO | train_inner | epoch 005: 297 / 3002 loss=2.448, ppl=5.46, wps=5778.1, ups=0.09, wpb=64891, bsz=128, num_updates=12233, lr=9.99101e-05, gnorm=2.097, loss_scale=4, train_wall=11, gb_free=2.8, wall=141127 2021-06-20 09:51:04 | INFO | train_inner | epoch 005: 298 / 3002 loss=2.404, ppl=5.29, wps=5853.1, ups=0.09, wpb=64777, bsz=128, num_updates=12234, lr=9.99101e-05, gnorm=2.009, loss_scale=4, train_wall=11, gb_free=2.8, wall=141138 2021-06-20 09:51:15 | INFO | train_inner | epoch 005: 299 / 3002 loss=2.473, ppl=5.55, wps=5687.9, ups=0.09, wpb=64812, bsz=128, num_updates=12235, lr=9.99101e-05, gnorm=2.033, loss_scale=4, train_wall=11, gb_free=2.8, wall=141150 2021-06-20 09:51:26 | INFO | train_inner | epoch 005: 300 / 3002 loss=2.515, ppl=5.72, wps=5867.3, ups=0.09, wpb=64742, bsz=128, num_updates=12236, lr=9.99101e-05, gnorm=1.946, loss_scale=4, train_wall=11, gb_free=2.8, wall=141161 2021-06-20 09:51:37 | INFO | train_inner | epoch 005: 301 / 3002 loss=2.421, ppl=5.36, wps=5952.8, ups=0.09, wpb=64858, bsz=128, num_updates=12237, lr=9.99101e-05, gnorm=1.998, loss_scale=4, train_wall=10, gb_free=2.8, wall=141172 2021-06-20 09:51:48 | INFO | train_inner | epoch 005: 302 / 3002 loss=2.546, ppl=5.84, wps=5905.6, ups=0.09, wpb=64773, bsz=128, num_updates=12238, lr=9.99101e-05, gnorm=1.969, loss_scale=4, train_wall=10, gb_free=2.8, wall=141183 2021-06-20 09:51:59 | INFO | train_inner | epoch 005: 303 / 3002 loss=2.5, ppl=5.66, wps=5894.7, ups=0.09, wpb=64846, bsz=128, num_updates=12239, lr=9.99101e-05, gnorm=2.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=141194 2021-06-20 09:52:10 | INFO | train_inner | epoch 005: 304 / 3002 loss=2.446, ppl=5.45, wps=5884.9, ups=0.09, wpb=64834, bsz=128, num_updates=12240, lr=9.99101e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=141205 2021-06-20 09:52:21 | INFO | train_inner | epoch 005: 305 / 3002 loss=2.451, ppl=5.47, wps=5913.7, ups=0.09, wpb=64835, bsz=128, num_updates=12241, lr=9.99101e-05, gnorm=2.385, loss_scale=4, train_wall=10, gb_free=2.8, wall=141216 2021-06-20 09:52:33 | INFO | train_inner | epoch 005: 306 / 3002 loss=2.532, ppl=5.78, wps=5730, ups=0.09, wpb=64851, bsz=128, num_updates=12242, lr=9.99101e-05, gnorm=2.044, loss_scale=4, train_wall=11, gb_free=2.8, wall=141227 2021-06-20 09:52:44 | INFO | train_inner | epoch 005: 307 / 3002 loss=2.454, ppl=5.48, wps=5891.2, ups=0.09, wpb=64837, bsz=128, num_updates=12243, lr=9.991e-05, gnorm=2.002, loss_scale=4, train_wall=11, gb_free=2.8, wall=141238 2021-06-20 09:52:54 | INFO | train_inner | epoch 005: 308 / 3002 loss=2.503, ppl=5.67, wps=6013.5, ups=0.09, wpb=64798, bsz=128, num_updates=12244, lr=9.991e-05, gnorm=2.89, loss_scale=4, train_wall=10, gb_free=2.8, wall=141249 2021-06-20 09:53:05 | INFO | train_inner | epoch 005: 309 / 3002 loss=2.556, ppl=5.88, wps=5858.5, ups=0.09, wpb=64768, bsz=128, num_updates=12245, lr=9.991e-05, gnorm=1.993, loss_scale=4, train_wall=11, gb_free=2.8, wall=141260 2021-06-20 09:53:17 | INFO | train_inner | epoch 005: 310 / 3002 loss=2.383, ppl=5.22, wps=5852.3, ups=0.09, wpb=64802, bsz=128, num_updates=12246, lr=9.991e-05, gnorm=1.897, loss_scale=4, train_wall=11, gb_free=2.8, wall=141271 2021-06-20 09:53:28 | INFO | train_inner | epoch 005: 311 / 3002 loss=2.444, ppl=5.44, wps=5872, ups=0.09, wpb=64873, bsz=128, num_updates=12247, lr=9.991e-05, gnorm=2.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=141282 2021-06-20 09:53:39 | INFO | train_inner | epoch 005: 312 / 3002 loss=2.511, ppl=5.7, wps=5821.3, ups=0.09, wpb=64797, bsz=128, num_updates=12248, lr=9.991e-05, gnorm=2.05, loss_scale=4, train_wall=11, gb_free=2.8, wall=141293 2021-06-20 09:53:50 | INFO | train_inner | epoch 005: 313 / 3002 loss=2.645, ppl=6.26, wps=5852.5, ups=0.09, wpb=64796, bsz=128, num_updates=12249, lr=9.991e-05, gnorm=2.027, loss_scale=4, train_wall=11, gb_free=2.8, wall=141304 2021-06-20 09:54:01 | INFO | train_inner | epoch 005: 314 / 3002 loss=2.469, ppl=5.54, wps=5863.2, ups=0.09, wpb=64823, bsz=128, num_updates=12250, lr=9.991e-05, gnorm=2.158, loss_scale=4, train_wall=11, gb_free=2.8, wall=141315 2021-06-20 09:54:12 | INFO | train_inner | epoch 005: 315 / 3002 loss=2.581, ppl=5.98, wps=5769.7, ups=0.09, wpb=64769, bsz=128, num_updates=12251, lr=9.991e-05, gnorm=2.025, loss_scale=4, train_wall=11, gb_free=2.8, wall=141326 2021-06-20 09:54:23 | INFO | train_inner | epoch 005: 316 / 3002 loss=2.6, ppl=6.06, wps=5801.6, ups=0.09, wpb=64834, bsz=128, num_updates=12252, lr=9.991e-05, gnorm=1.947, loss_scale=4, train_wall=11, gb_free=2.8, wall=141338 2021-06-20 09:54:34 | INFO | train_inner | epoch 005: 317 / 3002 loss=2.522, ppl=5.74, wps=5795.1, ups=0.09, wpb=64833, bsz=128, num_updates=12253, lr=9.991e-05, gnorm=1.933, loss_scale=4, train_wall=11, gb_free=2.8, wall=141349 2021-06-20 09:54:45 | INFO | train_inner | epoch 005: 318 / 3002 loss=2.646, ppl=6.26, wps=5871.4, ups=0.09, wpb=64860, bsz=128, num_updates=12254, lr=9.991e-05, gnorm=2.056, loss_scale=4, train_wall=11, gb_free=2.8, wall=141360 2021-06-20 09:54:57 | INFO | train_inner | epoch 005: 319 / 3002 loss=2.403, ppl=5.29, wps=5858.6, ups=0.09, wpb=64899, bsz=128, num_updates=12255, lr=9.991e-05, gnorm=1.996, loss_scale=4, train_wall=11, gb_free=2.8, wall=141371 2021-06-20 09:55:08 | INFO | train_inner | epoch 005: 320 / 3002 loss=2.625, ppl=6.17, wps=5836.7, ups=0.09, wpb=64781, bsz=128, num_updates=12256, lr=9.99099e-05, gnorm=2.769, loss_scale=8, train_wall=11, gb_free=2.8, wall=141382 2021-06-20 09:55:19 | INFO | train_inner | epoch 005: 321 / 3002 loss=2.692, ppl=6.46, wps=5790.3, ups=0.09, wpb=64852, bsz=128, num_updates=12257, lr=9.99099e-05, gnorm=2.103, loss_scale=8, train_wall=11, gb_free=2.8, wall=141393 2021-06-20 09:55:30 | INFO | train_inner | epoch 005: 322 / 3002 loss=2.592, ppl=6.03, wps=5878.6, ups=0.09, wpb=64918, bsz=128, num_updates=12258, lr=9.99099e-05, gnorm=2.085, loss_scale=8, train_wall=11, gb_free=2.8, wall=141404 2021-06-20 09:55:41 | INFO | train_inner | epoch 005: 323 / 3002 loss=2.53, ppl=5.78, wps=5764.8, ups=0.09, wpb=64811, bsz=128, num_updates=12259, lr=9.99099e-05, gnorm=2.544, loss_scale=8, train_wall=11, gb_free=2.8, wall=141415 2021-06-20 09:55:52 | INFO | train_inner | epoch 005: 324 / 3002 loss=2.47, ppl=5.54, wps=5841.3, ups=0.09, wpb=64715, bsz=128, num_updates=12260, lr=9.99099e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=141427 2021-06-20 09:56:03 | INFO | train_inner | epoch 005: 325 / 3002 loss=2.563, ppl=5.91, wps=5785.1, ups=0.09, wpb=64803, bsz=128, num_updates=12261, lr=9.99099e-05, gnorm=2.042, loss_scale=8, train_wall=11, gb_free=2.8, wall=141438 2021-06-20 09:56:15 | INFO | train_inner | epoch 005: 326 / 3002 loss=2.512, ppl=5.7, wps=5751.3, ups=0.09, wpb=64869, bsz=128, num_updates=12262, lr=9.99099e-05, gnorm=1.937, loss_scale=8, train_wall=11, gb_free=2.8, wall=141449 2021-06-20 09:56:26 | INFO | train_inner | epoch 005: 327 / 3002 loss=2.655, ppl=6.3, wps=5813.6, ups=0.09, wpb=64822, bsz=128, num_updates=12263, lr=9.99099e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=141460 2021-06-20 09:56:37 | INFO | train_inner | epoch 005: 328 / 3002 loss=2.373, ppl=5.18, wps=5817.5, ups=0.09, wpb=64833, bsz=128, num_updates=12264, lr=9.99099e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=141471 2021-06-20 09:56:48 | INFO | train_inner | epoch 005: 329 / 3002 loss=2.374, ppl=5.18, wps=5884.9, ups=0.09, wpb=64875, bsz=128, num_updates=12265, lr=9.99099e-05, gnorm=2.063, loss_scale=8, train_wall=11, gb_free=2.8, wall=141482 2021-06-20 09:56:59 | INFO | train_inner | epoch 005: 330 / 3002 loss=2.547, ppl=5.84, wps=5899, ups=0.09, wpb=64863, bsz=128, num_updates=12266, lr=9.99099e-05, gnorm=2.047, loss_scale=8, train_wall=11, gb_free=2.8, wall=141493 2021-06-20 09:57:10 | INFO | train_inner | epoch 005: 331 / 3002 loss=2.586, ppl=6.01, wps=5802.5, ups=0.09, wpb=64858, bsz=128, num_updates=12267, lr=9.99099e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=141505 2021-06-20 09:57:21 | INFO | train_inner | epoch 005: 332 / 3002 loss=2.653, ppl=6.29, wps=5840.1, ups=0.09, wpb=64864, bsz=128, num_updates=12268, lr=9.99098e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=141516 2021-06-20 09:57:33 | INFO | train_inner | epoch 005: 333 / 3002 loss=2.417, ppl=5.34, wps=5784.4, ups=0.09, wpb=64836, bsz=128, num_updates=12269, lr=9.99098e-05, gnorm=1.972, loss_scale=8, train_wall=11, gb_free=2.8, wall=141527 2021-06-20 09:57:44 | INFO | train_inner | epoch 005: 334 / 3002 loss=2.452, ppl=5.47, wps=5876.5, ups=0.09, wpb=64859, bsz=128, num_updates=12270, lr=9.99098e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=141538 2021-06-20 09:57:55 | INFO | train_inner | epoch 005: 335 / 3002 loss=2.413, ppl=5.33, wps=5826.4, ups=0.09, wpb=64817, bsz=128, num_updates=12271, lr=9.99098e-05, gnorm=2.05, loss_scale=8, train_wall=11, gb_free=2.8, wall=141549 2021-06-20 09:58:06 | INFO | train_inner | epoch 005: 336 / 3002 loss=2.689, ppl=6.45, wps=5873, ups=0.09, wpb=64800, bsz=128, num_updates=12272, lr=9.99098e-05, gnorm=2.066, loss_scale=8, train_wall=11, gb_free=2.8, wall=141560 2021-06-20 09:58:17 | INFO | train_inner | epoch 005: 337 / 3002 loss=2.552, ppl=5.86, wps=5761.6, ups=0.09, wpb=64815, bsz=128, num_updates=12273, lr=9.99098e-05, gnorm=1.918, loss_scale=8, train_wall=11, gb_free=2.8, wall=141571 2021-06-20 09:58:28 | INFO | train_inner | epoch 005: 338 / 3002 loss=2.493, ppl=5.63, wps=5918, ups=0.09, wpb=64883, bsz=128, num_updates=12274, lr=9.99098e-05, gnorm=2.029, loss_scale=8, train_wall=11, gb_free=2.8, wall=141582 2021-06-20 09:58:39 | INFO | train_inner | epoch 005: 339 / 3002 loss=2.472, ppl=5.55, wps=5821.2, ups=0.09, wpb=64848, bsz=128, num_updates=12275, lr=9.99098e-05, gnorm=2.038, loss_scale=8, train_wall=11, gb_free=2.8, wall=141593 2021-06-20 09:58:50 | INFO | train_inner | epoch 005: 340 / 3002 loss=2.3, ppl=4.92, wps=5878.5, ups=0.09, wpb=64898, bsz=128, num_updates=12276, lr=9.99098e-05, gnorm=2.092, loss_scale=8, train_wall=11, gb_free=2.8, wall=141604 2021-06-20 09:59:01 | INFO | train_inner | epoch 005: 341 / 3002 loss=2.538, ppl=5.81, wps=5835.4, ups=0.09, wpb=64842, bsz=128, num_updates=12277, lr=9.99098e-05, gnorm=2.039, loss_scale=8, train_wall=11, gb_free=2.8, wall=141616 2021-06-20 09:59:12 | INFO | train_inner | epoch 005: 342 / 3002 loss=2.474, ppl=5.55, wps=5812.4, ups=0.09, wpb=64849, bsz=128, num_updates=12278, lr=9.99098e-05, gnorm=2.047, loss_scale=8, train_wall=11, gb_free=2.8, wall=141627 2021-06-20 09:59:24 | INFO | train_inner | epoch 005: 343 / 3002 loss=2.682, ppl=6.42, wps=5787.1, ups=0.09, wpb=64849, bsz=128, num_updates=12279, lr=9.99098e-05, gnorm=2.054, loss_scale=8, train_wall=11, gb_free=2.8, wall=141638 2021-06-20 09:59:34 | INFO | train_inner | epoch 005: 344 / 3002 loss=2.519, ppl=5.73, wps=5989.5, ups=0.09, wpb=64808, bsz=128, num_updates=12280, lr=9.99098e-05, gnorm=2.043, loss_scale=8, train_wall=10, gb_free=2.8, wall=141649 2021-06-20 09:59:46 | INFO | train_inner | epoch 005: 345 / 3002 loss=2.705, ppl=6.52, wps=5820.8, ups=0.09, wpb=64814, bsz=128, num_updates=12281, lr=9.99097e-05, gnorm=2.159, loss_scale=8, train_wall=11, gb_free=2.8, wall=141660 2021-06-20 09:59:56 | INFO | train_inner | epoch 005: 346 / 3002 loss=2.743, ppl=6.7, wps=5948.6, ups=0.09, wpb=64738, bsz=128, num_updates=12282, lr=9.99097e-05, gnorm=2.081, loss_scale=8, train_wall=10, gb_free=2.8, wall=141671 2021-06-20 10:00:07 | INFO | train_inner | epoch 005: 347 / 3002 loss=2.598, ppl=6.06, wps=5943.2, ups=0.09, wpb=64855, bsz=128, num_updates=12283, lr=9.99097e-05, gnorm=1.973, loss_scale=8, train_wall=10, gb_free=2.8, wall=141682 2021-06-20 10:00:18 | INFO | train_inner | epoch 005: 348 / 3002 loss=2.528, ppl=5.77, wps=5881.6, ups=0.09, wpb=64878, bsz=128, num_updates=12284, lr=9.99097e-05, gnorm=2.016, loss_scale=8, train_wall=11, gb_free=2.8, wall=141693 2021-06-20 10:00:30 | INFO | train_inner | epoch 005: 349 / 3002 loss=2.627, ppl=6.18, wps=5781.1, ups=0.09, wpb=64840, bsz=128, num_updates=12285, lr=9.99097e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=141704 2021-06-20 10:00:41 | INFO | train_inner | epoch 005: 350 / 3002 loss=2.505, ppl=5.68, wps=5671.7, ups=0.09, wpb=64791, bsz=128, num_updates=12286, lr=9.99097e-05, gnorm=2.097, loss_scale=8, train_wall=11, gb_free=2.8, wall=141715 2021-06-20 10:00:52 | INFO | train_inner | epoch 005: 351 / 3002 loss=2.447, ppl=5.45, wps=5856.9, ups=0.09, wpb=64832, bsz=128, num_updates=12287, lr=9.99097e-05, gnorm=2.036, loss_scale=8, train_wall=11, gb_free=2.8, wall=141726 2021-06-20 10:01:03 | INFO | train_inner | epoch 005: 352 / 3002 loss=2.448, ppl=5.46, wps=5849, ups=0.09, wpb=64815, bsz=128, num_updates=12288, lr=9.99097e-05, gnorm=2.061, loss_scale=8, train_wall=11, gb_free=2.8, wall=141738 2021-06-20 10:01:14 | INFO | train_inner | epoch 005: 353 / 3002 loss=2.479, ppl=5.57, wps=5759.2, ups=0.09, wpb=64758, bsz=128, num_updates=12289, lr=9.99097e-05, gnorm=2.107, loss_scale=8, train_wall=11, gb_free=2.8, wall=141749 2021-06-20 10:01:25 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 2021-06-20 10:01:36 | INFO | train_inner | epoch 005: 355 / 3002 loss=2.665, ppl=6.34, wps=2957.5, ups=0.05, wpb=64845, bsz=128, num_updates=12290, lr=9.99097e-05, gnorm=4.009, loss_scale=4, train_wall=21, gb_free=2.8, wall=141771 2021-06-20 10:01:47 | INFO | train_inner | epoch 005: 356 / 3002 loss=2.625, ppl=6.17, wps=5957.9, ups=0.09, wpb=64834, bsz=128, num_updates=12291, lr=9.99097e-05, gnorm=2.008, loss_scale=4, train_wall=10, gb_free=2.8, wall=141782 2021-06-20 10:01:58 | INFO | train_inner | epoch 005: 357 / 3002 loss=2.429, ppl=5.38, wps=5864.9, ups=0.09, wpb=64873, bsz=128, num_updates=12292, lr=9.99097e-05, gnorm=2.156, loss_scale=4, train_wall=11, gb_free=2.8, wall=141793 2021-06-20 10:02:10 | INFO | train_inner | epoch 005: 358 / 3002 loss=2.351, ppl=5.1, wps=5773.3, ups=0.09, wpb=64886, bsz=128, num_updates=12293, lr=9.99096e-05, gnorm=2.064, loss_scale=4, train_wall=11, gb_free=2.8, wall=141804 2021-06-20 10:02:21 | INFO | train_inner | epoch 005: 359 / 3002 loss=2.39, ppl=5.24, wps=5783.6, ups=0.09, wpb=64786, bsz=128, num_updates=12294, lr=9.99096e-05, gnorm=1.956, loss_scale=4, train_wall=11, gb_free=2.8, wall=141815 2021-06-20 10:02:32 | INFO | train_inner | epoch 005: 360 / 3002 loss=2.524, ppl=5.75, wps=5996.5, ups=0.09, wpb=64756, bsz=128, num_updates=12295, lr=9.99096e-05, gnorm=1.986, loss_scale=4, train_wall=10, gb_free=2.8, wall=141826 2021-06-20 10:02:42 | INFO | train_inner | epoch 005: 361 / 3002 loss=2.462, ppl=5.51, wps=5947, ups=0.09, wpb=64798, bsz=128, num_updates=12296, lr=9.99096e-05, gnorm=1.975, loss_scale=4, train_wall=10, gb_free=2.8, wall=141837 2021-06-20 10:02:53 | INFO | train_inner | epoch 005: 362 / 3002 loss=2.597, ppl=6.05, wps=5894.9, ups=0.09, wpb=64842, bsz=128, num_updates=12297, lr=9.99096e-05, gnorm=2.333, loss_scale=4, train_wall=11, gb_free=2.8, wall=141848 2021-06-20 10:03:04 | INFO | train_inner | epoch 005: 363 / 3002 loss=2.534, ppl=5.79, wps=5883.5, ups=0.09, wpb=64808, bsz=128, num_updates=12298, lr=9.99096e-05, gnorm=1.973, loss_scale=4, train_wall=11, gb_free=2.8, wall=141859 2021-06-20 10:03:16 | INFO | train_inner | epoch 005: 364 / 3002 loss=2.361, ppl=5.14, wps=5802.5, ups=0.09, wpb=64785, bsz=128, num_updates=12299, lr=9.99096e-05, gnorm=1.968, loss_scale=4, train_wall=11, gb_free=2.8, wall=141870 2021-06-20 10:03:27 | INFO | train_inner | epoch 005: 365 / 3002 loss=2.455, ppl=5.48, wps=5735.3, ups=0.09, wpb=64853, bsz=128, num_updates=12300, lr=9.99096e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=141881 2021-06-20 10:03:38 | INFO | train_inner | epoch 005: 366 / 3002 loss=2.413, ppl=5.33, wps=5824.5, ups=0.09, wpb=64813, bsz=128, num_updates=12301, lr=9.99096e-05, gnorm=1.89, loss_scale=4, train_wall=11, gb_free=2.8, wall=141892 2021-06-20 10:03:49 | INFO | train_inner | epoch 005: 367 / 3002 loss=2.36, ppl=5.13, wps=5837.7, ups=0.09, wpb=64844, bsz=128, num_updates=12302, lr=9.99096e-05, gnorm=2.018, loss_scale=4, train_wall=11, gb_free=2.8, wall=141903 2021-06-20 10:04:00 | INFO | train_inner | epoch 005: 368 / 3002 loss=2.45, ppl=5.46, wps=5880.3, ups=0.09, wpb=64850, bsz=128, num_updates=12303, lr=9.99096e-05, gnorm=1.913, loss_scale=4, train_wall=11, gb_free=2.8, wall=141915 2021-06-20 10:04:11 | INFO | train_inner | epoch 005: 369 / 3002 loss=2.521, ppl=5.74, wps=5796.1, ups=0.09, wpb=64790, bsz=128, num_updates=12304, lr=9.99096e-05, gnorm=1.988, loss_scale=4, train_wall=11, gb_free=2.8, wall=141926 2021-06-20 10:04:23 | INFO | train_inner | epoch 005: 370 / 3002 loss=2.462, ppl=5.51, wps=5812.4, ups=0.09, wpb=64922, bsz=128, num_updates=12305, lr=9.99096e-05, gnorm=2.064, loss_scale=4, train_wall=11, gb_free=2.8, wall=141937 2021-06-20 10:04:34 | INFO | train_inner | epoch 005: 371 / 3002 loss=2.646, ppl=6.26, wps=5756.7, ups=0.09, wpb=64773, bsz=128, num_updates=12306, lr=9.99095e-05, gnorm=5.288, loss_scale=4, train_wall=11, gb_free=2.8, wall=141948 2021-06-20 10:04:45 | INFO | train_inner | epoch 005: 372 / 3002 loss=2.532, ppl=5.78, wps=5977, ups=0.09, wpb=64857, bsz=128, num_updates=12307, lr=9.99095e-05, gnorm=2.058, loss_scale=4, train_wall=10, gb_free=2.8, wall=141959 2021-06-20 10:04:56 | INFO | train_inner | epoch 005: 373 / 3002 loss=2.461, ppl=5.51, wps=5775, ups=0.09, wpb=64833, bsz=128, num_updates=12308, lr=9.99095e-05, gnorm=2.402, loss_scale=4, train_wall=11, gb_free=2.8, wall=141970 2021-06-20 10:05:07 | INFO | train_inner | epoch 005: 374 / 3002 loss=2.396, ppl=5.26, wps=5761.1, ups=0.09, wpb=64838, bsz=128, num_updates=12309, lr=9.99095e-05, gnorm=2.06, loss_scale=4, train_wall=11, gb_free=2.8, wall=141981 2021-06-20 10:05:18 | INFO | train_inner | epoch 005: 375 / 3002 loss=2.591, ppl=6.03, wps=5800.9, ups=0.09, wpb=64845, bsz=128, num_updates=12310, lr=9.99095e-05, gnorm=1.961, loss_scale=4, train_wall=11, gb_free=2.8, wall=141993 2021-06-20 10:05:29 | INFO | train_inner | epoch 005: 376 / 3002 loss=2.619, ppl=6.14, wps=5838.7, ups=0.09, wpb=64877, bsz=128, num_updates=12311, lr=9.99095e-05, gnorm=1.956, loss_scale=4, train_wall=11, gb_free=2.8, wall=142004 2021-06-20 10:05:40 | INFO | train_inner | epoch 005: 377 / 3002 loss=2.407, ppl=5.31, wps=5913, ups=0.09, wpb=64870, bsz=128, num_updates=12312, lr=9.99095e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=142015 2021-06-20 10:05:51 | INFO | train_inner | epoch 005: 378 / 3002 loss=2.431, ppl=5.39, wps=5861.1, ups=0.09, wpb=64860, bsz=128, num_updates=12313, lr=9.99095e-05, gnorm=1.975, loss_scale=4, train_wall=11, gb_free=2.8, wall=142026 2021-06-20 10:06:02 | INFO | train_inner | epoch 005: 379 / 3002 loss=2.53, ppl=5.77, wps=5913.3, ups=0.09, wpb=64872, bsz=128, num_updates=12314, lr=9.99095e-05, gnorm=3.168, loss_scale=4, train_wall=10, gb_free=2.8, wall=142037 2021-06-20 10:06:13 | INFO | train_inner | epoch 005: 380 / 3002 loss=2.58, ppl=5.98, wps=5853.5, ups=0.09, wpb=64810, bsz=128, num_updates=12315, lr=9.99095e-05, gnorm=2.056, loss_scale=4, train_wall=11, gb_free=2.8, wall=142048 2021-06-20 10:06:25 | INFO | train_inner | epoch 005: 381 / 3002 loss=2.633, ppl=6.2, wps=5856.9, ups=0.09, wpb=64769, bsz=128, num_updates=12316, lr=9.99095e-05, gnorm=2.057, loss_scale=4, train_wall=11, gb_free=2.8, wall=142059 2021-06-20 10:06:35 | INFO | train_inner | epoch 005: 382 / 3002 loss=2.556, ppl=5.88, wps=5966.8, ups=0.09, wpb=64868, bsz=128, num_updates=12317, lr=9.99095e-05, gnorm=2.08, loss_scale=4, train_wall=10, gb_free=2.8, wall=142070 2021-06-20 10:06:47 | INFO | train_inner | epoch 005: 383 / 3002 loss=2.347, ppl=5.09, wps=5798.4, ups=0.09, wpb=64772, bsz=128, num_updates=12318, lr=9.99094e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=142081 2021-06-20 10:06:58 | INFO | train_inner | epoch 005: 384 / 3002 loss=2.486, ppl=5.6, wps=5811.2, ups=0.09, wpb=64805, bsz=128, num_updates=12319, lr=9.99094e-05, gnorm=2.011, loss_scale=4, train_wall=11, gb_free=2.8, wall=142092 2021-06-20 10:07:09 | INFO | train_inner | epoch 005: 385 / 3002 loss=2.514, ppl=5.71, wps=5804.7, ups=0.09, wpb=64787, bsz=128, num_updates=12320, lr=9.99094e-05, gnorm=2.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=142103 2021-06-20 10:07:20 | INFO | train_inner | epoch 005: 386 / 3002 loss=2.665, ppl=6.34, wps=5797.5, ups=0.09, wpb=64787, bsz=128, num_updates=12321, lr=9.99094e-05, gnorm=2.153, loss_scale=4, train_wall=11, gb_free=2.8, wall=142114 2021-06-20 10:07:31 | INFO | train_inner | epoch 005: 387 / 3002 loss=2.528, ppl=5.77, wps=5709.6, ups=0.09, wpb=64849, bsz=128, num_updates=12322, lr=9.99094e-05, gnorm=2.017, loss_scale=4, train_wall=11, gb_free=2.8, wall=142126 2021-06-20 10:07:42 | INFO | train_inner | epoch 005: 388 / 3002 loss=2.54, ppl=5.82, wps=5945.7, ups=0.09, wpb=64870, bsz=128, num_updates=12323, lr=9.99094e-05, gnorm=1.963, loss_scale=4, train_wall=10, gb_free=2.8, wall=142137 2021-06-20 10:07:53 | INFO | train_inner | epoch 005: 389 / 3002 loss=2.511, ppl=5.7, wps=6036.2, ups=0.09, wpb=64892, bsz=128, num_updates=12324, lr=9.99094e-05, gnorm=1.913, loss_scale=4, train_wall=10, gb_free=2.8, wall=142147 2021-06-20 10:08:04 | INFO | train_inner | epoch 005: 390 / 3002 loss=2.648, ppl=6.27, wps=5830.8, ups=0.09, wpb=64851, bsz=128, num_updates=12325, lr=9.99094e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=142159 2021-06-20 10:08:15 | INFO | train_inner | epoch 005: 391 / 3002 loss=2.601, ppl=6.07, wps=5799.1, ups=0.09, wpb=64879, bsz=128, num_updates=12326, lr=9.99094e-05, gnorm=2.013, loss_scale=4, train_wall=11, gb_free=2.8, wall=142170 2021-06-20 10:08:27 | INFO | train_inner | epoch 005: 392 / 3002 loss=2.651, ppl=6.28, wps=5827.3, ups=0.09, wpb=64789, bsz=128, num_updates=12327, lr=9.99094e-05, gnorm=1.994, loss_scale=4, train_wall=11, gb_free=2.8, wall=142181 2021-06-20 10:08:38 | INFO | train_inner | epoch 005: 393 / 3002 loss=2.477, ppl=5.57, wps=5831.9, ups=0.09, wpb=64886, bsz=128, num_updates=12328, lr=9.99094e-05, gnorm=2.02, loss_scale=4, train_wall=11, gb_free=2.8, wall=142192 2021-06-20 10:08:49 | INFO | train_inner | epoch 005: 394 / 3002 loss=2.651, ppl=6.28, wps=5852.5, ups=0.09, wpb=64816, bsz=128, num_updates=12329, lr=9.99094e-05, gnorm=2.091, loss_scale=4, train_wall=11, gb_free=2.8, wall=142203 2021-06-20 10:09:00 | INFO | train_inner | epoch 005: 395 / 3002 loss=2.432, ppl=5.4, wps=5865.5, ups=0.09, wpb=64844, bsz=128, num_updates=12330, lr=9.99094e-05, gnorm=2.004, loss_scale=4, train_wall=11, gb_free=2.8, wall=142214 2021-06-20 10:09:11 | INFO | train_inner | epoch 005: 396 / 3002 loss=2.712, ppl=6.55, wps=5703.7, ups=0.09, wpb=64789, bsz=128, num_updates=12331, lr=9.99093e-05, gnorm=2.058, loss_scale=4, train_wall=11, gb_free=2.8, wall=142225 2021-06-20 10:09:22 | INFO | train_inner | epoch 005: 397 / 3002 loss=2.587, ppl=6.01, wps=5933.1, ups=0.09, wpb=64843, bsz=128, num_updates=12332, lr=9.99093e-05, gnorm=2.053, loss_scale=4, train_wall=10, gb_free=2.8, wall=142236 2021-06-20 10:09:33 | INFO | train_inner | epoch 005: 398 / 3002 loss=2.423, ppl=5.36, wps=5933.3, ups=0.09, wpb=64834, bsz=128, num_updates=12333, lr=9.99093e-05, gnorm=1.932, loss_scale=4, train_wall=10, gb_free=2.8, wall=142247 2021-06-20 10:09:44 | INFO | train_inner | epoch 005: 399 / 3002 loss=2.62, ppl=6.15, wps=5729.1, ups=0.09, wpb=64765, bsz=128, num_updates=12334, lr=9.99093e-05, gnorm=2.002, loss_scale=4, train_wall=11, gb_free=2.8, wall=142259 2021-06-20 10:09:55 | INFO | train_inner | epoch 005: 400 / 3002 loss=2.432, ppl=5.4, wps=5904.1, ups=0.09, wpb=64812, bsz=128, num_updates=12335, lr=9.99093e-05, gnorm=2.154, loss_scale=4, train_wall=11, gb_free=2.8, wall=142270 2021-06-20 10:10:06 | INFO | train_inner | epoch 005: 401 / 3002 loss=2.364, ppl=5.15, wps=5863.9, ups=0.09, wpb=64902, bsz=128, num_updates=12336, lr=9.99093e-05, gnorm=1.971, loss_scale=4, train_wall=11, gb_free=2.8, wall=142281 2021-06-20 10:10:17 | INFO | train_inner | epoch 005: 402 / 3002 loss=2.437, ppl=5.41, wps=5872.1, ups=0.09, wpb=64848, bsz=128, num_updates=12337, lr=9.99093e-05, gnorm=2.016, loss_scale=4, train_wall=11, gb_free=2.8, wall=142292 2021-06-20 10:10:28 | INFO | train_inner | epoch 005: 403 / 3002 loss=2.454, ppl=5.48, wps=5854.1, ups=0.09, wpb=64801, bsz=128, num_updates=12338, lr=9.99093e-05, gnorm=1.994, loss_scale=4, train_wall=11, gb_free=2.8, wall=142303 2021-06-20 10:10:39 | INFO | train_inner | epoch 005: 404 / 3002 loss=2.52, ppl=5.74, wps=5970.6, ups=0.09, wpb=64799, bsz=128, num_updates=12339, lr=9.99093e-05, gnorm=2.159, loss_scale=4, train_wall=10, gb_free=2.8, wall=142314 2021-06-20 10:10:50 | INFO | train_inner | epoch 005: 405 / 3002 loss=2.559, ppl=5.89, wps=6016.7, ups=0.09, wpb=64788, bsz=128, num_updates=12340, lr=9.99093e-05, gnorm=2.098, loss_scale=4, train_wall=10, gb_free=2.8, wall=142324 2021-06-20 10:11:01 | INFO | train_inner | epoch 005: 406 / 3002 loss=2.539, ppl=5.81, wps=5908.5, ups=0.09, wpb=64851, bsz=128, num_updates=12341, lr=9.99093e-05, gnorm=1.988, loss_scale=4, train_wall=11, gb_free=2.8, wall=142335 2021-06-20 10:11:12 | INFO | train_inner | epoch 005: 407 / 3002 loss=2.429, ppl=5.39, wps=5729.6, ups=0.09, wpb=64751, bsz=128, num_updates=12342, lr=9.99093e-05, gnorm=2.285, loss_scale=4, train_wall=11, gb_free=2.8, wall=142347 2021-06-20 10:11:23 | INFO | train_inner | epoch 005: 408 / 3002 loss=2.397, ppl=5.27, wps=5902.1, ups=0.09, wpb=64815, bsz=128, num_updates=12343, lr=9.99092e-05, gnorm=1.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=142358 2021-06-20 10:11:35 | INFO | train_inner | epoch 005: 409 / 3002 loss=2.464, ppl=5.52, wps=5766.4, ups=0.09, wpb=64801, bsz=128, num_updates=12344, lr=9.99092e-05, gnorm=1.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=142369 2021-06-20 10:11:46 | INFO | train_inner | epoch 005: 410 / 3002 loss=2.366, ppl=5.15, wps=5839.2, ups=0.09, wpb=64812, bsz=128, num_updates=12345, lr=9.99092e-05, gnorm=1.939, loss_scale=4, train_wall=11, gb_free=2.8, wall=142380 2021-06-20 10:11:56 | INFO | train_inner | epoch 005: 411 / 3002 loss=2.526, ppl=5.76, wps=6031, ups=0.09, wpb=64876, bsz=128, num_updates=12346, lr=9.99092e-05, gnorm=2.009, loss_scale=4, train_wall=10, gb_free=2.8, wall=142391 2021-06-20 10:12:07 | INFO | train_inner | epoch 005: 412 / 3002 loss=2.378, ppl=5.2, wps=5863, ups=0.09, wpb=64843, bsz=128, num_updates=12347, lr=9.99092e-05, gnorm=2.416, loss_scale=4, train_wall=11, gb_free=2.8, wall=142402 2021-06-20 10:12:19 | INFO | train_inner | epoch 005: 413 / 3002 loss=2.582, ppl=5.99, wps=5810.8, ups=0.09, wpb=64768, bsz=128, num_updates=12348, lr=9.99092e-05, gnorm=3.877, loss_scale=4, train_wall=11, gb_free=2.8, wall=142413 2021-06-20 10:12:30 | INFO | train_inner | epoch 005: 414 / 3002 loss=2.644, ppl=6.25, wps=5817.7, ups=0.09, wpb=64786, bsz=128, num_updates=12349, lr=9.99092e-05, gnorm=2.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=142424 2021-06-20 10:12:41 | INFO | train_inner | epoch 005: 415 / 3002 loss=2.582, ppl=5.99, wps=5921, ups=0.09, wpb=64877, bsz=128, num_updates=12350, lr=9.99092e-05, gnorm=2.118, loss_scale=4, train_wall=11, gb_free=2.8, wall=142435 2021-06-20 10:12:52 | INFO | train_inner | epoch 005: 416 / 3002 loss=2.453, ppl=5.48, wps=5824.4, ups=0.09, wpb=64907, bsz=128, num_updates=12351, lr=9.99092e-05, gnorm=2.194, loss_scale=4, train_wall=11, gb_free=2.8, wall=142446 2021-06-20 10:13:03 | INFO | train_inner | epoch 005: 417 / 3002 loss=2.634, ppl=6.21, wps=5843.9, ups=0.09, wpb=64854, bsz=128, num_updates=12352, lr=9.99092e-05, gnorm=1.931, loss_scale=4, train_wall=11, gb_free=2.8, wall=142457 2021-06-20 10:13:14 | INFO | train_inner | epoch 005: 418 / 3002 loss=2.426, ppl=5.37, wps=5813.6, ups=0.09, wpb=64837, bsz=128, num_updates=12353, lr=9.99092e-05, gnorm=2.268, loss_scale=4, train_wall=11, gb_free=2.8, wall=142468 2021-06-20 10:13:25 | INFO | train_inner | epoch 005: 419 / 3002 loss=2.452, ppl=5.47, wps=5905.6, ups=0.09, wpb=64871, bsz=128, num_updates=12354, lr=9.99092e-05, gnorm=1.934, loss_scale=4, train_wall=11, gb_free=2.8, wall=142479 2021-06-20 10:13:36 | INFO | train_inner | epoch 005: 420 / 3002 loss=2.541, ppl=5.82, wps=5908.6, ups=0.09, wpb=64865, bsz=128, num_updates=12355, lr=9.99092e-05, gnorm=1.986, loss_scale=4, train_wall=10, gb_free=2.8, wall=142490 2021-06-20 10:13:47 | INFO | train_inner | epoch 005: 421 / 3002 loss=2.591, ppl=6.03, wps=5882.2, ups=0.09, wpb=64814, bsz=128, num_updates=12356, lr=9.99091e-05, gnorm=1.929, loss_scale=4, train_wall=11, gb_free=2.8, wall=142501 2021-06-20 10:13:58 | INFO | train_inner | epoch 005: 422 / 3002 loss=2.42, ppl=5.35, wps=5799.8, ups=0.09, wpb=64796, bsz=128, num_updates=12357, lr=9.99091e-05, gnorm=1.991, loss_scale=4, train_wall=11, gb_free=2.8, wall=142513 2021-06-20 10:14:09 | INFO | train_inner | epoch 005: 423 / 3002 loss=2.425, ppl=5.37, wps=5890.9, ups=0.09, wpb=64874, bsz=128, num_updates=12358, lr=9.99091e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=142524 2021-06-20 10:14:20 | INFO | train_inner | epoch 005: 424 / 3002 loss=2.501, ppl=5.66, wps=5866.1, ups=0.09, wpb=64799, bsz=128, num_updates=12359, lr=9.99091e-05, gnorm=2.162, loss_scale=4, train_wall=11, gb_free=2.8, wall=142535 2021-06-20 10:14:31 | INFO | train_inner | epoch 005: 425 / 3002 loss=2.53, ppl=5.77, wps=5859, ups=0.09, wpb=64768, bsz=128, num_updates=12360, lr=9.99091e-05, gnorm=2.019, loss_scale=4, train_wall=11, gb_free=2.8, wall=142546 2021-06-20 10:14:43 | INFO | train_inner | epoch 005: 426 / 3002 loss=2.501, ppl=5.66, wps=5807.2, ups=0.09, wpb=64797, bsz=128, num_updates=12361, lr=9.99091e-05, gnorm=2.078, loss_scale=4, train_wall=11, gb_free=2.8, wall=142557 2021-06-20 10:14:54 | INFO | train_inner | epoch 005: 427 / 3002 loss=2.594, ppl=6.04, wps=5919.9, ups=0.09, wpb=64887, bsz=128, num_updates=12362, lr=9.99091e-05, gnorm=2.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=142568 2021-06-20 10:15:04 | INFO | train_inner | epoch 005: 428 / 3002 loss=2.691, ppl=6.46, wps=5971.3, ups=0.09, wpb=64855, bsz=128, num_updates=12363, lr=9.99091e-05, gnorm=2.004, loss_scale=4, train_wall=10, gb_free=2.8, wall=142579 2021-06-20 10:15:15 | INFO | train_inner | epoch 005: 429 / 3002 loss=2.42, ppl=5.35, wps=5839.2, ups=0.09, wpb=64866, bsz=128, num_updates=12364, lr=9.99091e-05, gnorm=2.125, loss_scale=4, train_wall=11, gb_free=2.8, wall=142590 2021-06-20 10:15:26 | INFO | train_inner | epoch 005: 430 / 3002 loss=2.493, ppl=5.63, wps=5891.4, ups=0.09, wpb=64863, bsz=128, num_updates=12365, lr=9.99091e-05, gnorm=2.006, loss_scale=4, train_wall=11, gb_free=2.8, wall=142601 2021-06-20 10:15:37 | INFO | train_inner | epoch 005: 431 / 3002 loss=2.531, ppl=5.78, wps=5917.9, ups=0.09, wpb=64761, bsz=128, num_updates=12366, lr=9.99091e-05, gnorm=2.078, loss_scale=4, train_wall=10, gb_free=2.8, wall=142612 2021-06-20 10:15:48 | INFO | train_inner | epoch 005: 432 / 3002 loss=2.475, ppl=5.56, wps=5886.6, ups=0.09, wpb=64839, bsz=128, num_updates=12367, lr=9.99091e-05, gnorm=1.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=142623 2021-06-20 10:16:00 | INFO | train_inner | epoch 005: 433 / 3002 loss=2.55, ppl=5.86, wps=5804.1, ups=0.09, wpb=64798, bsz=128, num_updates=12368, lr=9.9909e-05, gnorm=2.101, loss_scale=4, train_wall=11, gb_free=2.8, wall=142634 2021-06-20 10:16:11 | INFO | train_inner | epoch 005: 434 / 3002 loss=2.609, ppl=6.1, wps=5842.5, ups=0.09, wpb=64815, bsz=128, num_updates=12369, lr=9.9909e-05, gnorm=2.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=142645 2021-06-20 10:16:22 | INFO | train_inner | epoch 005: 435 / 3002 loss=2.608, ppl=6.1, wps=5798.1, ups=0.09, wpb=64842, bsz=128, num_updates=12370, lr=9.9909e-05, gnorm=2.025, loss_scale=4, train_wall=11, gb_free=2.8, wall=142656 2021-06-20 10:16:33 | INFO | train_inner | epoch 005: 436 / 3002 loss=2.527, ppl=5.76, wps=5764.8, ups=0.09, wpb=64820, bsz=128, num_updates=12371, lr=9.9909e-05, gnorm=2.085, loss_scale=4, train_wall=11, gb_free=2.8, wall=142667 2021-06-20 10:16:44 | INFO | train_inner | epoch 005: 437 / 3002 loss=2.668, ppl=6.35, wps=5748.2, ups=0.09, wpb=64799, bsz=128, num_updates=12372, lr=9.9909e-05, gnorm=2.068, loss_scale=4, train_wall=11, gb_free=2.8, wall=142679 2021-06-20 10:16:56 | INFO | train_inner | epoch 005: 438 / 3002 loss=2.554, ppl=5.87, wps=5836.2, ups=0.09, wpb=64880, bsz=128, num_updates=12373, lr=9.9909e-05, gnorm=1.934, loss_scale=4, train_wall=11, gb_free=2.8, wall=142690 2021-06-20 10:17:07 | INFO | train_inner | epoch 005: 439 / 3002 loss=2.516, ppl=5.72, wps=5761.4, ups=0.09, wpb=64827, bsz=128, num_updates=12374, lr=9.9909e-05, gnorm=1.991, loss_scale=4, train_wall=11, gb_free=2.8, wall=142701 2021-06-20 10:17:18 | INFO | train_inner | epoch 005: 440 / 3002 loss=2.548, ppl=5.85, wps=5792.9, ups=0.09, wpb=64865, bsz=128, num_updates=12375, lr=9.9909e-05, gnorm=2.018, loss_scale=4, train_wall=11, gb_free=2.8, wall=142712 2021-06-20 10:17:29 | INFO | train_inner | epoch 005: 441 / 3002 loss=2.659, ppl=6.32, wps=5806.6, ups=0.09, wpb=64712, bsz=128, num_updates=12376, lr=9.9909e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=142723 2021-06-20 10:17:40 | INFO | train_inner | epoch 005: 442 / 3002 loss=2.632, ppl=6.2, wps=5781.4, ups=0.09, wpb=64886, bsz=128, num_updates=12377, lr=9.9909e-05, gnorm=2.099, loss_scale=4, train_wall=11, gb_free=2.8, wall=142735 2021-06-20 10:17:52 | INFO | train_inner | epoch 005: 443 / 3002 loss=2.482, ppl=5.59, wps=5811.6, ups=0.09, wpb=64837, bsz=128, num_updates=12378, lr=9.9909e-05, gnorm=2.008, loss_scale=4, train_wall=11, gb_free=2.8, wall=142746 2021-06-20 10:18:03 | INFO | train_inner | epoch 005: 444 / 3002 loss=2.593, ppl=6.03, wps=5860.8, ups=0.09, wpb=64804, bsz=128, num_updates=12379, lr=9.9909e-05, gnorm=1.919, loss_scale=4, train_wall=11, gb_free=2.8, wall=142757 2021-06-20 10:18:14 | INFO | train_inner | epoch 005: 445 / 3002 loss=2.644, ppl=6.25, wps=5777.3, ups=0.09, wpb=64746, bsz=128, num_updates=12380, lr=9.9909e-05, gnorm=2.038, loss_scale=4, train_wall=11, gb_free=2.8, wall=142768 2021-06-20 10:18:25 | INFO | train_inner | epoch 005: 446 / 3002 loss=2.615, ppl=6.13, wps=5832.5, ups=0.09, wpb=64871, bsz=128, num_updates=12381, lr=9.99089e-05, gnorm=1.935, loss_scale=4, train_wall=11, gb_free=2.8, wall=142779 2021-06-20 10:18:36 | INFO | train_inner | epoch 005: 447 / 3002 loss=2.414, ppl=5.33, wps=5882.1, ups=0.09, wpb=64814, bsz=128, num_updates=12382, lr=9.99089e-05, gnorm=1.991, loss_scale=4, train_wall=11, gb_free=2.8, wall=142790 2021-06-20 10:18:47 | INFO | train_inner | epoch 005: 448 / 3002 loss=2.556, ppl=5.88, wps=5803.6, ups=0.09, wpb=64827, bsz=128, num_updates=12383, lr=9.99089e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=142801 2021-06-20 10:18:58 | INFO | train_inner | epoch 005: 449 / 3002 loss=2.426, ppl=5.37, wps=5949.2, ups=0.09, wpb=64859, bsz=128, num_updates=12384, lr=9.99089e-05, gnorm=2.18, loss_scale=4, train_wall=10, gb_free=2.8, wall=142812 2021-06-20 10:19:09 | INFO | train_inner | epoch 005: 450 / 3002 loss=2.426, ppl=5.38, wps=5943.3, ups=0.09, wpb=64871, bsz=128, num_updates=12385, lr=9.99089e-05, gnorm=2.051, loss_scale=4, train_wall=10, gb_free=2.8, wall=142823 2021-06-20 10:19:20 | INFO | train_inner | epoch 005: 451 / 3002 loss=2.547, ppl=5.84, wps=5780.2, ups=0.09, wpb=64819, bsz=128, num_updates=12386, lr=9.99089e-05, gnorm=2.015, loss_scale=4, train_wall=11, gb_free=2.8, wall=142834 2021-06-20 10:19:31 | INFO | train_inner | epoch 005: 452 / 3002 loss=2.557, ppl=5.88, wps=5782, ups=0.09, wpb=64824, bsz=128, num_updates=12387, lr=9.99089e-05, gnorm=1.969, loss_scale=4, train_wall=11, gb_free=2.8, wall=142846 2021-06-20 10:19:43 | INFO | train_inner | epoch 005: 453 / 3002 loss=2.527, ppl=5.77, wps=5794, ups=0.09, wpb=64813, bsz=128, num_updates=12388, lr=9.99089e-05, gnorm=1.958, loss_scale=4, train_wall=11, gb_free=2.8, wall=142857 2021-06-20 10:19:54 | INFO | train_inner | epoch 005: 454 / 3002 loss=2.475, ppl=5.56, wps=5894.1, ups=0.09, wpb=64838, bsz=128, num_updates=12389, lr=9.99089e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=142868 2021-06-20 10:20:05 | INFO | train_inner | epoch 005: 455 / 3002 loss=2.494, ppl=5.63, wps=5829.6, ups=0.09, wpb=64835, bsz=128, num_updates=12390, lr=9.99089e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=142879 2021-06-20 10:20:16 | INFO | train_inner | epoch 005: 456 / 3002 loss=2.455, ppl=5.48, wps=5922.5, ups=0.09, wpb=64830, bsz=128, num_updates=12391, lr=9.99089e-05, gnorm=2.041, loss_scale=4, train_wall=10, gb_free=2.8, wall=142890 2021-06-20 10:20:27 | INFO | train_inner | epoch 005: 457 / 3002 loss=2.538, ppl=5.81, wps=5830.3, ups=0.09, wpb=64852, bsz=128, num_updates=12392, lr=9.99089e-05, gnorm=2.011, loss_scale=4, train_wall=11, gb_free=2.8, wall=142901 2021-06-20 10:20:38 | INFO | train_inner | epoch 005: 458 / 3002 loss=2.425, ppl=5.37, wps=5868.9, ups=0.09, wpb=64822, bsz=128, num_updates=12393, lr=9.99088e-05, gnorm=1.987, loss_scale=4, train_wall=11, gb_free=2.8, wall=142912 2021-06-20 10:20:49 | INFO | train_inner | epoch 005: 459 / 3002 loss=2.52, ppl=5.73, wps=5806.2, ups=0.09, wpb=64828, bsz=128, num_updates=12394, lr=9.99088e-05, gnorm=2.013, loss_scale=4, train_wall=11, gb_free=2.8, wall=142923 2021-06-20 10:21:00 | INFO | train_inner | epoch 005: 460 / 3002 loss=2.707, ppl=6.53, wps=5836.3, ups=0.09, wpb=64779, bsz=128, num_updates=12395, lr=9.99088e-05, gnorm=2.069, loss_scale=4, train_wall=11, gb_free=2.8, wall=142934 2021-06-20 10:21:11 | INFO | train_inner | epoch 005: 461 / 3002 loss=2.617, ppl=6.13, wps=5769.6, ups=0.09, wpb=64799, bsz=128, num_updates=12396, lr=9.99088e-05, gnorm=2.011, loss_scale=4, train_wall=11, gb_free=2.8, wall=142946 2021-06-20 10:21:22 | INFO | train_inner | epoch 005: 462 / 3002 loss=2.652, ppl=6.28, wps=5829.3, ups=0.09, wpb=64799, bsz=128, num_updates=12397, lr=9.99088e-05, gnorm=2.019, loss_scale=4, train_wall=11, gb_free=2.8, wall=142957 2021-06-20 10:21:33 | INFO | train_inner | epoch 005: 463 / 3002 loss=2.513, ppl=5.71, wps=5960.3, ups=0.09, wpb=64867, bsz=128, num_updates=12398, lr=9.99088e-05, gnorm=2.044, loss_scale=4, train_wall=10, gb_free=2.8, wall=142968 2021-06-20 10:21:44 | INFO | train_inner | epoch 005: 464 / 3002 loss=2.39, ppl=5.24, wps=5838.2, ups=0.09, wpb=64815, bsz=128, num_updates=12399, lr=9.99088e-05, gnorm=1.985, loss_scale=4, train_wall=11, gb_free=2.8, wall=142979 2021-06-20 10:21:56 | INFO | train_inner | epoch 005: 465 / 3002 loss=2.4, ppl=5.28, wps=5758.3, ups=0.09, wpb=64844, bsz=128, num_updates=12400, lr=9.99088e-05, gnorm=2.027, loss_scale=4, train_wall=11, gb_free=2.8, wall=142990 2021-06-20 10:22:07 | INFO | train_inner | epoch 005: 466 / 3002 loss=2.479, ppl=5.58, wps=5848.8, ups=0.09, wpb=64952, bsz=128, num_updates=12401, lr=9.99088e-05, gnorm=1.974, loss_scale=4, train_wall=11, gb_free=2.8, wall=143001 2021-06-20 10:22:18 | INFO | train_inner | epoch 005: 467 / 3002 loss=2.633, ppl=6.2, wps=5815.1, ups=0.09, wpb=64802, bsz=128, num_updates=12402, lr=9.99088e-05, gnorm=2.101, loss_scale=4, train_wall=11, gb_free=2.8, wall=143012 2021-06-20 10:22:29 | INFO | train_inner | epoch 005: 468 / 3002 loss=2.572, ppl=5.95, wps=5954.1, ups=0.09, wpb=64835, bsz=128, num_updates=12403, lr=9.99088e-05, gnorm=2.045, loss_scale=4, train_wall=10, gb_free=2.8, wall=143023 2021-06-20 10:22:40 | INFO | train_inner | epoch 005: 469 / 3002 loss=2.57, ppl=5.94, wps=5767.9, ups=0.09, wpb=64802, bsz=128, num_updates=12404, lr=9.99088e-05, gnorm=2.037, loss_scale=4, train_wall=11, gb_free=2.8, wall=143034 2021-06-20 10:22:51 | INFO | train_inner | epoch 005: 470 / 3002 loss=2.596, ppl=6.04, wps=5857.8, ups=0.09, wpb=64920, bsz=128, num_updates=12405, lr=9.99088e-05, gnorm=2.165, loss_scale=4, train_wall=11, gb_free=2.8, wall=143045 2021-06-20 10:23:02 | INFO | train_inner | epoch 005: 471 / 3002 loss=2.543, ppl=5.83, wps=5935.4, ups=0.09, wpb=64759, bsz=128, num_updates=12406, lr=9.99087e-05, gnorm=2.095, loss_scale=4, train_wall=10, gb_free=2.8, wall=143056 2021-06-20 10:23:13 | INFO | train_inner | epoch 005: 472 / 3002 loss=2.633, ppl=6.2, wps=5774.8, ups=0.09, wpb=64755, bsz=128, num_updates=12407, lr=9.99087e-05, gnorm=2.032, loss_scale=4, train_wall=11, gb_free=2.8, wall=143068 2021-06-20 10:23:24 | INFO | train_inner | epoch 005: 473 / 3002 loss=2.483, ppl=5.59, wps=5886.8, ups=0.09, wpb=64824, bsz=128, num_updates=12408, lr=9.99087e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=143079 2021-06-20 10:23:35 | INFO | train_inner | epoch 005: 474 / 3002 loss=2.559, ppl=5.89, wps=5980.9, ups=0.09, wpb=64861, bsz=128, num_updates=12409, lr=9.99087e-05, gnorm=1.961, loss_scale=4, train_wall=10, gb_free=2.8, wall=143089 2021-06-20 10:23:46 | INFO | train_inner | epoch 005: 475 / 3002 loss=2.455, ppl=5.48, wps=5874.4, ups=0.09, wpb=64691, bsz=128, num_updates=12410, lr=9.99087e-05, gnorm=2.073, loss_scale=4, train_wall=11, gb_free=2.8, wall=143100 2021-06-20 10:23:57 | INFO | train_inner | epoch 005: 476 / 3002 loss=2.394, ppl=5.26, wps=5700.7, ups=0.09, wpb=64846, bsz=128, num_updates=12411, lr=9.99087e-05, gnorm=1.994, loss_scale=4, train_wall=11, gb_free=2.8, wall=143112 2021-06-20 10:24:09 | INFO | train_inner | epoch 005: 477 / 3002 loss=2.532, ppl=5.78, wps=5809.9, ups=0.09, wpb=64859, bsz=128, num_updates=12412, lr=9.99087e-05, gnorm=1.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=143123 2021-06-20 10:24:20 | INFO | train_inner | epoch 005: 478 / 3002 loss=2.554, ppl=5.87, wps=5746.3, ups=0.09, wpb=64770, bsz=128, num_updates=12413, lr=9.99087e-05, gnorm=1.967, loss_scale=4, train_wall=11, gb_free=2.8, wall=143134 2021-06-20 10:24:31 | INFO | train_inner | epoch 005: 479 / 3002 loss=2.42, ppl=5.35, wps=5802.8, ups=0.09, wpb=64794, bsz=128, num_updates=12414, lr=9.99087e-05, gnorm=1.955, loss_scale=4, train_wall=11, gb_free=2.8, wall=143145 2021-06-20 10:24:42 | INFO | train_inner | epoch 005: 480 / 3002 loss=2.632, ppl=6.2, wps=5855.3, ups=0.09, wpb=64802, bsz=128, num_updates=12415, lr=9.99087e-05, gnorm=2.006, loss_scale=4, train_wall=11, gb_free=2.8, wall=143156 2021-06-20 10:24:53 | INFO | train_inner | epoch 005: 481 / 3002 loss=2.683, ppl=6.42, wps=5880.2, ups=0.09, wpb=64829, bsz=128, num_updates=12416, lr=9.99087e-05, gnorm=2.065, loss_scale=4, train_wall=11, gb_free=2.8, wall=143167 2021-06-20 10:25:04 | INFO | train_inner | epoch 005: 482 / 3002 loss=2.61, ppl=6.1, wps=5924.1, ups=0.09, wpb=64874, bsz=128, num_updates=12417, lr=9.99087e-05, gnorm=1.992, loss_scale=8, train_wall=10, gb_free=2.8, wall=143178 2021-06-20 10:25:15 | INFO | train_inner | epoch 005: 483 / 3002 loss=2.582, ppl=5.99, wps=5790.9, ups=0.09, wpb=64816, bsz=128, num_updates=12418, lr=9.99086e-05, gnorm=2.046, loss_scale=8, train_wall=11, gb_free=2.8, wall=143190 2021-06-20 10:25:27 | INFO | train_inner | epoch 005: 484 / 3002 loss=2.587, ppl=6.01, wps=5749.6, ups=0.09, wpb=64818, bsz=128, num_updates=12419, lr=9.99086e-05, gnorm=1.907, loss_scale=8, train_wall=11, gb_free=2.8, wall=143201 2021-06-20 10:25:37 | INFO | train_inner | epoch 005: 485 / 3002 loss=2.647, ppl=6.26, wps=5950.7, ups=0.09, wpb=64867, bsz=128, num_updates=12420, lr=9.99086e-05, gnorm=2.014, loss_scale=8, train_wall=10, gb_free=2.8, wall=143212 2021-06-20 10:25:49 | INFO | train_inner | epoch 005: 486 / 3002 loss=2.397, ppl=5.27, wps=5810.7, ups=0.09, wpb=64772, bsz=128, num_updates=12421, lr=9.99086e-05, gnorm=2.021, loss_scale=8, train_wall=11, gb_free=2.8, wall=143223 2021-06-20 10:26:00 | INFO | train_inner | epoch 005: 487 / 3002 loss=2.567, ppl=5.93, wps=5806.9, ups=0.09, wpb=64801, bsz=128, num_updates=12422, lr=9.99086e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=143234 2021-06-20 10:26:11 | INFO | train_inner | epoch 005: 488 / 3002 loss=2.316, ppl=4.98, wps=5718.2, ups=0.09, wpb=64797, bsz=128, num_updates=12423, lr=9.99086e-05, gnorm=1.895, loss_scale=8, train_wall=11, gb_free=2.8, wall=143245 2021-06-20 10:26:22 | INFO | train_inner | epoch 005: 489 / 3002 loss=2.625, ppl=6.17, wps=5791.2, ups=0.09, wpb=64874, bsz=128, num_updates=12424, lr=9.99086e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=143257 2021-06-20 10:26:33 | INFO | train_inner | epoch 005: 490 / 3002 loss=2.624, ppl=6.17, wps=5902.6, ups=0.09, wpb=64884, bsz=128, num_updates=12425, lr=9.99086e-05, gnorm=2.056, loss_scale=8, train_wall=11, gb_free=2.8, wall=143268 2021-06-20 10:26:44 | INFO | train_inner | epoch 005: 491 / 3002 loss=2.528, ppl=5.77, wps=5869.8, ups=0.09, wpb=64840, bsz=128, num_updates=12426, lr=9.99086e-05, gnorm=1.985, loss_scale=8, train_wall=11, gb_free=2.8, wall=143279 2021-06-20 10:26:55 | INFO | train_inner | epoch 005: 492 / 3002 loss=2.365, ppl=5.15, wps=5864.1, ups=0.09, wpb=64860, bsz=128, num_updates=12427, lr=9.99086e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=143290 2021-06-20 10:27:07 | INFO | train_inner | epoch 005: 493 / 3002 loss=2.533, ppl=5.79, wps=5764.9, ups=0.09, wpb=64875, bsz=128, num_updates=12428, lr=9.99086e-05, gnorm=2.045, loss_scale=8, train_wall=11, gb_free=2.8, wall=143301 2021-06-20 10:27:18 | INFO | train_inner | epoch 005: 494 / 3002 loss=2.474, ppl=5.56, wps=5839.4, ups=0.09, wpb=64871, bsz=128, num_updates=12429, lr=9.99086e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=143312 2021-06-20 10:27:29 | INFO | train_inner | epoch 005: 495 / 3002 loss=2.551, ppl=5.86, wps=5845.7, ups=0.09, wpb=64845, bsz=128, num_updates=12430, lr=9.99086e-05, gnorm=2.074, loss_scale=8, train_wall=11, gb_free=2.8, wall=143323 2021-06-20 10:27:40 | INFO | train_inner | epoch 005: 496 / 3002 loss=2.619, ppl=6.14, wps=5899.6, ups=0.09, wpb=64834, bsz=128, num_updates=12431, lr=9.99085e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=143334 2021-06-20 10:27:51 | INFO | train_inner | epoch 005: 497 / 3002 loss=2.704, ppl=6.52, wps=5807.2, ups=0.09, wpb=64738, bsz=128, num_updates=12432, lr=9.99085e-05, gnorm=2.032, loss_scale=8, train_wall=11, gb_free=2.8, wall=143345 2021-06-20 10:28:02 | INFO | train_inner | epoch 005: 498 / 3002 loss=2.549, ppl=5.85, wps=5786.3, ups=0.09, wpb=64790, bsz=128, num_updates=12433, lr=9.99085e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=143357 2021-06-20 10:28:13 | INFO | train_inner | epoch 005: 499 / 3002 loss=2.512, ppl=5.7, wps=5865.8, ups=0.09, wpb=64838, bsz=128, num_updates=12434, lr=9.99085e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=143368 2021-06-20 10:28:24 | INFO | train_inner | epoch 005: 500 / 3002 loss=2.565, ppl=5.92, wps=5818.7, ups=0.09, wpb=64821, bsz=128, num_updates=12435, lr=9.99085e-05, gnorm=1.927, loss_scale=8, train_wall=11, gb_free=2.8, wall=143379 2021-06-20 10:28:36 | INFO | train_inner | epoch 005: 501 / 3002 loss=2.586, ppl=6.01, wps=5815, ups=0.09, wpb=64783, bsz=128, num_updates=12436, lr=9.99085e-05, gnorm=2.024, loss_scale=8, train_wall=11, gb_free=2.8, wall=143390 2021-06-20 10:28:47 | INFO | train_inner | epoch 005: 502 / 3002 loss=2.476, ppl=5.56, wps=5902.4, ups=0.09, wpb=64795, bsz=128, num_updates=12437, lr=9.99085e-05, gnorm=2.003, loss_scale=8, train_wall=11, gb_free=2.8, wall=143401 2021-06-20 10:28:58 | INFO | train_inner | epoch 005: 503 / 3002 loss=2.49, ppl=5.62, wps=5852.9, ups=0.09, wpb=64874, bsz=128, num_updates=12438, lr=9.99085e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=143412 2021-06-20 10:29:09 | INFO | train_inner | epoch 005: 504 / 3002 loss=2.434, ppl=5.4, wps=5874.3, ups=0.09, wpb=64785, bsz=128, num_updates=12439, lr=9.99085e-05, gnorm=2.004, loss_scale=8, train_wall=11, gb_free=2.8, wall=143423 2021-06-20 10:29:20 | INFO | train_inner | epoch 005: 505 / 3002 loss=2.403, ppl=5.29, wps=5903.6, ups=0.09, wpb=64907, bsz=128, num_updates=12440, lr=9.99085e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=143434 2021-06-20 10:29:31 | INFO | train_inner | epoch 005: 506 / 3002 loss=2.478, ppl=5.57, wps=5738.2, ups=0.09, wpb=64816, bsz=128, num_updates=12441, lr=9.99085e-05, gnorm=2.011, loss_scale=8, train_wall=11, gb_free=2.8, wall=143445 2021-06-20 10:29:42 | INFO | train_inner | epoch 005: 507 / 3002 loss=2.518, ppl=5.73, wps=5779.9, ups=0.09, wpb=64791, bsz=128, num_updates=12442, lr=9.99085e-05, gnorm=1.946, loss_scale=8, train_wall=11, gb_free=2.8, wall=143456 2021-06-20 10:29:53 | INFO | train_inner | epoch 005: 508 / 3002 loss=2.5, ppl=5.66, wps=5864.1, ups=0.09, wpb=64801, bsz=128, num_updates=12443, lr=9.99084e-05, gnorm=2.043, loss_scale=8, train_wall=11, gb_free=2.8, wall=143468 2021-06-20 10:30:04 | INFO | train_inner | epoch 005: 509 / 3002 loss=2.466, ppl=5.52, wps=5856.4, ups=0.09, wpb=64854, bsz=128, num_updates=12444, lr=9.99084e-05, gnorm=2.411, loss_scale=8, train_wall=11, gb_free=2.8, wall=143479 2021-06-20 10:30:15 | INFO | train_inner | epoch 005: 510 / 3002 loss=2.479, ppl=5.57, wps=5815.9, ups=0.09, wpb=64864, bsz=128, num_updates=12445, lr=9.99084e-05, gnorm=5.315, loss_scale=8, train_wall=11, gb_free=2.8, wall=143490 2021-06-20 10:30:27 | INFO | train_inner | epoch 005: 511 / 3002 loss=2.716, ppl=6.57, wps=5803.1, ups=0.09, wpb=64847, bsz=128, num_updates=12446, lr=9.99084e-05, gnorm=1.864, loss_scale=8, train_wall=11, gb_free=2.8, wall=143501 2021-06-20 10:30:38 | INFO | train_inner | epoch 005: 512 / 3002 loss=2.401, ppl=5.28, wps=5858.9, ups=0.09, wpb=64896, bsz=128, num_updates=12447, lr=9.99084e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=143512 2021-06-20 10:30:49 | INFO | train_inner | epoch 005: 513 / 3002 loss=2.594, ppl=6.04, wps=5800, ups=0.09, wpb=64800, bsz=128, num_updates=12448, lr=9.99084e-05, gnorm=2.051, loss_scale=8, train_wall=11, gb_free=2.8, wall=143523 2021-06-20 10:31:00 | INFO | train_inner | epoch 005: 514 / 3002 loss=2.45, ppl=5.46, wps=5925.3, ups=0.09, wpb=64866, bsz=128, num_updates=12449, lr=9.99084e-05, gnorm=1.968, loss_scale=8, train_wall=10, gb_free=2.8, wall=143534 2021-06-20 10:31:11 | INFO | train_inner | epoch 005: 515 / 3002 loss=2.56, ppl=5.9, wps=5813.5, ups=0.09, wpb=64749, bsz=128, num_updates=12450, lr=9.99084e-05, gnorm=2.754, loss_scale=8, train_wall=11, gb_free=2.8, wall=143545 2021-06-20 10:31:22 | INFO | train_inner | epoch 005: 516 / 3002 loss=2.503, ppl=5.67, wps=5831.8, ups=0.09, wpb=64893, bsz=128, num_updates=12451, lr=9.99084e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=143556 2021-06-20 10:31:33 | INFO | train_inner | epoch 005: 517 / 3002 loss=2.555, ppl=5.88, wps=5784.4, ups=0.09, wpb=64849, bsz=128, num_updates=12452, lr=9.99084e-05, gnorm=1.949, loss_scale=8, train_wall=11, gb_free=2.8, wall=143568 2021-06-20 10:31:44 | INFO | train_inner | epoch 005: 518 / 3002 loss=2.514, ppl=5.71, wps=5766.6, ups=0.09, wpb=64735, bsz=128, num_updates=12453, lr=9.99084e-05, gnorm=2.018, loss_scale=8, train_wall=11, gb_free=2.8, wall=143579 2021-06-20 10:31:56 | INFO | train_inner | epoch 005: 519 / 3002 loss=2.499, ppl=5.65, wps=5787.9, ups=0.09, wpb=64836, bsz=128, num_updates=12454, lr=9.99084e-05, gnorm=1.89, loss_scale=8, train_wall=11, gb_free=2.8, wall=143590 2021-06-20 10:32:07 | INFO | train_inner | epoch 005: 520 / 3002 loss=2.46, ppl=5.5, wps=5792.3, ups=0.09, wpb=64803, bsz=128, num_updates=12455, lr=9.99084e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=143601 2021-06-20 10:32:18 | INFO | train_inner | epoch 005: 521 / 3002 loss=2.633, ppl=6.2, wps=5832.3, ups=0.09, wpb=64876, bsz=128, num_updates=12456, lr=9.99083e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=143612 2021-06-20 10:32:29 | INFO | train_inner | epoch 005: 522 / 3002 loss=2.559, ppl=5.89, wps=5794.8, ups=0.09, wpb=64812, bsz=128, num_updates=12457, lr=9.99083e-05, gnorm=2.041, loss_scale=8, train_wall=11, gb_free=2.8, wall=143624 2021-06-20 10:32:40 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 2021-06-20 10:32:51 | INFO | train_inner | epoch 005: 524 / 3002 loss=2.651, ppl=6.28, wps=2951.9, ups=0.05, wpb=64807, bsz=128, num_updates=12458, lr=9.99083e-05, gnorm=2.33, loss_scale=4, train_wall=21, gb_free=2.8, wall=143645 2021-06-20 10:33:02 | INFO | train_inner | epoch 005: 525 / 3002 loss=2.552, ppl=5.87, wps=5799.3, ups=0.09, wpb=64784, bsz=128, num_updates=12459, lr=9.99083e-05, gnorm=2.072, loss_scale=4, train_wall=11, gb_free=2.8, wall=143657 2021-06-20 10:33:14 | INFO | train_inner | epoch 005: 526 / 3002 loss=2.471, ppl=5.55, wps=5713.2, ups=0.09, wpb=64847, bsz=128, num_updates=12460, lr=9.99083e-05, gnorm=11.592, loss_scale=4, train_wall=11, gb_free=2.8, wall=143668 2021-06-20 10:33:25 | INFO | train_inner | epoch 005: 527 / 3002 loss=2.624, ppl=6.16, wps=5841.3, ups=0.09, wpb=64751, bsz=128, num_updates=12461, lr=9.99083e-05, gnorm=1.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=143679 2021-06-20 10:33:36 | INFO | train_inner | epoch 005: 528 / 3002 loss=2.623, ppl=6.16, wps=5909.4, ups=0.09, wpb=64842, bsz=128, num_updates=12462, lr=9.99083e-05, gnorm=1.948, loss_scale=4, train_wall=10, gb_free=2.8, wall=143690 2021-06-20 10:33:47 | INFO | train_inner | epoch 005: 529 / 3002 loss=2.561, ppl=5.9, wps=5701.7, ups=0.09, wpb=64814, bsz=128, num_updates=12463, lr=9.99083e-05, gnorm=2.048, loss_scale=4, train_wall=11, gb_free=2.8, wall=143701 2021-06-20 10:33:58 | INFO | train_inner | epoch 005: 530 / 3002 loss=2.548, ppl=5.85, wps=5973.1, ups=0.09, wpb=64872, bsz=128, num_updates=12464, lr=9.99083e-05, gnorm=2.122, loss_scale=4, train_wall=10, gb_free=2.8, wall=143712 2021-06-20 10:34:09 | INFO | train_inner | epoch 005: 531 / 3002 loss=2.643, ppl=6.24, wps=5861.9, ups=0.09, wpb=64797, bsz=128, num_updates=12465, lr=9.99083e-05, gnorm=2.06, loss_scale=4, train_wall=11, gb_free=2.8, wall=143723 2021-06-20 10:34:20 | INFO | train_inner | epoch 005: 532 / 3002 loss=2.571, ppl=5.94, wps=5725.2, ups=0.09, wpb=64878, bsz=128, num_updates=12466, lr=9.99083e-05, gnorm=2.03, loss_scale=4, train_wall=11, gb_free=2.8, wall=143735 2021-06-20 10:34:32 | INFO | train_inner | epoch 005: 533 / 3002 loss=2.408, ppl=5.31, wps=5786.8, ups=0.09, wpb=64802, bsz=128, num_updates=12467, lr=9.99083e-05, gnorm=2.006, loss_scale=4, train_wall=11, gb_free=2.8, wall=143746 2021-06-20 10:34:43 | INFO | train_inner | epoch 005: 534 / 3002 loss=2.465, ppl=5.52, wps=5866.8, ups=0.09, wpb=64807, bsz=128, num_updates=12468, lr=9.99082e-05, gnorm=2.105, loss_scale=4, train_wall=11, gb_free=2.8, wall=143757 2021-06-20 10:34:54 | INFO | train_inner | epoch 005: 535 / 3002 loss=2.579, ppl=5.98, wps=5767.5, ups=0.09, wpb=64788, bsz=128, num_updates=12469, lr=9.99082e-05, gnorm=2.019, loss_scale=4, train_wall=11, gb_free=2.8, wall=143768 2021-06-20 10:35:05 | INFO | train_inner | epoch 005: 536 / 3002 loss=2.562, ppl=5.9, wps=5783.9, ups=0.09, wpb=64820, bsz=128, num_updates=12470, lr=9.99082e-05, gnorm=1.98, loss_scale=4, train_wall=11, gb_free=2.8, wall=143779 2021-06-20 10:35:16 | INFO | train_inner | epoch 005: 537 / 3002 loss=2.464, ppl=5.52, wps=5780.1, ups=0.09, wpb=64884, bsz=128, num_updates=12471, lr=9.99082e-05, gnorm=1.971, loss_scale=4, train_wall=11, gb_free=2.8, wall=143791 2021-06-20 10:35:27 | INFO | train_inner | epoch 005: 538 / 3002 loss=2.469, ppl=5.54, wps=5851, ups=0.09, wpb=64843, bsz=128, num_updates=12472, lr=9.99082e-05, gnorm=1.87, loss_scale=4, train_wall=11, gb_free=2.8, wall=143802 2021-06-20 10:35:39 | INFO | train_inner | epoch 005: 539 / 3002 loss=2.661, ppl=6.32, wps=5774.7, ups=0.09, wpb=64750, bsz=128, num_updates=12473, lr=9.99082e-05, gnorm=2.062, loss_scale=4, train_wall=11, gb_free=2.8, wall=143813 2021-06-20 10:35:50 | INFO | train_inner | epoch 005: 540 / 3002 loss=2.544, ppl=5.83, wps=5861.2, ups=0.09, wpb=64826, bsz=128, num_updates=12474, lr=9.99082e-05, gnorm=2.039, loss_scale=4, train_wall=11, gb_free=2.8, wall=143824 2021-06-20 10:36:01 | INFO | train_inner | epoch 005: 541 / 3002 loss=2.608, ppl=6.1, wps=5793, ups=0.09, wpb=64828, bsz=128, num_updates=12475, lr=9.99082e-05, gnorm=1.987, loss_scale=4, train_wall=11, gb_free=2.8, wall=143835 2021-06-20 10:36:12 | INFO | train_inner | epoch 005: 542 / 3002 loss=2.59, ppl=6.02, wps=5886.7, ups=0.09, wpb=64822, bsz=128, num_updates=12476, lr=9.99082e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=143846 2021-06-20 10:36:23 | INFO | train_inner | epoch 005: 543 / 3002 loss=2.552, ppl=5.86, wps=5728.7, ups=0.09, wpb=64806, bsz=128, num_updates=12477, lr=9.99082e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=143857 2021-06-20 10:36:34 | INFO | train_inner | epoch 005: 544 / 3002 loss=2.595, ppl=6.04, wps=5874.7, ups=0.09, wpb=64809, bsz=128, num_updates=12478, lr=9.99082e-05, gnorm=2.105, loss_scale=4, train_wall=11, gb_free=2.8, wall=143868 2021-06-20 10:36:45 | INFO | train_inner | epoch 005: 545 / 3002 loss=2.629, ppl=6.19, wps=5967.1, ups=0.09, wpb=64861, bsz=128, num_updates=12479, lr=9.99082e-05, gnorm=2.031, loss_scale=4, train_wall=10, gb_free=2.8, wall=143879 2021-06-20 10:36:56 | INFO | train_inner | epoch 005: 546 / 3002 loss=2.531, ppl=5.78, wps=5796, ups=0.09, wpb=64784, bsz=128, num_updates=12480, lr=9.99082e-05, gnorm=1.987, loss_scale=4, train_wall=11, gb_free=2.8, wall=143891 2021-06-20 10:37:07 | INFO | train_inner | epoch 005: 547 / 3002 loss=2.434, ppl=5.4, wps=5863.2, ups=0.09, wpb=64849, bsz=128, num_updates=12481, lr=9.99081e-05, gnorm=1.909, loss_scale=4, train_wall=11, gb_free=2.8, wall=143902 2021-06-20 10:37:18 | INFO | train_inner | epoch 005: 548 / 3002 loss=2.424, ppl=5.37, wps=5776.8, ups=0.09, wpb=64924, bsz=128, num_updates=12482, lr=9.99081e-05, gnorm=3.802, loss_scale=4, train_wall=11, gb_free=2.8, wall=143913 2021-06-20 10:37:30 | INFO | train_inner | epoch 005: 549 / 3002 loss=2.667, ppl=6.35, wps=5734.4, ups=0.09, wpb=64806, bsz=128, num_updates=12483, lr=9.99081e-05, gnorm=1.994, loss_scale=4, train_wall=11, gb_free=2.8, wall=143924 2021-06-20 10:37:41 | INFO | train_inner | epoch 005: 550 / 3002 loss=2.545, ppl=5.84, wps=5756.7, ups=0.09, wpb=64838, bsz=128, num_updates=12484, lr=9.99081e-05, gnorm=1.984, loss_scale=4, train_wall=11, gb_free=2.8, wall=143935 2021-06-20 10:37:52 | INFO | train_inner | epoch 005: 551 / 3002 loss=2.509, ppl=5.69, wps=5809, ups=0.09, wpb=64877, bsz=128, num_updates=12485, lr=9.99081e-05, gnorm=1.932, loss_scale=4, train_wall=11, gb_free=2.8, wall=143947 2021-06-20 10:38:03 | INFO | train_inner | epoch 005: 552 / 3002 loss=2.583, ppl=5.99, wps=5797.8, ups=0.09, wpb=64921, bsz=128, num_updates=12486, lr=9.99081e-05, gnorm=1.969, loss_scale=4, train_wall=11, gb_free=2.8, wall=143958 2021-06-20 10:38:15 | INFO | train_inner | epoch 005: 553 / 3002 loss=2.767, ppl=6.81, wps=5838.1, ups=0.09, wpb=64875, bsz=128, num_updates=12487, lr=9.99081e-05, gnorm=2.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=143969 2021-06-20 10:38:26 | INFO | train_inner | epoch 005: 554 / 3002 loss=2.491, ppl=5.62, wps=5811, ups=0.09, wpb=64823, bsz=128, num_updates=12488, lr=9.99081e-05, gnorm=2.035, loss_scale=4, train_wall=11, gb_free=2.8, wall=143980 2021-06-20 10:38:37 | INFO | train_inner | epoch 005: 555 / 3002 loss=2.564, ppl=5.91, wps=5936.4, ups=0.09, wpb=64851, bsz=128, num_updates=12489, lr=9.99081e-05, gnorm=2.106, loss_scale=4, train_wall=10, gb_free=2.8, wall=143991 2021-06-20 10:38:48 | INFO | train_inner | epoch 005: 556 / 3002 loss=2.565, ppl=5.92, wps=5862.3, ups=0.09, wpb=64839, bsz=128, num_updates=12490, lr=9.99081e-05, gnorm=2.017, loss_scale=4, train_wall=11, gb_free=2.8, wall=144002 2021-06-20 10:38:59 | INFO | train_inner | epoch 005: 557 / 3002 loss=2.437, ppl=5.41, wps=5892.7, ups=0.09, wpb=64802, bsz=128, num_updates=12491, lr=9.99081e-05, gnorm=1.919, loss_scale=4, train_wall=11, gb_free=2.8, wall=144013 2021-06-20 10:39:10 | INFO | train_inner | epoch 005: 558 / 3002 loss=2.501, ppl=5.66, wps=5752.5, ups=0.09, wpb=64785, bsz=128, num_updates=12492, lr=9.99081e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=144024 2021-06-20 10:39:21 | INFO | train_inner | epoch 005: 559 / 3002 loss=2.584, ppl=6, wps=5877, ups=0.09, wpb=64788, bsz=128, num_updates=12493, lr=9.9908e-05, gnorm=2.222, loss_scale=4, train_wall=11, gb_free=2.8, wall=144035 2021-06-20 10:39:32 | INFO | train_inner | epoch 005: 560 / 3002 loss=2.655, ppl=6.3, wps=5782.1, ups=0.09, wpb=64789, bsz=128, num_updates=12494, lr=9.9908e-05, gnorm=2.046, loss_scale=4, train_wall=11, gb_free=2.8, wall=144046 2021-06-20 10:39:43 | INFO | train_inner | epoch 005: 561 / 3002 loss=2.641, ppl=6.24, wps=5786.9, ups=0.09, wpb=64798, bsz=128, num_updates=12495, lr=9.9908e-05, gnorm=2.025, loss_scale=4, train_wall=11, gb_free=2.8, wall=144058 2021-06-20 10:39:55 | INFO | train_inner | epoch 005: 562 / 3002 loss=2.601, ppl=6.07, wps=5785.4, ups=0.09, wpb=64819, bsz=128, num_updates=12496, lr=9.9908e-05, gnorm=2.033, loss_scale=4, train_wall=11, gb_free=2.8, wall=144069 2021-06-20 10:40:06 | INFO | train_inner | epoch 005: 563 / 3002 loss=2.458, ppl=5.49, wps=5782.5, ups=0.09, wpb=64861, bsz=128, num_updates=12497, lr=9.9908e-05, gnorm=2.062, loss_scale=4, train_wall=11, gb_free=2.8, wall=144080 2021-06-20 10:40:17 | INFO | train_inner | epoch 005: 564 / 3002 loss=2.62, ppl=6.15, wps=5891.7, ups=0.09, wpb=64845, bsz=128, num_updates=12498, lr=9.9908e-05, gnorm=1.949, loss_scale=4, train_wall=11, gb_free=2.8, wall=144091 2021-06-20 10:40:28 | INFO | train_inner | epoch 005: 565 / 3002 loss=2.595, ppl=6.04, wps=5874.2, ups=0.09, wpb=64877, bsz=128, num_updates=12499, lr=9.9908e-05, gnorm=2.209, loss_scale=4, train_wall=11, gb_free=2.8, wall=144102 2021-06-20 10:40:39 | INFO | train_inner | epoch 005: 566 / 3002 loss=2.491, ppl=5.62, wps=5871.6, ups=0.09, wpb=64862, bsz=128, num_updates=12500, lr=9.9908e-05, gnorm=2.099, loss_scale=4, train_wall=11, gb_free=2.8, wall=144113 2021-06-20 10:40:50 | INFO | train_inner | epoch 005: 567 / 3002 loss=2.476, ppl=5.56, wps=5886.7, ups=0.09, wpb=64849, bsz=128, num_updates=12501, lr=9.9908e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=144124 2021-06-20 10:41:01 | INFO | train_inner | epoch 005: 568 / 3002 loss=2.557, ppl=5.88, wps=5811.3, ups=0.09, wpb=64827, bsz=128, num_updates=12502, lr=9.9908e-05, gnorm=2.116, loss_scale=4, train_wall=11, gb_free=2.8, wall=144135 2021-06-20 10:41:12 | INFO | train_inner | epoch 005: 569 / 3002 loss=2.429, ppl=5.38, wps=5862.5, ups=0.09, wpb=64815, bsz=128, num_updates=12503, lr=9.9908e-05, gnorm=2.002, loss_scale=4, train_wall=11, gb_free=2.8, wall=144146 2021-06-20 10:41:23 | INFO | train_inner | epoch 005: 570 / 3002 loss=2.469, ppl=5.54, wps=5913.7, ups=0.09, wpb=64828, bsz=128, num_updates=12504, lr=9.9908e-05, gnorm=2.036, loss_scale=4, train_wall=10, gb_free=2.8, wall=144157 2021-06-20 10:41:34 | INFO | train_inner | epoch 005: 571 / 3002 loss=2.477, ppl=5.57, wps=5790.7, ups=0.09, wpb=64829, bsz=128, num_updates=12505, lr=9.9908e-05, gnorm=2.159, loss_scale=4, train_wall=11, gb_free=2.8, wall=144169 2021-06-20 10:41:45 | INFO | train_inner | epoch 005: 572 / 3002 loss=2.476, ppl=5.56, wps=5811.5, ups=0.09, wpb=64873, bsz=128, num_updates=12506, lr=9.99079e-05, gnorm=2.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=144180 2021-06-20 10:41:56 | INFO | train_inner | epoch 005: 573 / 3002 loss=2.707, ppl=6.53, wps=5956.9, ups=0.09, wpb=64825, bsz=128, num_updates=12507, lr=9.99079e-05, gnorm=2.007, loss_scale=4, train_wall=10, gb_free=2.8, wall=144191 2021-06-20 10:42:07 | INFO | train_inner | epoch 005: 574 / 3002 loss=2.543, ppl=5.83, wps=5948.8, ups=0.09, wpb=64831, bsz=128, num_updates=12508, lr=9.99079e-05, gnorm=1.979, loss_scale=4, train_wall=10, gb_free=2.8, wall=144202 2021-06-20 10:42:18 | INFO | train_inner | epoch 005: 575 / 3002 loss=2.613, ppl=6.12, wps=5879.1, ups=0.09, wpb=64802, bsz=128, num_updates=12509, lr=9.99079e-05, gnorm=1.994, loss_scale=4, train_wall=11, gb_free=2.8, wall=144213 2021-06-20 10:42:29 | INFO | train_inner | epoch 005: 576 / 3002 loss=2.416, ppl=5.34, wps=5802.8, ups=0.09, wpb=64948, bsz=128, num_updates=12510, lr=9.99079e-05, gnorm=2.03, loss_scale=4, train_wall=11, gb_free=2.8, wall=144224 2021-06-20 10:42:41 | INFO | train_inner | epoch 005: 577 / 3002 loss=2.505, ppl=5.68, wps=5759.5, ups=0.09, wpb=64833, bsz=128, num_updates=12511, lr=9.99079e-05, gnorm=1.988, loss_scale=4, train_wall=11, gb_free=2.8, wall=144235 2021-06-20 10:42:52 | INFO | train_inner | epoch 005: 578 / 3002 loss=2.778, ppl=6.86, wps=5804.5, ups=0.09, wpb=64749, bsz=128, num_updates=12512, lr=9.99079e-05, gnorm=2.018, loss_scale=4, train_wall=11, gb_free=2.8, wall=144246 2021-06-20 10:43:03 | INFO | train_inner | epoch 005: 579 / 3002 loss=2.531, ppl=5.78, wps=5790.9, ups=0.09, wpb=64811, bsz=128, num_updates=12513, lr=9.99079e-05, gnorm=1.952, loss_scale=4, train_wall=11, gb_free=2.8, wall=144257 2021-06-20 10:43:14 | INFO | train_inner | epoch 005: 580 / 3002 loss=2.574, ppl=5.95, wps=5792.1, ups=0.09, wpb=64834, bsz=128, num_updates=12514, lr=9.99079e-05, gnorm=1.987, loss_scale=4, train_wall=11, gb_free=2.8, wall=144269 2021-06-20 10:43:25 | INFO | train_inner | epoch 005: 581 / 3002 loss=2.542, ppl=5.82, wps=5822.2, ups=0.09, wpb=64719, bsz=128, num_updates=12515, lr=9.99079e-05, gnorm=2.004, loss_scale=4, train_wall=11, gb_free=2.8, wall=144280 2021-06-20 10:43:36 | INFO | train_inner | epoch 005: 582 / 3002 loss=2.573, ppl=5.95, wps=5914.8, ups=0.09, wpb=64956, bsz=128, num_updates=12516, lr=9.99079e-05, gnorm=2.046, loss_scale=4, train_wall=11, gb_free=2.8, wall=144291 2021-06-20 10:43:47 | INFO | train_inner | epoch 005: 583 / 3002 loss=2.537, ppl=5.8, wps=5922, ups=0.09, wpb=64824, bsz=128, num_updates=12517, lr=9.99079e-05, gnorm=1.964, loss_scale=4, train_wall=11, gb_free=2.8, wall=144302 2021-06-20 10:43:58 | INFO | train_inner | epoch 005: 584 / 3002 loss=2.509, ppl=5.69, wps=5880.7, ups=0.09, wpb=64873, bsz=128, num_updates=12518, lr=9.99078e-05, gnorm=1.927, loss_scale=4, train_wall=11, gb_free=2.8, wall=144313 2021-06-20 10:44:09 | INFO | train_inner | epoch 005: 585 / 3002 loss=2.333, ppl=5.04, wps=5947, ups=0.09, wpb=64880, bsz=128, num_updates=12519, lr=9.99078e-05, gnorm=2.006, loss_scale=4, train_wall=10, gb_free=2.8, wall=144324 2021-06-20 10:44:20 | INFO | train_inner | epoch 005: 586 / 3002 loss=2.576, ppl=5.96, wps=5857.6, ups=0.09, wpb=64830, bsz=128, num_updates=12520, lr=9.99078e-05, gnorm=2.031, loss_scale=4, train_wall=11, gb_free=2.8, wall=144335 2021-06-20 10:44:31 | INFO | train_inner | epoch 005: 587 / 3002 loss=2.586, ppl=6, wps=5891.5, ups=0.09, wpb=64814, bsz=128, num_updates=12521, lr=9.99078e-05, gnorm=1.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=144346 2021-06-20 10:44:42 | INFO | train_inner | epoch 005: 588 / 3002 loss=2.51, ppl=5.7, wps=5812, ups=0.09, wpb=64939, bsz=128, num_updates=12522, lr=9.99078e-05, gnorm=2.178, loss_scale=4, train_wall=11, gb_free=2.8, wall=144357 2021-06-20 10:44:54 | INFO | train_inner | epoch 005: 589 / 3002 loss=2.414, ppl=5.33, wps=5767, ups=0.09, wpb=64836, bsz=128, num_updates=12523, lr=9.99078e-05, gnorm=2.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=144368 2021-06-20 10:45:05 | INFO | train_inner | epoch 005: 590 / 3002 loss=2.558, ppl=5.89, wps=5802.8, ups=0.09, wpb=64860, bsz=128, num_updates=12524, lr=9.99078e-05, gnorm=2.03, loss_scale=4, train_wall=11, gb_free=2.8, wall=144379 2021-06-20 10:45:16 | INFO | train_inner | epoch 005: 591 / 3002 loss=2.437, ppl=5.42, wps=5753.7, ups=0.09, wpb=64774, bsz=128, num_updates=12525, lr=9.99078e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=144390 2021-06-20 10:45:27 | INFO | train_inner | epoch 005: 592 / 3002 loss=2.436, ppl=5.41, wps=5822, ups=0.09, wpb=64807, bsz=128, num_updates=12526, lr=9.99078e-05, gnorm=1.987, loss_scale=4, train_wall=11, gb_free=2.8, wall=144402 2021-06-20 10:45:38 | INFO | train_inner | epoch 005: 593 / 3002 loss=2.407, ppl=5.3, wps=5884.3, ups=0.09, wpb=64785, bsz=128, num_updates=12527, lr=9.99078e-05, gnorm=1.954, loss_scale=4, train_wall=11, gb_free=2.8, wall=144413 2021-06-20 10:45:49 | INFO | train_inner | epoch 005: 594 / 3002 loss=2.447, ppl=5.45, wps=5930.5, ups=0.09, wpb=64866, bsz=128, num_updates=12528, lr=9.99078e-05, gnorm=2.035, loss_scale=4, train_wall=10, gb_free=2.8, wall=144424 2021-06-20 10:46:00 | INFO | train_inner | epoch 005: 595 / 3002 loss=2.666, ppl=6.34, wps=5820.8, ups=0.09, wpb=64800, bsz=128, num_updates=12529, lr=9.99078e-05, gnorm=2.03, loss_scale=4, train_wall=11, gb_free=2.8, wall=144435 2021-06-20 10:46:11 | INFO | train_inner | epoch 005: 596 / 3002 loss=2.42, ppl=5.35, wps=5818.6, ups=0.09, wpb=64898, bsz=128, num_updates=12530, lr=9.99078e-05, gnorm=1.888, loss_scale=4, train_wall=11, gb_free=2.8, wall=144446 2021-06-20 10:46:23 | INFO | train_inner | epoch 005: 597 / 3002 loss=2.554, ppl=5.87, wps=5791.4, ups=0.09, wpb=64742, bsz=128, num_updates=12531, lr=9.99077e-05, gnorm=1.946, loss_scale=4, train_wall=11, gb_free=2.8, wall=144457 2021-06-20 10:46:34 | INFO | train_inner | epoch 005: 598 / 3002 loss=2.451, ppl=5.47, wps=5788.9, ups=0.09, wpb=64818, bsz=128, num_updates=12532, lr=9.99077e-05, gnorm=1.926, loss_scale=4, train_wall=11, gb_free=2.8, wall=144468 2021-06-20 10:46:45 | INFO | train_inner | epoch 005: 599 / 3002 loss=2.394, ppl=5.26, wps=5857, ups=0.09, wpb=64791, bsz=128, num_updates=12533, lr=9.99077e-05, gnorm=2.01, loss_scale=4, train_wall=11, gb_free=2.8, wall=144479 2021-06-20 10:46:56 | INFO | train_inner | epoch 005: 600 / 3002 loss=2.531, ppl=5.78, wps=5863.8, ups=0.09, wpb=64819, bsz=128, num_updates=12534, lr=9.99077e-05, gnorm=2.017, loss_scale=4, train_wall=11, gb_free=2.8, wall=144490 2021-06-20 10:47:07 | INFO | train_inner | epoch 005: 601 / 3002 loss=2.516, ppl=5.72, wps=5802.6, ups=0.09, wpb=64724, bsz=128, num_updates=12535, lr=9.99077e-05, gnorm=1.911, loss_scale=4, train_wall=11, gb_free=2.8, wall=144501 2021-06-20 10:47:18 | INFO | train_inner | epoch 005: 602 / 3002 loss=2.651, ppl=6.28, wps=5824.3, ups=0.09, wpb=64891, bsz=128, num_updates=12536, lr=9.99077e-05, gnorm=1.97, loss_scale=4, train_wall=11, gb_free=2.8, wall=144513 2021-06-20 10:47:29 | INFO | train_inner | epoch 005: 603 / 3002 loss=2.405, ppl=5.3, wps=5899.3, ups=0.09, wpb=64902, bsz=128, num_updates=12537, lr=9.99077e-05, gnorm=2.021, loss_scale=4, train_wall=11, gb_free=2.8, wall=144524 2021-06-20 10:47:41 | INFO | train_inner | epoch 005: 604 / 3002 loss=2.507, ppl=5.69, wps=5755.3, ups=0.09, wpb=64902, bsz=128, num_updates=12538, lr=9.99077e-05, gnorm=1.949, loss_scale=4, train_wall=11, gb_free=2.8, wall=144535 2021-06-20 10:47:52 | INFO | train_inner | epoch 005: 605 / 3002 loss=2.377, ppl=5.2, wps=5896.9, ups=0.09, wpb=64814, bsz=128, num_updates=12539, lr=9.99077e-05, gnorm=1.924, loss_scale=4, train_wall=11, gb_free=2.8, wall=144546 2021-06-20 10:48:03 | INFO | train_inner | epoch 005: 606 / 3002 loss=2.564, ppl=5.91, wps=5864.9, ups=0.09, wpb=64897, bsz=128, num_updates=12540, lr=9.99077e-05, gnorm=2.033, loss_scale=4, train_wall=11, gb_free=2.8, wall=144557 2021-06-20 10:48:14 | INFO | train_inner | epoch 005: 607 / 3002 loss=2.512, ppl=5.7, wps=5757.3, ups=0.09, wpb=64881, bsz=128, num_updates=12541, lr=9.99077e-05, gnorm=1.923, loss_scale=4, train_wall=11, gb_free=2.8, wall=144568 2021-06-20 10:48:25 | INFO | train_inner | epoch 005: 608 / 3002 loss=2.6, ppl=6.06, wps=5855.2, ups=0.09, wpb=64832, bsz=128, num_updates=12542, lr=9.99077e-05, gnorm=1.97, loss_scale=4, train_wall=11, gb_free=2.8, wall=144579 2021-06-20 10:48:36 | INFO | train_inner | epoch 005: 609 / 3002 loss=2.447, ppl=5.45, wps=5788.5, ups=0.09, wpb=64810, bsz=128, num_updates=12543, lr=9.99076e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=144590 2021-06-20 10:48:47 | INFO | train_inner | epoch 005: 610 / 3002 loss=2.468, ppl=5.53, wps=5921.3, ups=0.09, wpb=64894, bsz=128, num_updates=12544, lr=9.99076e-05, gnorm=2.008, loss_scale=4, train_wall=10, gb_free=2.8, wall=144601 2021-06-20 10:48:58 | INFO | train_inner | epoch 005: 611 / 3002 loss=2.497, ppl=5.64, wps=5786.9, ups=0.09, wpb=64826, bsz=128, num_updates=12545, lr=9.99076e-05, gnorm=2.038, loss_scale=4, train_wall=11, gb_free=2.8, wall=144613 2021-06-20 10:49:09 | INFO | train_inner | epoch 005: 612 / 3002 loss=2.489, ppl=5.61, wps=5869.8, ups=0.09, wpb=64864, bsz=128, num_updates=12546, lr=9.99076e-05, gnorm=2.084, loss_scale=4, train_wall=11, gb_free=2.8, wall=144624 2021-06-20 10:49:21 | INFO | train_inner | epoch 005: 613 / 3002 loss=2.459, ppl=5.5, wps=5790.1, ups=0.09, wpb=64753, bsz=128, num_updates=12547, lr=9.99076e-05, gnorm=1.923, loss_scale=4, train_wall=11, gb_free=2.8, wall=144635 2021-06-20 10:49:32 | INFO | train_inner | epoch 005: 614 / 3002 loss=2.491, ppl=5.62, wps=5885.3, ups=0.09, wpb=64867, bsz=128, num_updates=12548, lr=9.99076e-05, gnorm=1.928, loss_scale=4, train_wall=11, gb_free=2.8, wall=144646 2021-06-20 10:49:43 | INFO | train_inner | epoch 005: 615 / 3002 loss=2.542, ppl=5.82, wps=5802.7, ups=0.09, wpb=64832, bsz=128, num_updates=12549, lr=9.99076e-05, gnorm=1.928, loss_scale=4, train_wall=11, gb_free=2.8, wall=144657 2021-06-20 10:49:54 | INFO | train_inner | epoch 005: 616 / 3002 loss=2.634, ppl=6.21, wps=5873.2, ups=0.09, wpb=64800, bsz=128, num_updates=12550, lr=9.99076e-05, gnorm=1.961, loss_scale=4, train_wall=11, gb_free=2.8, wall=144668 2021-06-20 10:50:05 | INFO | train_inner | epoch 005: 617 / 3002 loss=2.486, ppl=5.6, wps=5874.8, ups=0.09, wpb=64860, bsz=128, num_updates=12551, lr=9.99076e-05, gnorm=2.02, loss_scale=4, train_wall=11, gb_free=2.8, wall=144679 2021-06-20 10:50:16 | INFO | train_inner | epoch 005: 618 / 3002 loss=2.433, ppl=5.4, wps=5959.9, ups=0.09, wpb=64851, bsz=128, num_updates=12552, lr=9.99076e-05, gnorm=1.939, loss_scale=4, train_wall=10, gb_free=2.8, wall=144690 2021-06-20 10:50:27 | INFO | train_inner | epoch 005: 619 / 3002 loss=2.627, ppl=6.18, wps=5859.2, ups=0.09, wpb=64792, bsz=128, num_updates=12553, lr=9.99076e-05, gnorm=1.927, loss_scale=4, train_wall=11, gb_free=2.8, wall=144701 2021-06-20 10:50:38 | INFO | train_inner | epoch 005: 620 / 3002 loss=2.659, ppl=6.32, wps=5930.2, ups=0.09, wpb=64825, bsz=128, num_updates=12554, lr=9.99076e-05, gnorm=3.043, loss_scale=4, train_wall=10, gb_free=2.8, wall=144712 2021-06-20 10:50:49 | INFO | train_inner | epoch 005: 621 / 3002 loss=2.508, ppl=5.69, wps=5797.7, ups=0.09, wpb=64837, bsz=128, num_updates=12555, lr=9.99076e-05, gnorm=1.909, loss_scale=4, train_wall=11, gb_free=2.8, wall=144723 2021-06-20 10:51:00 | INFO | train_inner | epoch 005: 622 / 3002 loss=2.427, ppl=5.38, wps=5742.8, ups=0.09, wpb=64752, bsz=128, num_updates=12556, lr=9.99075e-05, gnorm=1.911, loss_scale=4, train_wall=11, gb_free=2.8, wall=144734 2021-06-20 10:51:11 | INFO | train_inner | epoch 005: 623 / 3002 loss=2.659, ppl=6.32, wps=5713.7, ups=0.09, wpb=64773, bsz=128, num_updates=12557, lr=9.99075e-05, gnorm=1.94, loss_scale=4, train_wall=11, gb_free=2.8, wall=144746 2021-06-20 10:51:23 | INFO | train_inner | epoch 005: 624 / 3002 loss=2.382, ppl=5.21, wps=5784.3, ups=0.09, wpb=64857, bsz=128, num_updates=12558, lr=9.99075e-05, gnorm=1.965, loss_scale=4, train_wall=11, gb_free=2.8, wall=144757 2021-06-20 10:51:34 | INFO | train_inner | epoch 005: 625 / 3002 loss=2.514, ppl=5.71, wps=5884.7, ups=0.09, wpb=64781, bsz=128, num_updates=12559, lr=9.99075e-05, gnorm=1.992, loss_scale=4, train_wall=11, gb_free=2.8, wall=144768 2021-06-20 10:51:45 | INFO | train_inner | epoch 005: 626 / 3002 loss=2.432, ppl=5.4, wps=5895.5, ups=0.09, wpb=64893, bsz=128, num_updates=12560, lr=9.99075e-05, gnorm=3.563, loss_scale=4, train_wall=11, gb_free=2.8, wall=144779 2021-06-20 10:51:56 | INFO | train_inner | epoch 005: 627 / 3002 loss=2.556, ppl=5.88, wps=5807.8, ups=0.09, wpb=64827, bsz=128, num_updates=12561, lr=9.99075e-05, gnorm=2.015, loss_scale=4, train_wall=11, gb_free=2.8, wall=144790 2021-06-20 10:52:07 | INFO | train_inner | epoch 005: 628 / 3002 loss=2.545, ppl=5.83, wps=5964, ups=0.09, wpb=64793, bsz=128, num_updates=12562, lr=9.99075e-05, gnorm=2.044, loss_scale=4, train_wall=10, gb_free=2.8, wall=144801 2021-06-20 10:52:18 | INFO | train_inner | epoch 005: 629 / 3002 loss=2.399, ppl=5.27, wps=5784.6, ups=0.09, wpb=64830, bsz=128, num_updates=12563, lr=9.99075e-05, gnorm=1.918, loss_scale=4, train_wall=11, gb_free=2.8, wall=144812 2021-06-20 10:52:29 | INFO | train_inner | epoch 005: 630 / 3002 loss=2.549, ppl=5.85, wps=5923.1, ups=0.09, wpb=64797, bsz=128, num_updates=12564, lr=9.99075e-05, gnorm=1.913, loss_scale=4, train_wall=10, gb_free=2.8, wall=144823 2021-06-20 10:52:40 | INFO | train_inner | epoch 005: 631 / 3002 loss=2.507, ppl=5.69, wps=5929.7, ups=0.09, wpb=64862, bsz=128, num_updates=12565, lr=9.99075e-05, gnorm=1.936, loss_scale=4, train_wall=10, gb_free=2.8, wall=144834 2021-06-20 10:52:51 | INFO | train_inner | epoch 005: 632 / 3002 loss=2.586, ppl=6, wps=5738, ups=0.09, wpb=64895, bsz=128, num_updates=12566, lr=9.99075e-05, gnorm=1.974, loss_scale=4, train_wall=11, gb_free=2.8, wall=144845 2021-06-20 10:53:02 | INFO | train_inner | epoch 005: 633 / 3002 loss=2.519, ppl=5.73, wps=5748.4, ups=0.09, wpb=64818, bsz=128, num_updates=12567, lr=9.99075e-05, gnorm=1.958, loss_scale=4, train_wall=11, gb_free=2.8, wall=144857 2021-06-20 10:53:13 | INFO | train_inner | epoch 005: 634 / 3002 loss=2.545, ppl=5.83, wps=5921.6, ups=0.09, wpb=64825, bsz=128, num_updates=12568, lr=9.99074e-05, gnorm=1.992, loss_scale=4, train_wall=10, gb_free=2.8, wall=144868 2021-06-20 10:53:25 | INFO | train_inner | epoch 005: 635 / 3002 loss=2.612, ppl=6.11, wps=5781.4, ups=0.09, wpb=64898, bsz=128, num_updates=12569, lr=9.99074e-05, gnorm=1.992, loss_scale=4, train_wall=11, gb_free=2.8, wall=144879 2021-06-20 10:53:36 | INFO | train_inner | epoch 005: 636 / 3002 loss=2.425, ppl=5.37, wps=5711.3, ups=0.09, wpb=64818, bsz=128, num_updates=12570, lr=9.99074e-05, gnorm=2.016, loss_scale=4, train_wall=11, gb_free=2.8, wall=144890 2021-06-20 10:53:47 | INFO | train_inner | epoch 005: 637 / 3002 loss=2.498, ppl=5.65, wps=5783.6, ups=0.09, wpb=64888, bsz=128, num_updates=12571, lr=9.99074e-05, gnorm=2.018, loss_scale=4, train_wall=11, gb_free=2.8, wall=144901 2021-06-20 10:53:58 | INFO | train_inner | epoch 005: 638 / 3002 loss=2.462, ppl=5.51, wps=5907.3, ups=0.09, wpb=64907, bsz=128, num_updates=12572, lr=9.99074e-05, gnorm=2.1, loss_scale=4, train_wall=11, gb_free=2.8, wall=144912 2021-06-20 10:54:09 | INFO | train_inner | epoch 005: 639 / 3002 loss=2.444, ppl=5.44, wps=5906.6, ups=0.09, wpb=64789, bsz=128, num_updates=12573, lr=9.99074e-05, gnorm=2.049, loss_scale=4, train_wall=11, gb_free=2.8, wall=144923 2021-06-20 10:54:20 | INFO | train_inner | epoch 005: 640 / 3002 loss=2.489, ppl=5.62, wps=5785.9, ups=0.09, wpb=64899, bsz=128, num_updates=12574, lr=9.99074e-05, gnorm=2.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=144935 2021-06-20 10:54:32 | INFO | train_inner | epoch 005: 641 / 3002 loss=2.525, ppl=5.76, wps=5773.8, ups=0.09, wpb=64857, bsz=128, num_updates=12575, lr=9.99074e-05, gnorm=2.018, loss_scale=4, train_wall=11, gb_free=2.8, wall=144946 2021-06-20 10:54:43 | INFO | train_inner | epoch 005: 642 / 3002 loss=2.582, ppl=5.99, wps=5801.6, ups=0.09, wpb=64788, bsz=128, num_updates=12576, lr=9.99074e-05, gnorm=2.004, loss_scale=4, train_wall=11, gb_free=2.8, wall=144957 2021-06-20 10:54:54 | INFO | train_inner | epoch 005: 643 / 3002 loss=2.53, ppl=5.78, wps=5890.1, ups=0.09, wpb=64828, bsz=128, num_updates=12577, lr=9.99074e-05, gnorm=2.083, loss_scale=4, train_wall=11, gb_free=2.8, wall=144968 2021-06-20 10:55:05 | INFO | train_inner | epoch 005: 644 / 3002 loss=2.474, ppl=5.56, wps=5787.3, ups=0.09, wpb=64878, bsz=128, num_updates=12578, lr=9.99074e-05, gnorm=1.89, loss_scale=4, train_wall=11, gb_free=2.8, wall=144979 2021-06-20 10:55:16 | INFO | train_inner | epoch 005: 645 / 3002 loss=2.525, ppl=5.75, wps=5886.5, ups=0.09, wpb=64808, bsz=128, num_updates=12579, lr=9.99074e-05, gnorm=1.933, loss_scale=4, train_wall=11, gb_free=2.8, wall=144990 2021-06-20 10:55:27 | INFO | train_inner | epoch 005: 646 / 3002 loss=2.419, ppl=5.35, wps=5753.1, ups=0.09, wpb=64807, bsz=128, num_updates=12580, lr=9.99074e-05, gnorm=2.021, loss_scale=4, train_wall=11, gb_free=2.8, wall=145002 2021-06-20 10:55:38 | INFO | train_inner | epoch 005: 647 / 3002 loss=2.608, ppl=6.1, wps=5838.5, ups=0.09, wpb=64862, bsz=128, num_updates=12581, lr=9.99073e-05, gnorm=1.955, loss_scale=4, train_wall=11, gb_free=2.8, wall=145013 2021-06-20 10:55:49 | INFO | train_inner | epoch 005: 648 / 3002 loss=2.491, ppl=5.62, wps=5842.6, ups=0.09, wpb=64874, bsz=128, num_updates=12582, lr=9.99073e-05, gnorm=2.074, loss_scale=4, train_wall=11, gb_free=2.8, wall=145024 2021-06-20 10:56:00 | INFO | train_inner | epoch 005: 649 / 3002 loss=2.516, ppl=5.72, wps=5974, ups=0.09, wpb=64828, bsz=128, num_updates=12583, lr=9.99073e-05, gnorm=2.123, loss_scale=4, train_wall=10, gb_free=2.8, wall=145035 2021-06-20 10:56:11 | INFO | train_inner | epoch 005: 650 / 3002 loss=2.448, ppl=5.46, wps=5830.3, ups=0.09, wpb=64887, bsz=128, num_updates=12584, lr=9.99073e-05, gnorm=1.937, loss_scale=4, train_wall=11, gb_free=2.8, wall=145046 2021-06-20 10:56:23 | INFO | train_inner | epoch 005: 651 / 3002 loss=2.631, ppl=6.19, wps=5801.7, ups=0.09, wpb=64728, bsz=128, num_updates=12585, lr=9.99073e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=145057 2021-06-20 10:56:34 | INFO | train_inner | epoch 005: 652 / 3002 loss=2.429, ppl=5.39, wps=5810, ups=0.09, wpb=64878, bsz=128, num_updates=12586, lr=9.99073e-05, gnorm=2.054, loss_scale=8, train_wall=11, gb_free=2.8, wall=145068 2021-06-20 10:56:45 | INFO | train_inner | epoch 005: 653 / 3002 loss=2.276, ppl=4.84, wps=5901.9, ups=0.09, wpb=64891, bsz=128, num_updates=12587, lr=9.99073e-05, gnorm=1.909, loss_scale=8, train_wall=11, gb_free=2.8, wall=145079 2021-06-20 10:56:56 | INFO | train_inner | epoch 005: 654 / 3002 loss=2.538, ppl=5.81, wps=5829.2, ups=0.09, wpb=64806, bsz=128, num_updates=12588, lr=9.99073e-05, gnorm=2.036, loss_scale=8, train_wall=11, gb_free=2.8, wall=145090 2021-06-20 10:57:07 | INFO | train_inner | epoch 005: 655 / 3002 loss=2.627, ppl=6.18, wps=5891.5, ups=0.09, wpb=64816, bsz=128, num_updates=12589, lr=9.99073e-05, gnorm=2.064, loss_scale=8, train_wall=11, gb_free=2.8, wall=145101 2021-06-20 10:57:18 | INFO | train_inner | epoch 005: 656 / 3002 loss=2.42, ppl=5.35, wps=5892.1, ups=0.09, wpb=64894, bsz=128, num_updates=12590, lr=9.99073e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=145112 2021-06-20 10:57:29 | INFO | train_inner | epoch 005: 657 / 3002 loss=2.581, ppl=5.98, wps=5853.9, ups=0.09, wpb=64831, bsz=128, num_updates=12591, lr=9.99073e-05, gnorm=2.021, loss_scale=8, train_wall=11, gb_free=2.8, wall=145123 2021-06-20 10:57:40 | INFO | train_inner | epoch 005: 658 / 3002 loss=2.509, ppl=5.69, wps=5868.8, ups=0.09, wpb=64898, bsz=128, num_updates=12592, lr=9.99073e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=145134 2021-06-20 10:57:51 | INFO | train_inner | epoch 005: 659 / 3002 loss=2.516, ppl=5.72, wps=5789.4, ups=0.09, wpb=64915, bsz=128, num_updates=12593, lr=9.99072e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=145146 2021-06-20 10:58:02 | INFO | train_inner | epoch 005: 660 / 3002 loss=2.422, ppl=5.36, wps=5754.2, ups=0.09, wpb=64720, bsz=128, num_updates=12594, lr=9.99072e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=145157 2021-06-20 10:58:14 | INFO | train_inner | epoch 005: 661 / 3002 loss=2.42, ppl=5.35, wps=5872.3, ups=0.09, wpb=64934, bsz=128, num_updates=12595, lr=9.99072e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=145168 2021-06-20 10:58:25 | INFO | train_inner | epoch 005: 662 / 3002 loss=2.462, ppl=5.51, wps=5886.6, ups=0.09, wpb=64790, bsz=128, num_updates=12596, lr=9.99072e-05, gnorm=1.989, loss_scale=8, train_wall=11, gb_free=2.8, wall=145179 2021-06-20 10:58:36 | INFO | train_inner | epoch 005: 663 / 3002 loss=2.608, ppl=6.1, wps=5744.2, ups=0.09, wpb=64781, bsz=128, num_updates=12597, lr=9.99072e-05, gnorm=2.072, loss_scale=8, train_wall=11, gb_free=2.8, wall=145190 2021-06-20 10:58:47 | INFO | train_inner | epoch 005: 664 / 3002 loss=2.374, ppl=5.18, wps=5812.5, ups=0.09, wpb=64814, bsz=128, num_updates=12598, lr=9.99072e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=145201 2021-06-20 10:58:58 | INFO | train_inner | epoch 005: 665 / 3002 loss=2.617, ppl=6.13, wps=5879.6, ups=0.09, wpb=64882, bsz=128, num_updates=12599, lr=9.99072e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=145212 2021-06-20 10:59:09 | INFO | train_inner | epoch 005: 666 / 3002 loss=2.521, ppl=5.74, wps=5944.4, ups=0.09, wpb=64776, bsz=128, num_updates=12600, lr=9.99072e-05, gnorm=2.034, loss_scale=8, train_wall=10, gb_free=2.8, wall=145223 2021-06-20 10:59:20 | INFO | train_inner | epoch 005: 667 / 3002 loss=2.494, ppl=5.63, wps=5864.3, ups=0.09, wpb=64898, bsz=128, num_updates=12601, lr=9.99072e-05, gnorm=2.037, loss_scale=8, train_wall=11, gb_free=2.8, wall=145234 2021-06-20 10:59:31 | INFO | train_inner | epoch 005: 668 / 3002 loss=2.467, ppl=5.53, wps=5835.4, ups=0.09, wpb=64815, bsz=128, num_updates=12602, lr=9.99072e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=145245 2021-06-20 10:59:42 | INFO | train_inner | epoch 005: 669 / 3002 loss=2.567, ppl=5.93, wps=5779, ups=0.09, wpb=64851, bsz=128, num_updates=12603, lr=9.99072e-05, gnorm=2.072, loss_scale=8, train_wall=11, gb_free=2.8, wall=145257 2021-06-20 10:59:53 | INFO | train_inner | epoch 005: 670 / 3002 loss=2.675, ppl=6.39, wps=5897.8, ups=0.09, wpb=64811, bsz=128, num_updates=12604, lr=9.99072e-05, gnorm=2.074, loss_scale=8, train_wall=11, gb_free=2.8, wall=145268 2021-06-20 11:00:04 | INFO | train_inner | epoch 005: 671 / 3002 loss=2.606, ppl=6.09, wps=5845, ups=0.09, wpb=64781, bsz=128, num_updates=12605, lr=9.99072e-05, gnorm=2.038, loss_scale=8, train_wall=11, gb_free=2.8, wall=145279 2021-06-20 11:00:16 | INFO | train_inner | epoch 005: 672 / 3002 loss=2.427, ppl=5.38, wps=5721.6, ups=0.09, wpb=64416, bsz=128, num_updates=12606, lr=9.99071e-05, gnorm=2.062, loss_scale=8, train_wall=11, gb_free=2.8, wall=145290 2021-06-20 11:00:26 | INFO | train_inner | epoch 005: 673 / 3002 loss=2.577, ppl=5.97, wps=5985.1, ups=0.09, wpb=64908, bsz=128, num_updates=12607, lr=9.99071e-05, gnorm=2.029, loss_scale=8, train_wall=10, gb_free=2.8, wall=145301 2021-06-20 11:00:38 | INFO | train_inner | epoch 005: 674 / 3002 loss=2.571, ppl=5.94, wps=5878.1, ups=0.09, wpb=64873, bsz=128, num_updates=12608, lr=9.99071e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=145312 2021-06-20 11:00:49 | INFO | train_inner | epoch 005: 675 / 3002 loss=2.567, ppl=5.93, wps=5809.4, ups=0.09, wpb=64881, bsz=128, num_updates=12609, lr=9.99071e-05, gnorm=2.084, loss_scale=8, train_wall=11, gb_free=2.8, wall=145323 2021-06-20 11:01:00 | INFO | train_inner | epoch 005: 676 / 3002 loss=2.416, ppl=5.34, wps=5832.1, ups=0.09, wpb=64848, bsz=128, num_updates=12610, lr=9.99071e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=145334 2021-06-20 11:01:11 | INFO | train_inner | epoch 005: 677 / 3002 loss=2.404, ppl=5.29, wps=5846.9, ups=0.09, wpb=64842, bsz=128, num_updates=12611, lr=9.99071e-05, gnorm=1.963, loss_scale=8, train_wall=11, gb_free=2.8, wall=145345 2021-06-20 11:01:22 | INFO | train_inner | epoch 005: 678 / 3002 loss=2.494, ppl=5.63, wps=5754.6, ups=0.09, wpb=64795, bsz=128, num_updates=12612, lr=9.99071e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=145356 2021-06-20 11:01:33 | INFO | train_inner | epoch 005: 679 / 3002 loss=2.439, ppl=5.42, wps=5989.5, ups=0.09, wpb=64851, bsz=128, num_updates=12613, lr=9.99071e-05, gnorm=1.934, loss_scale=8, train_wall=10, gb_free=2.8, wall=145367 2021-06-20 11:01:44 | INFO | train_inner | epoch 005: 680 / 3002 loss=2.537, ppl=5.8, wps=5827, ups=0.09, wpb=64827, bsz=128, num_updates=12614, lr=9.99071e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=145378 2021-06-20 11:01:55 | INFO | train_inner | epoch 005: 681 / 3002 loss=2.519, ppl=5.73, wps=5760.9, ups=0.09, wpb=64825, bsz=128, num_updates=12615, lr=9.99071e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=145390 2021-06-20 11:02:06 | INFO | train_inner | epoch 005: 682 / 3002 loss=2.677, ppl=6.4, wps=5825.4, ups=0.09, wpb=64760, bsz=128, num_updates=12616, lr=9.99071e-05, gnorm=1.977, loss_scale=8, train_wall=11, gb_free=2.8, wall=145401 2021-06-20 11:02:18 | INFO | train_inner | epoch 005: 683 / 3002 loss=2.502, ppl=5.66, wps=5864, ups=0.09, wpb=64819, bsz=128, num_updates=12617, lr=9.99071e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=145412 2021-06-20 11:02:29 | INFO | train_inner | epoch 005: 684 / 3002 loss=2.629, ppl=6.19, wps=5818.4, ups=0.09, wpb=64837, bsz=128, num_updates=12618, lr=9.9907e-05, gnorm=2.015, loss_scale=8, train_wall=11, gb_free=2.8, wall=145423 2021-06-20 11:02:40 | INFO | train_inner | epoch 005: 685 / 3002 loss=2.483, ppl=5.59, wps=5900.7, ups=0.09, wpb=64794, bsz=128, num_updates=12619, lr=9.9907e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=145434 2021-06-20 11:02:51 | INFO | train_inner | epoch 005: 686 / 3002 loss=2.482, ppl=5.59, wps=5782.9, ups=0.09, wpb=64841, bsz=128, num_updates=12620, lr=9.9907e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=145445 2021-06-20 11:03:02 | INFO | train_inner | epoch 005: 687 / 3002 loss=2.471, ppl=5.54, wps=5816.6, ups=0.09, wpb=64831, bsz=128, num_updates=12621, lr=9.9907e-05, gnorm=1.888, loss_scale=8, train_wall=11, gb_free=2.8, wall=145456 2021-06-20 11:03:13 | INFO | train_inner | epoch 005: 688 / 3002 loss=2.54, ppl=5.82, wps=5770.3, ups=0.09, wpb=64812, bsz=128, num_updates=12622, lr=9.9907e-05, gnorm=2.049, loss_scale=8, train_wall=11, gb_free=2.8, wall=145468 2021-06-20 11:03:25 | INFO | train_inner | epoch 005: 689 / 3002 loss=2.375, ppl=5.19, wps=5733.4, ups=0.09, wpb=64805, bsz=128, num_updates=12623, lr=9.9907e-05, gnorm=1.941, loss_scale=8, train_wall=11, gb_free=2.8, wall=145479 2021-06-20 11:03:36 | INFO | train_inner | epoch 005: 690 / 3002 loss=2.531, ppl=5.78, wps=5914.4, ups=0.09, wpb=64823, bsz=128, num_updates=12624, lr=9.9907e-05, gnorm=2.037, loss_scale=8, train_wall=10, gb_free=2.8, wall=145490 2021-06-20 11:03:47 | INFO | train_inner | epoch 005: 691 / 3002 loss=2.328, ppl=5.02, wps=5815.7, ups=0.09, wpb=64772, bsz=128, num_updates=12625, lr=9.9907e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=145501 2021-06-20 11:03:58 | INFO | train_inner | epoch 005: 692 / 3002 loss=2.539, ppl=5.81, wps=5853.9, ups=0.09, wpb=64715, bsz=128, num_updates=12626, lr=9.9907e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=145512 2021-06-20 11:04:09 | INFO | train_inner | epoch 005: 693 / 3002 loss=2.507, ppl=5.68, wps=5927.9, ups=0.09, wpb=64908, bsz=128, num_updates=12627, lr=9.9907e-05, gnorm=2.032, loss_scale=8, train_wall=10, gb_free=2.8, wall=145523 2021-06-20 11:04:20 | INFO | train_inner | epoch 005: 694 / 3002 loss=2.458, ppl=5.5, wps=5802.8, ups=0.09, wpb=64772, bsz=128, num_updates=12628, lr=9.9907e-05, gnorm=2, loss_scale=8, train_wall=11, gb_free=2.8, wall=145534 2021-06-20 11:04:31 | INFO | train_inner | epoch 005: 695 / 3002 loss=2.676, ppl=6.39, wps=5835, ups=0.09, wpb=64824, bsz=128, num_updates=12629, lr=9.9907e-05, gnorm=2, loss_scale=8, train_wall=11, gb_free=2.8, wall=145545 2021-06-20 11:04:42 | INFO | train_inner | epoch 005: 696 / 3002 loss=2.576, ppl=5.96, wps=5857.8, ups=0.09, wpb=64851, bsz=128, num_updates=12630, lr=9.9907e-05, gnorm=2.016, loss_scale=8, train_wall=11, gb_free=2.8, wall=145556 2021-06-20 11:04:53 | INFO | train_inner | epoch 005: 697 / 3002 loss=2.358, ppl=5.12, wps=5725.7, ups=0.09, wpb=64807, bsz=128, num_updates=12631, lr=9.99069e-05, gnorm=1.873, loss_scale=8, train_wall=11, gb_free=2.8, wall=145568 2021-06-20 11:05:04 | INFO | train_inner | epoch 005: 698 / 3002 loss=2.61, ppl=6.1, wps=5987.9, ups=0.09, wpb=64792, bsz=128, num_updates=12632, lr=9.99069e-05, gnorm=1.974, loss_scale=8, train_wall=10, gb_free=2.8, wall=145578 2021-06-20 11:05:15 | INFO | train_inner | epoch 005: 699 / 3002 loss=2.659, ppl=6.32, wps=5765.1, ups=0.09, wpb=64856, bsz=128, num_updates=12633, lr=9.99069e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=145590 2021-06-20 11:05:26 | INFO | train_inner | epoch 005: 700 / 3002 loss=2.394, ppl=5.26, wps=5841.2, ups=0.09, wpb=64844, bsz=128, num_updates=12634, lr=9.99069e-05, gnorm=1.874, loss_scale=8, train_wall=11, gb_free=2.8, wall=145601 2021-06-20 11:05:37 | INFO | train_inner | epoch 005: 701 / 3002 loss=2.505, ppl=5.68, wps=5903.4, ups=0.09, wpb=64855, bsz=128, num_updates=12635, lr=9.99069e-05, gnorm=1.934, loss_scale=8, train_wall=11, gb_free=2.8, wall=145612 2021-06-20 11:05:49 | INFO | train_inner | epoch 005: 702 / 3002 loss=2.639, ppl=6.23, wps=5802.3, ups=0.09, wpb=64827, bsz=128, num_updates=12636, lr=9.99069e-05, gnorm=2.036, loss_scale=8, train_wall=11, gb_free=2.8, wall=145623 2021-06-20 11:06:00 | INFO | train_inner | epoch 005: 703 / 3002 loss=2.554, ppl=5.87, wps=5904.8, ups=0.09, wpb=64713, bsz=128, num_updates=12637, lr=9.99069e-05, gnorm=2.785, loss_scale=8, train_wall=11, gb_free=2.8, wall=145634 2021-06-20 11:06:11 | INFO | train_inner | epoch 005: 704 / 3002 loss=2.595, ppl=6.04, wps=5931.1, ups=0.09, wpb=64872, bsz=128, num_updates=12638, lr=9.99069e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=145645 2021-06-20 11:06:21 | INFO | train_inner | epoch 005: 705 / 3002 loss=2.427, ppl=5.38, wps=5945.2, ups=0.09, wpb=64834, bsz=128, num_updates=12639, lr=9.99069e-05, gnorm=1.922, loss_scale=8, train_wall=10, gb_free=2.8, wall=145656 2021-06-20 11:06:32 | INFO | train_inner | epoch 005: 706 / 3002 loss=2.608, ppl=6.1, wps=5930.2, ups=0.09, wpb=64851, bsz=128, num_updates=12640, lr=9.99069e-05, gnorm=2.049, loss_scale=8, train_wall=10, gb_free=2.8, wall=145667 2021-06-20 11:06:44 | INFO | train_inner | epoch 005: 707 / 3002 loss=2.478, ppl=5.57, wps=5718.6, ups=0.09, wpb=64892, bsz=128, num_updates=12641, lr=9.99069e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=145678 2021-06-20 11:06:55 | INFO | train_inner | epoch 005: 708 / 3002 loss=2.674, ppl=6.38, wps=5803.4, ups=0.09, wpb=64761, bsz=128, num_updates=12642, lr=9.99069e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=145689 2021-06-20 11:07:06 | INFO | train_inner | epoch 005: 709 / 3002 loss=2.591, ppl=6.02, wps=5818, ups=0.09, wpb=64816, bsz=128, num_updates=12643, lr=9.99068e-05, gnorm=1.879, loss_scale=8, train_wall=11, gb_free=2.8, wall=145700 2021-06-20 11:07:17 | INFO | train_inner | epoch 005: 710 / 3002 loss=2.581, ppl=5.98, wps=5802.5, ups=0.09, wpb=64828, bsz=128, num_updates=12644, lr=9.99068e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=145712 2021-06-20 11:07:29 | INFO | train_inner | epoch 005: 711 / 3002 loss=2.395, ppl=5.26, wps=5728.9, ups=0.09, wpb=64808, bsz=128, num_updates=12645, lr=9.99068e-05, gnorm=1.904, loss_scale=8, train_wall=11, gb_free=2.8, wall=145723 2021-06-20 11:07:40 | INFO | train_inner | epoch 005: 712 / 3002 loss=2.57, ppl=5.94, wps=5755.2, ups=0.09, wpb=64820, bsz=128, num_updates=12646, lr=9.99068e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=145734 2021-06-20 11:07:51 | INFO | train_inner | epoch 005: 713 / 3002 loss=2.486, ppl=5.6, wps=5849.9, ups=0.09, wpb=64825, bsz=128, num_updates=12647, lr=9.99068e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=145745 2021-06-20 11:08:02 | INFO | train_inner | epoch 005: 714 / 3002 loss=2.526, ppl=5.76, wps=5835.7, ups=0.09, wpb=64975, bsz=128, num_updates=12648, lr=9.99068e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=145756 2021-06-20 11:08:13 | INFO | train_inner | epoch 005: 715 / 3002 loss=2.495, ppl=5.64, wps=5757.2, ups=0.09, wpb=64760, bsz=128, num_updates=12649, lr=9.99068e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=145768 2021-06-20 11:08:24 | INFO | train_inner | epoch 005: 716 / 3002 loss=2.562, ppl=5.91, wps=5763.2, ups=0.09, wpb=64870, bsz=128, num_updates=12650, lr=9.99068e-05, gnorm=1.937, loss_scale=8, train_wall=11, gb_free=2.8, wall=145779 2021-06-20 11:08:36 | INFO | train_inner | epoch 005: 717 / 3002 loss=2.445, ppl=5.44, wps=5778.6, ups=0.09, wpb=64824, bsz=128, num_updates=12651, lr=9.99068e-05, gnorm=2.083, loss_scale=8, train_wall=11, gb_free=2.8, wall=145790 2021-06-20 11:08:47 | INFO | train_inner | epoch 005: 718 / 3002 loss=2.512, ppl=5.7, wps=5840.7, ups=0.09, wpb=64731, bsz=128, num_updates=12652, lr=9.99068e-05, gnorm=2.124, loss_scale=8, train_wall=11, gb_free=2.8, wall=145801 2021-06-20 11:08:58 | INFO | train_inner | epoch 005: 719 / 3002 loss=2.503, ppl=5.67, wps=5912.7, ups=0.09, wpb=64783, bsz=128, num_updates=12653, lr=9.99068e-05, gnorm=1.951, loss_scale=8, train_wall=10, gb_free=2.8, wall=145812 2021-06-20 11:09:09 | INFO | train_inner | epoch 005: 720 / 3002 loss=2.618, ppl=6.14, wps=5875.6, ups=0.09, wpb=64853, bsz=128, num_updates=12654, lr=9.99068e-05, gnorm=2.066, loss_scale=8, train_wall=11, gb_free=2.8, wall=145823 2021-06-20 11:09:20 | INFO | train_inner | epoch 005: 721 / 3002 loss=2.518, ppl=5.73, wps=5850.7, ups=0.09, wpb=64858, bsz=128, num_updates=12655, lr=9.99068e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=145834 2021-06-20 11:09:31 | INFO | train_inner | epoch 005: 722 / 3002 loss=2.5, ppl=5.66, wps=5788.2, ups=0.09, wpb=64767, bsz=128, num_updates=12656, lr=9.99067e-05, gnorm=2.019, loss_scale=8, train_wall=11, gb_free=2.8, wall=145845 2021-06-20 11:09:42 | INFO | train_inner | epoch 005: 723 / 3002 loss=2.52, ppl=5.74, wps=5867.5, ups=0.09, wpb=64868, bsz=128, num_updates=12657, lr=9.99067e-05, gnorm=2.024, loss_scale=8, train_wall=11, gb_free=2.8, wall=145856 2021-06-20 11:09:53 | INFO | train_inner | epoch 005: 724 / 3002 loss=2.556, ppl=5.88, wps=5799.6, ups=0.09, wpb=64830, bsz=128, num_updates=12658, lr=9.99067e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=145868 2021-06-20 11:10:04 | INFO | train_inner | epoch 005: 725 / 3002 loss=2.564, ppl=5.91, wps=5821.5, ups=0.09, wpb=64727, bsz=128, num_updates=12659, lr=9.99067e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=145879 2021-06-20 11:10:15 | INFO | train_inner | epoch 005: 726 / 3002 loss=2.622, ppl=6.16, wps=5906.2, ups=0.09, wpb=64808, bsz=128, num_updates=12660, lr=9.99067e-05, gnorm=2.007, loss_scale=8, train_wall=11, gb_free=2.8, wall=145890 2021-06-20 11:10:26 | INFO | train_inner | epoch 005: 727 / 3002 loss=2.576, ppl=5.96, wps=5863.9, ups=0.09, wpb=64915, bsz=128, num_updates=12661, lr=9.99067e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=145901 2021-06-20 11:10:38 | INFO | train_inner | epoch 005: 728 / 3002 loss=2.615, ppl=6.13, wps=5796.8, ups=0.09, wpb=64789, bsz=128, num_updates=12662, lr=9.99067e-05, gnorm=2.098, loss_scale=8, train_wall=11, gb_free=2.8, wall=145912 2021-06-20 11:10:48 | INFO | train_inner | epoch 005: 729 / 3002 loss=2.614, ppl=6.12, wps=5995.9, ups=0.09, wpb=64828, bsz=128, num_updates=12663, lr=9.99067e-05, gnorm=2, loss_scale=8, train_wall=10, gb_free=2.8, wall=145923 2021-06-20 11:10:59 | INFO | train_inner | epoch 005: 730 / 3002 loss=2.44, ppl=5.43, wps=5936.4, ups=0.09, wpb=64906, bsz=128, num_updates=12664, lr=9.99067e-05, gnorm=1.906, loss_scale=8, train_wall=10, gb_free=2.8, wall=145934 2021-06-20 11:11:11 | INFO | train_inner | epoch 005: 731 / 3002 loss=2.694, ppl=6.47, wps=5723.9, ups=0.09, wpb=64802, bsz=128, num_updates=12665, lr=9.99067e-05, gnorm=1.924, loss_scale=8, train_wall=11, gb_free=2.8, wall=145945 2021-06-20 11:11:22 | INFO | train_inner | epoch 005: 732 / 3002 loss=2.509, ppl=5.69, wps=5693.1, ups=0.09, wpb=64864, bsz=128, num_updates=12666, lr=9.99067e-05, gnorm=2.058, loss_scale=8, train_wall=11, gb_free=2.8, wall=145956 2021-06-20 11:11:33 | INFO | train_inner | epoch 005: 733 / 3002 loss=2.595, ppl=6.04, wps=5777.1, ups=0.09, wpb=64822, bsz=128, num_updates=12667, lr=9.99067e-05, gnorm=2.086, loss_scale=8, train_wall=11, gb_free=2.8, wall=145968 2021-06-20 11:11:45 | INFO | train_inner | epoch 005: 734 / 3002 loss=2.442, ppl=5.43, wps=5794, ups=0.09, wpb=64803, bsz=128, num_updates=12668, lr=9.99066e-05, gnorm=2.155, loss_scale=8, train_wall=11, gb_free=2.8, wall=145979 2021-06-20 11:11:56 | INFO | train_inner | epoch 005: 735 / 3002 loss=2.673, ppl=6.38, wps=5830.3, ups=0.09, wpb=64814, bsz=128, num_updates=12669, lr=9.99066e-05, gnorm=2.022, loss_scale=8, train_wall=11, gb_free=2.8, wall=145990 2021-06-20 11:12:06 | INFO | train_inner | epoch 005: 736 / 3002 loss=2.513, ppl=5.71, wps=6041.5, ups=0.09, wpb=64863, bsz=128, num_updates=12670, lr=9.99066e-05, gnorm=2.014, loss_scale=8, train_wall=10, gb_free=2.8, wall=146001 2021-06-20 11:12:17 | INFO | train_inner | epoch 005: 737 / 3002 loss=2.485, ppl=5.6, wps=5825.6, ups=0.09, wpb=64855, bsz=128, num_updates=12671, lr=9.99066e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=146012 2021-06-20 11:12:29 | INFO | train_inner | epoch 005: 738 / 3002 loss=2.517, ppl=5.72, wps=5755.6, ups=0.09, wpb=64872, bsz=128, num_updates=12672, lr=9.99066e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=146023 2021-06-20 11:12:40 | INFO | train_inner | epoch 005: 739 / 3002 loss=2.469, ppl=5.54, wps=5808.4, ups=0.09, wpb=64877, bsz=128, num_updates=12673, lr=9.99066e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=146034 2021-06-20 11:12:51 | INFO | train_inner | epoch 005: 740 / 3002 loss=2.481, ppl=5.58, wps=5854.1, ups=0.09, wpb=64934, bsz=128, num_updates=12674, lr=9.99066e-05, gnorm=2.015, loss_scale=8, train_wall=11, gb_free=2.8, wall=146045 2021-06-20 11:13:02 | INFO | train_inner | epoch 005: 741 / 3002 loss=2.581, ppl=5.99, wps=5857.5, ups=0.09, wpb=64912, bsz=128, num_updates=12675, lr=9.99066e-05, gnorm=2.024, loss_scale=8, train_wall=11, gb_free=2.8, wall=146056 2021-06-20 11:13:13 | INFO | train_inner | epoch 005: 742 / 3002 loss=2.659, ppl=6.32, wps=5863.1, ups=0.09, wpb=64734, bsz=128, num_updates=12676, lr=9.99066e-05, gnorm=2.031, loss_scale=8, train_wall=11, gb_free=2.8, wall=146067 2021-06-20 11:13:24 | INFO | train_inner | epoch 005: 743 / 3002 loss=2.391, ppl=5.25, wps=5766.3, ups=0.09, wpb=64851, bsz=128, num_updates=12677, lr=9.99066e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=146079 2021-06-20 11:13:36 | INFO | train_inner | epoch 005: 744 / 3002 loss=2.579, ppl=5.98, wps=5791.2, ups=0.09, wpb=64853, bsz=128, num_updates=12678, lr=9.99066e-05, gnorm=1.91, loss_scale=8, train_wall=11, gb_free=2.8, wall=146090 2021-06-20 11:13:47 | INFO | train_inner | epoch 005: 745 / 3002 loss=2.538, ppl=5.81, wps=5897, ups=0.09, wpb=64814, bsz=128, num_updates=12679, lr=9.99066e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=146101 2021-06-20 11:13:58 | INFO | train_inner | epoch 005: 746 / 3002 loss=2.514, ppl=5.71, wps=5750.8, ups=0.09, wpb=64744, bsz=128, num_updates=12680, lr=9.99066e-05, gnorm=2.047, loss_scale=8, train_wall=11, gb_free=2.8, wall=146112 2021-06-20 11:14:09 | INFO | train_inner | epoch 005: 747 / 3002 loss=2.59, ppl=6.02, wps=5813.3, ups=0.09, wpb=64746, bsz=128, num_updates=12681, lr=9.99065e-05, gnorm=2.117, loss_scale=8, train_wall=11, gb_free=2.8, wall=146123 2021-06-20 11:14:20 | INFO | train_inner | epoch 005: 748 / 3002 loss=2.519, ppl=5.73, wps=5807, ups=0.09, wpb=64817, bsz=128, num_updates=12682, lr=9.99065e-05, gnorm=2.789, loss_scale=8, train_wall=11, gb_free=2.8, wall=146134 2021-06-20 11:14:31 | INFO | train_inner | epoch 005: 749 / 3002 loss=2.536, ppl=5.8, wps=5773.3, ups=0.09, wpb=64722, bsz=128, num_updates=12683, lr=9.99065e-05, gnorm=2.016, loss_scale=8, train_wall=11, gb_free=2.8, wall=146146 2021-06-20 11:14:42 | INFO | train_inner | epoch 005: 750 / 3002 loss=2.418, ppl=5.35, wps=5900.4, ups=0.09, wpb=64887, bsz=128, num_updates=12684, lr=9.99065e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=146157 2021-06-20 11:14:54 | INFO | train_inner | epoch 005: 751 / 3002 loss=2.418, ppl=5.35, wps=5750.4, ups=0.09, wpb=64771, bsz=128, num_updates=12685, lr=9.99065e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=146168 2021-06-20 11:15:05 | INFO | train_inner | epoch 005: 752 / 3002 loss=2.44, ppl=5.43, wps=5768.1, ups=0.09, wpb=64886, bsz=128, num_updates=12686, lr=9.99065e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=146179 2021-06-20 11:15:16 | INFO | train_inner | epoch 005: 753 / 3002 loss=2.509, ppl=5.69, wps=5773.8, ups=0.09, wpb=64825, bsz=128, num_updates=12687, lr=9.99065e-05, gnorm=1.977, loss_scale=8, train_wall=11, gb_free=2.8, wall=146190 2021-06-20 11:15:27 | INFO | train_inner | epoch 005: 754 / 3002 loss=2.539, ppl=5.81, wps=5776.9, ups=0.09, wpb=64808, bsz=128, num_updates=12688, lr=9.99065e-05, gnorm=1.923, loss_scale=8, train_wall=11, gb_free=2.8, wall=146202 2021-06-20 11:15:38 | INFO | train_inner | epoch 005: 755 / 3002 loss=2.314, ppl=4.97, wps=5824.2, ups=0.09, wpb=64902, bsz=128, num_updates=12689, lr=9.99065e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=146213 2021-06-20 11:15:50 | INFO | train_inner | epoch 005: 756 / 3002 loss=2.47, ppl=5.54, wps=5800.1, ups=0.09, wpb=64808, bsz=128, num_updates=12690, lr=9.99065e-05, gnorm=2.038, loss_scale=8, train_wall=11, gb_free=2.8, wall=146224 2021-06-20 11:16:01 | INFO | train_inner | epoch 005: 757 / 3002 loss=2.654, ppl=6.29, wps=5800.2, ups=0.09, wpb=64782, bsz=128, num_updates=12691, lr=9.99065e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=146235 2021-06-20 11:16:12 | INFO | train_inner | epoch 005: 758 / 3002 loss=2.554, ppl=5.87, wps=5854.7, ups=0.09, wpb=64742, bsz=128, num_updates=12692, lr=9.99065e-05, gnorm=1.996, loss_scale=8, train_wall=11, gb_free=2.8, wall=146246 2021-06-20 11:16:23 | INFO | train_inner | epoch 005: 759 / 3002 loss=2.41, ppl=5.32, wps=5857.2, ups=0.09, wpb=64787, bsz=128, num_updates=12693, lr=9.99064e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=146257 2021-06-20 11:16:34 | INFO | train_inner | epoch 005: 760 / 3002 loss=2.525, ppl=5.75, wps=5859.1, ups=0.09, wpb=64906, bsz=128, num_updates=12694, lr=9.99064e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=146268 2021-06-20 11:16:45 | INFO | train_inner | epoch 005: 761 / 3002 loss=2.522, ppl=5.74, wps=5809.7, ups=0.09, wpb=64793, bsz=128, num_updates=12695, lr=9.99064e-05, gnorm=2.115, loss_scale=8, train_wall=11, gb_free=2.8, wall=146279 2021-06-20 11:16:56 | INFO | train_inner | epoch 005: 762 / 3002 loss=2.561, ppl=5.9, wps=5843.9, ups=0.09, wpb=64856, bsz=128, num_updates=12696, lr=9.99064e-05, gnorm=2.173, loss_scale=8, train_wall=11, gb_free=2.8, wall=146291 2021-06-20 11:17:07 | INFO | train_inner | epoch 005: 763 / 3002 loss=2.613, ppl=6.12, wps=5961.3, ups=0.09, wpb=64852, bsz=128, num_updates=12697, lr=9.99064e-05, gnorm=2.021, loss_scale=8, train_wall=10, gb_free=2.8, wall=146301 2021-06-20 11:17:18 | INFO | train_inner | epoch 005: 764 / 3002 loss=2.567, ppl=5.93, wps=5955.3, ups=0.09, wpb=64912, bsz=128, num_updates=12698, lr=9.99064e-05, gnorm=1.957, loss_scale=8, train_wall=10, gb_free=2.8, wall=146312 2021-06-20 11:17:29 | INFO | train_inner | epoch 005: 765 / 3002 loss=2.568, ppl=5.93, wps=5855.1, ups=0.09, wpb=64728, bsz=128, num_updates=12699, lr=9.99064e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=146323 2021-06-20 11:17:40 | INFO | train_inner | epoch 005: 766 / 3002 loss=2.516, ppl=5.72, wps=5877.1, ups=0.09, wpb=64886, bsz=128, num_updates=12700, lr=9.99064e-05, gnorm=1.963, loss_scale=8, train_wall=11, gb_free=2.8, wall=146334 2021-06-20 11:17:51 | INFO | train_inner | epoch 005: 767 / 3002 loss=2.637, ppl=6.22, wps=5913.6, ups=0.09, wpb=64794, bsz=128, num_updates=12701, lr=9.99064e-05, gnorm=2.045, loss_scale=8, train_wall=11, gb_free=2.8, wall=146345 2021-06-20 11:18:02 | INFO | train_inner | epoch 005: 768 / 3002 loss=2.504, ppl=5.67, wps=5928.6, ups=0.09, wpb=64919, bsz=128, num_updates=12702, lr=9.99064e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=146356 2021-06-20 11:18:13 | INFO | train_inner | epoch 005: 769 / 3002 loss=2.47, ppl=5.54, wps=5909.5, ups=0.09, wpb=64898, bsz=128, num_updates=12703, lr=9.99064e-05, gnorm=2.052, loss_scale=8, train_wall=11, gb_free=2.8, wall=146367 2021-06-20 11:18:24 | INFO | train_inner | epoch 005: 770 / 3002 loss=2.482, ppl=5.59, wps=5866, ups=0.09, wpb=64730, bsz=128, num_updates=12704, lr=9.99064e-05, gnorm=1.977, loss_scale=8, train_wall=11, gb_free=2.8, wall=146378 2021-06-20 11:18:35 | INFO | train_inner | epoch 005: 771 / 3002 loss=2.421, ppl=5.36, wps=5870.9, ups=0.09, wpb=64748, bsz=128, num_updates=12705, lr=9.99064e-05, gnorm=2.073, loss_scale=8, train_wall=11, gb_free=2.8, wall=146389 2021-06-20 11:18:46 | INFO | train_inner | epoch 005: 772 / 3002 loss=2.459, ppl=5.5, wps=5792.9, ups=0.09, wpb=64833, bsz=128, num_updates=12706, lr=9.99063e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=146401 2021-06-20 11:18:57 | INFO | train_inner | epoch 005: 773 / 3002 loss=2.673, ppl=6.38, wps=5893.3, ups=0.09, wpb=64738, bsz=128, num_updates=12707, lr=9.99063e-05, gnorm=2.089, loss_scale=8, train_wall=11, gb_free=2.8, wall=146412 2021-06-20 11:19:08 | INFO | train_inner | epoch 005: 774 / 3002 loss=2.481, ppl=5.58, wps=5800.2, ups=0.09, wpb=64853, bsz=128, num_updates=12708, lr=9.99063e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=146423 2021-06-20 11:19:20 | INFO | train_inner | epoch 005: 775 / 3002 loss=2.539, ppl=5.81, wps=5838.9, ups=0.09, wpb=64784, bsz=128, num_updates=12709, lr=9.99063e-05, gnorm=2.042, loss_scale=8, train_wall=11, gb_free=2.8, wall=146434 2021-06-20 11:19:31 | INFO | train_inner | epoch 005: 776 / 3002 loss=2.499, ppl=5.65, wps=5857, ups=0.09, wpb=64816, bsz=128, num_updates=12710, lr=9.99063e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=146445 2021-06-20 11:19:42 | INFO | train_inner | epoch 005: 777 / 3002 loss=2.517, ppl=5.72, wps=5755.5, ups=0.09, wpb=64847, bsz=128, num_updates=12711, lr=9.99063e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=146456 2021-06-20 11:19:53 | INFO | train_inner | epoch 005: 778 / 3002 loss=2.453, ppl=5.48, wps=5963.9, ups=0.09, wpb=64966, bsz=128, num_updates=12712, lr=9.99063e-05, gnorm=1.997, loss_scale=8, train_wall=10, gb_free=2.8, wall=146467 2021-06-20 11:20:04 | INFO | train_inner | epoch 005: 779 / 3002 loss=2.419, ppl=5.35, wps=5957.6, ups=0.09, wpb=64910, bsz=128, num_updates=12713, lr=9.99063e-05, gnorm=2.059, loss_scale=16, train_wall=10, gb_free=2.8, wall=146478 2021-06-20 11:20:15 | INFO | train_inner | epoch 005: 780 / 3002 loss=2.495, ppl=5.64, wps=5849, ups=0.09, wpb=64824, bsz=128, num_updates=12714, lr=9.99063e-05, gnorm=2.026, loss_scale=16, train_wall=11, gb_free=2.8, wall=146489 2021-06-20 11:20:26 | INFO | train_inner | epoch 005: 781 / 3002 loss=2.515, ppl=5.71, wps=5853, ups=0.09, wpb=64834, bsz=128, num_updates=12715, lr=9.99063e-05, gnorm=2.019, loss_scale=16, train_wall=11, gb_free=2.8, wall=146500 2021-06-20 11:20:37 | INFO | train_inner | epoch 005: 782 / 3002 loss=2.496, ppl=5.64, wps=5827.9, ups=0.09, wpb=64819, bsz=128, num_updates=12716, lr=9.99063e-05, gnorm=2.072, loss_scale=16, train_wall=11, gb_free=2.8, wall=146511 2021-06-20 11:20:48 | INFO | train_inner | epoch 005: 783 / 3002 loss=2.607, ppl=6.09, wps=5809.7, ups=0.09, wpb=64807, bsz=128, num_updates=12717, lr=9.99063e-05, gnorm=2.039, loss_scale=16, train_wall=11, gb_free=2.8, wall=146522 2021-06-20 11:20:59 | INFO | train_inner | epoch 005: 784 / 3002 loss=2.539, ppl=5.81, wps=5708.9, ups=0.09, wpb=64803, bsz=128, num_updates=12718, lr=9.99062e-05, gnorm=1.935, loss_scale=16, train_wall=11, gb_free=2.8, wall=146534 2021-06-20 11:21:11 | INFO | train_inner | epoch 005: 785 / 3002 loss=2.552, ppl=5.87, wps=5747.2, ups=0.09, wpb=64855, bsz=128, num_updates=12719, lr=9.99062e-05, gnorm=1.97, loss_scale=16, train_wall=11, gb_free=2.8, wall=146545 2021-06-20 11:21:22 | INFO | train_inner | epoch 005: 786 / 3002 loss=2.521, ppl=5.74, wps=5736.2, ups=0.09, wpb=64839, bsz=128, num_updates=12720, lr=9.99062e-05, gnorm=2.067, loss_scale=16, train_wall=11, gb_free=2.8, wall=146556 2021-06-20 11:21:33 | INFO | train_inner | epoch 005: 787 / 3002 loss=2.759, ppl=6.77, wps=5874.6, ups=0.09, wpb=64763, bsz=128, num_updates=12721, lr=9.99062e-05, gnorm=2.029, loss_scale=16, train_wall=11, gb_free=2.8, wall=146567 2021-06-20 11:21:44 | INFO | train_inner | epoch 005: 788 / 3002 loss=2.513, ppl=5.71, wps=5711.6, ups=0.09, wpb=64761, bsz=128, num_updates=12722, lr=9.99062e-05, gnorm=1.992, loss_scale=16, train_wall=11, gb_free=2.8, wall=146579 2021-06-20 11:21:56 | INFO | train_inner | epoch 005: 789 / 3002 loss=2.535, ppl=5.8, wps=5796.7, ups=0.09, wpb=64803, bsz=128, num_updates=12723, lr=9.99062e-05, gnorm=1.987, loss_scale=16, train_wall=11, gb_free=2.8, wall=146590 2021-06-20 11:22:07 | INFO | train_inner | epoch 005: 790 / 3002 loss=2.622, ppl=6.16, wps=5893.1, ups=0.09, wpb=64837, bsz=128, num_updates=12724, lr=9.99062e-05, gnorm=2.148, loss_scale=16, train_wall=11, gb_free=2.8, wall=146601 2021-06-20 11:22:18 | INFO | train_inner | epoch 005: 791 / 3002 loss=2.677, ppl=6.39, wps=5726.9, ups=0.09, wpb=64863, bsz=128, num_updates=12725, lr=9.99062e-05, gnorm=2.034, loss_scale=16, train_wall=11, gb_free=2.8, wall=146612 2021-06-20 11:22:29 | INFO | train_inner | epoch 005: 792 / 3002 loss=2.567, ppl=5.93, wps=5771.7, ups=0.09, wpb=64836, bsz=128, num_updates=12726, lr=9.99062e-05, gnorm=1.977, loss_scale=16, train_wall=11, gb_free=2.8, wall=146623 2021-06-20 11:22:40 | INFO | train_inner | epoch 005: 793 / 3002 loss=2.529, ppl=5.77, wps=5754.7, ups=0.09, wpb=64852, bsz=128, num_updates=12727, lr=9.99062e-05, gnorm=1.937, loss_scale=16, train_wall=11, gb_free=2.8, wall=146635 2021-06-20 11:22:52 | INFO | train_inner | epoch 005: 794 / 3002 loss=2.468, ppl=5.53, wps=5801.1, ups=0.09, wpb=64829, bsz=128, num_updates=12728, lr=9.99062e-05, gnorm=1.969, loss_scale=16, train_wall=11, gb_free=2.8, wall=146646 2021-06-20 11:23:03 | INFO | train_inner | epoch 005: 795 / 3002 loss=2.597, ppl=6.05, wps=5846.1, ups=0.09, wpb=64894, bsz=128, num_updates=12729, lr=9.99062e-05, gnorm=1.993, loss_scale=16, train_wall=11, gb_free=2.8, wall=146657 2021-06-20 11:23:14 | INFO | train_inner | epoch 005: 796 / 3002 loss=2.645, ppl=6.26, wps=5919.2, ups=0.09, wpb=64897, bsz=128, num_updates=12730, lr=9.99062e-05, gnorm=1.984, loss_scale=16, train_wall=10, gb_free=2.8, wall=146668 2021-06-20 11:23:25 | INFO | train_inner | epoch 005: 797 / 3002 loss=2.358, ppl=5.13, wps=5764.6, ups=0.09, wpb=64780, bsz=128, num_updates=12731, lr=9.99061e-05, gnorm=1.896, loss_scale=16, train_wall=11, gb_free=2.8, wall=146679 2021-06-20 11:23:36 | INFO | train_inner | epoch 005: 798 / 3002 loss=2.661, ppl=6.32, wps=5733.5, ups=0.09, wpb=64910, bsz=128, num_updates=12732, lr=9.99061e-05, gnorm=1.985, loss_scale=16, train_wall=11, gb_free=2.8, wall=146691 2021-06-20 11:23:47 | INFO | train_inner | epoch 005: 799 / 3002 loss=2.515, ppl=5.72, wps=5797.7, ups=0.09, wpb=64863, bsz=128, num_updates=12733, lr=9.99061e-05, gnorm=1.882, loss_scale=16, train_wall=11, gb_free=2.8, wall=146702 2021-06-20 11:23:58 | INFO | train_inner | epoch 005: 800 / 3002 loss=2.593, ppl=6.03, wps=5895.1, ups=0.09, wpb=64823, bsz=128, num_updates=12734, lr=9.99061e-05, gnorm=1.949, loss_scale=16, train_wall=11, gb_free=2.8, wall=146713 2021-06-20 11:24:10 | INFO | train_inner | epoch 005: 801 / 3002 loss=2.405, ppl=5.3, wps=5834.8, ups=0.09, wpb=64860, bsz=128, num_updates=12735, lr=9.99061e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=146724 2021-06-20 11:24:21 | INFO | train_inner | epoch 005: 802 / 3002 loss=2.457, ppl=5.49, wps=5771.6, ups=0.09, wpb=64865, bsz=128, num_updates=12736, lr=9.99061e-05, gnorm=1.973, loss_scale=16, train_wall=11, gb_free=2.8, wall=146735 2021-06-20 11:24:32 | INFO | train_inner | epoch 005: 803 / 3002 loss=2.611, ppl=6.11, wps=5897.1, ups=0.09, wpb=64763, bsz=128, num_updates=12737, lr=9.99061e-05, gnorm=2.231, loss_scale=16, train_wall=11, gb_free=2.8, wall=146746 2021-06-20 11:24:43 | INFO | train_inner | epoch 005: 804 / 3002 loss=2.582, ppl=5.99, wps=5906.3, ups=0.09, wpb=64861, bsz=128, num_updates=12738, lr=9.99061e-05, gnorm=2.025, loss_scale=16, train_wall=11, gb_free=2.8, wall=146757 2021-06-20 11:24:54 | INFO | train_inner | epoch 005: 805 / 3002 loss=2.511, ppl=5.7, wps=5829.1, ups=0.09, wpb=64823, bsz=128, num_updates=12739, lr=9.99061e-05, gnorm=2.128, loss_scale=16, train_wall=11, gb_free=2.8, wall=146768 2021-06-20 11:25:05 | INFO | train_inner | epoch 005: 806 / 3002 loss=2.549, ppl=5.85, wps=5813.3, ups=0.09, wpb=64853, bsz=128, num_updates=12740, lr=9.99061e-05, gnorm=2.019, loss_scale=16, train_wall=11, gb_free=2.8, wall=146779 2021-06-20 11:25:16 | INFO | train_inner | epoch 005: 807 / 3002 loss=2.565, ppl=5.92, wps=5797.3, ups=0.09, wpb=64834, bsz=128, num_updates=12741, lr=9.99061e-05, gnorm=2.005, loss_scale=16, train_wall=11, gb_free=2.8, wall=146791 2021-06-20 11:25:27 | INFO | train_inner | epoch 005: 808 / 3002 loss=2.701, ppl=6.5, wps=5847.1, ups=0.09, wpb=64809, bsz=128, num_updates=12742, lr=9.99061e-05, gnorm=2.07, loss_scale=16, train_wall=11, gb_free=2.8, wall=146802 2021-06-20 11:25:38 | INFO | train_inner | epoch 005: 809 / 3002 loss=2.496, ppl=5.64, wps=5874.6, ups=0.09, wpb=64836, bsz=128, num_updates=12743, lr=9.9906e-05, gnorm=2.065, loss_scale=16, train_wall=11, gb_free=2.8, wall=146813 2021-06-20 11:25:49 | INFO | train_inner | epoch 005: 810 / 3002 loss=2.521, ppl=5.74, wps=5798.7, ups=0.09, wpb=64775, bsz=128, num_updates=12744, lr=9.9906e-05, gnorm=2.093, loss_scale=16, train_wall=11, gb_free=2.8, wall=146824 2021-06-20 11:26:00 | INFO | train_inner | epoch 005: 811 / 3002 loss=2.392, ppl=5.25, wps=5888.4, ups=0.09, wpb=64823, bsz=128, num_updates=12745, lr=9.9906e-05, gnorm=1.995, loss_scale=16, train_wall=11, gb_free=2.8, wall=146835 2021-06-20 11:26:12 | INFO | train_inner | epoch 005: 812 / 3002 loss=2.499, ppl=5.65, wps=5806, ups=0.09, wpb=64878, bsz=128, num_updates=12746, lr=9.9906e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=146846 2021-06-20 11:26:23 | INFO | train_inner | epoch 005: 813 / 3002 loss=2.383, ppl=5.22, wps=5865.2, ups=0.09, wpb=64772, bsz=128, num_updates=12747, lr=9.9906e-05, gnorm=1.97, loss_scale=16, train_wall=11, gb_free=2.8, wall=146857 2021-06-20 11:26:34 | INFO | train_inner | epoch 005: 814 / 3002 loss=2.387, ppl=5.23, wps=5787.5, ups=0.09, wpb=64892, bsz=128, num_updates=12748, lr=9.9906e-05, gnorm=1.882, loss_scale=16, train_wall=11, gb_free=2.8, wall=146868 2021-06-20 11:26:45 | INFO | train_inner | epoch 005: 815 / 3002 loss=2.544, ppl=5.83, wps=5815.6, ups=0.09, wpb=64910, bsz=128, num_updates=12749, lr=9.9906e-05, gnorm=1.952, loss_scale=16, train_wall=11, gb_free=2.8, wall=146879 2021-06-20 11:26:56 | INFO | train_inner | epoch 005: 816 / 3002 loss=2.483, ppl=5.59, wps=5870.3, ups=0.09, wpb=64767, bsz=128, num_updates=12750, lr=9.9906e-05, gnorm=2.024, loss_scale=16, train_wall=11, gb_free=2.8, wall=146890 2021-06-20 11:27:07 | INFO | train_inner | epoch 005: 817 / 3002 loss=2.585, ppl=6, wps=5873.3, ups=0.09, wpb=64835, bsz=128, num_updates=12751, lr=9.9906e-05, gnorm=2.077, loss_scale=16, train_wall=11, gb_free=2.8, wall=146901 2021-06-20 11:27:18 | INFO | train_inner | epoch 005: 818 / 3002 loss=2.502, ppl=5.66, wps=5873.2, ups=0.09, wpb=64800, bsz=128, num_updates=12752, lr=9.9906e-05, gnorm=1.951, loss_scale=16, train_wall=11, gb_free=2.8, wall=146913 2021-06-20 11:27:29 | INFO | train_inner | epoch 005: 819 / 3002 loss=2.52, ppl=5.74, wps=5766.3, ups=0.09, wpb=64863, bsz=128, num_updates=12753, lr=9.9906e-05, gnorm=1.94, loss_scale=16, train_wall=11, gb_free=2.8, wall=146924 2021-06-20 11:27:40 | INFO | train_inner | epoch 005: 820 / 3002 loss=2.494, ppl=5.63, wps=5861.7, ups=0.09, wpb=64755, bsz=128, num_updates=12754, lr=9.9906e-05, gnorm=1.906, loss_scale=16, train_wall=11, gb_free=2.8, wall=146935 2021-06-20 11:27:52 | INFO | train_inner | epoch 005: 821 / 3002 loss=2.499, ppl=5.65, wps=5880.1, ups=0.09, wpb=64829, bsz=128, num_updates=12755, lr=9.9906e-05, gnorm=1.87, loss_scale=16, train_wall=11, gb_free=2.8, wall=146946 2021-06-20 11:28:03 | INFO | train_inner | epoch 005: 822 / 3002 loss=2.597, ppl=6.05, wps=5886.3, ups=0.09, wpb=64820, bsz=128, num_updates=12756, lr=9.99059e-05, gnorm=2.022, loss_scale=16, train_wall=11, gb_free=2.8, wall=146957 2021-06-20 11:28:14 | INFO | train_inner | epoch 005: 823 / 3002 loss=2.478, ppl=5.57, wps=5818.6, ups=0.09, wpb=64845, bsz=128, num_updates=12757, lr=9.99059e-05, gnorm=2.148, loss_scale=16, train_wall=11, gb_free=2.8, wall=146968 2021-06-20 11:28:25 | INFO | train_inner | epoch 005: 824 / 3002 loss=2.48, ppl=5.58, wps=5886.1, ups=0.09, wpb=64887, bsz=128, num_updates=12758, lr=9.99059e-05, gnorm=2.017, loss_scale=16, train_wall=11, gb_free=2.8, wall=146979 2021-06-20 11:28:36 | INFO | train_inner | epoch 005: 825 / 3002 loss=2.459, ppl=5.5, wps=5940.4, ups=0.09, wpb=64910, bsz=128, num_updates=12759, lr=9.99059e-05, gnorm=2.083, loss_scale=16, train_wall=10, gb_free=2.8, wall=146990 2021-06-20 11:28:47 | INFO | train_inner | epoch 005: 826 / 3002 loss=2.557, ppl=5.88, wps=5914.6, ups=0.09, wpb=64913, bsz=128, num_updates=12760, lr=9.99059e-05, gnorm=1.924, loss_scale=16, train_wall=11, gb_free=2.8, wall=147001 2021-06-20 11:28:58 | INFO | train_inner | epoch 005: 827 / 3002 loss=2.511, ppl=5.7, wps=5886.8, ups=0.09, wpb=64832, bsz=128, num_updates=12761, lr=9.99059e-05, gnorm=1.948, loss_scale=16, train_wall=11, gb_free=2.8, wall=147012 2021-06-20 11:29:09 | INFO | train_inner | epoch 005: 828 / 3002 loss=2.463, ppl=5.51, wps=5879, ups=0.09, wpb=64882, bsz=128, num_updates=12762, lr=9.99059e-05, gnorm=1.976, loss_scale=16, train_wall=11, gb_free=2.8, wall=147023 2021-06-20 11:29:20 | INFO | train_inner | epoch 005: 829 / 3002 loss=2.552, ppl=5.86, wps=5936.1, ups=0.09, wpb=64831, bsz=128, num_updates=12763, lr=9.99059e-05, gnorm=2.028, loss_scale=16, train_wall=10, gb_free=2.8, wall=147034 2021-06-20 11:29:31 | INFO | train_inner | epoch 005: 830 / 3002 loss=2.406, ppl=5.3, wps=5741, ups=0.09, wpb=64899, bsz=128, num_updates=12764, lr=9.99059e-05, gnorm=1.984, loss_scale=16, train_wall=11, gb_free=2.8, wall=147045 2021-06-20 11:29:42 | INFO | train_inner | epoch 005: 831 / 3002 loss=2.502, ppl=5.67, wps=5881.8, ups=0.09, wpb=64817, bsz=128, num_updates=12765, lr=9.99059e-05, gnorm=2, loss_scale=16, train_wall=11, gb_free=2.8, wall=147056 2021-06-20 11:29:53 | INFO | train_inner | epoch 005: 832 / 3002 loss=2.522, ppl=5.74, wps=5874.4, ups=0.09, wpb=64773, bsz=128, num_updates=12766, lr=9.99059e-05, gnorm=2.139, loss_scale=16, train_wall=11, gb_free=2.8, wall=147067 2021-06-20 11:30:04 | INFO | train_inner | epoch 005: 833 / 3002 loss=2.481, ppl=5.58, wps=5887.4, ups=0.09, wpb=64857, bsz=128, num_updates=12767, lr=9.99059e-05, gnorm=2.006, loss_scale=16, train_wall=11, gb_free=2.8, wall=147078 2021-06-20 11:30:15 | INFO | train_inner | epoch 005: 834 / 3002 loss=2.466, ppl=5.53, wps=5910.5, ups=0.09, wpb=64805, bsz=128, num_updates=12768, lr=9.99058e-05, gnorm=2.012, loss_scale=16, train_wall=11, gb_free=2.8, wall=147089 2021-06-20 11:30:26 | INFO | train_inner | epoch 005: 835 / 3002 loss=2.568, ppl=5.93, wps=5900.3, ups=0.09, wpb=64865, bsz=128, num_updates=12769, lr=9.99058e-05, gnorm=2.033, loss_scale=16, train_wall=11, gb_free=2.8, wall=147100 2021-06-20 11:30:37 | INFO | train_inner | epoch 005: 836 / 3002 loss=2.498, ppl=5.65, wps=5810.3, ups=0.09, wpb=64775, bsz=128, num_updates=12770, lr=9.99058e-05, gnorm=2.004, loss_scale=16, train_wall=11, gb_free=2.8, wall=147111 2021-06-20 11:30:48 | INFO | train_inner | epoch 005: 837 / 3002 loss=2.422, ppl=5.36, wps=5932.9, ups=0.09, wpb=64816, bsz=128, num_updates=12771, lr=9.99058e-05, gnorm=2.04, loss_scale=16, train_wall=10, gb_free=2.8, wall=147122 2021-06-20 11:30:59 | INFO | train_inner | epoch 005: 838 / 3002 loss=2.604, ppl=6.08, wps=5850.3, ups=0.09, wpb=64918, bsz=128, num_updates=12772, lr=9.99058e-05, gnorm=1.992, loss_scale=16, train_wall=11, gb_free=2.8, wall=147133 2021-06-20 11:31:10 | INFO | train_inner | epoch 005: 839 / 3002 loss=2.505, ppl=5.68, wps=5808.2, ups=0.09, wpb=64807, bsz=128, num_updates=12773, lr=9.99058e-05, gnorm=2.218, loss_scale=16, train_wall=11, gb_free=2.8, wall=147145 2021-06-20 11:31:21 | INFO | train_inner | epoch 005: 840 / 3002 loss=2.611, ppl=6.11, wps=5921.5, ups=0.09, wpb=64764, bsz=128, num_updates=12774, lr=9.99058e-05, gnorm=2.059, loss_scale=16, train_wall=10, gb_free=2.8, wall=147155 2021-06-20 11:31:32 | INFO | train_inner | epoch 005: 841 / 3002 loss=2.535, ppl=5.79, wps=5797.2, ups=0.09, wpb=64723, bsz=128, num_updates=12775, lr=9.99058e-05, gnorm=2.053, loss_scale=16, train_wall=11, gb_free=2.8, wall=147167 2021-06-20 11:31:43 | INFO | train_inner | epoch 005: 842 / 3002 loss=2.571, ppl=5.94, wps=5871.6, ups=0.09, wpb=64813, bsz=128, num_updates=12776, lr=9.99058e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=147178 2021-06-20 11:31:54 | INFO | train_inner | epoch 005: 843 / 3002 loss=2.535, ppl=5.79, wps=5884.6, ups=0.09, wpb=64923, bsz=128, num_updates=12777, lr=9.99058e-05, gnorm=1.968, loss_scale=16, train_wall=11, gb_free=2.8, wall=147189 2021-06-20 11:32:06 | INFO | train_inner | epoch 005: 844 / 3002 loss=2.551, ppl=5.86, wps=5744, ups=0.09, wpb=64841, bsz=128, num_updates=12778, lr=9.99058e-05, gnorm=1.898, loss_scale=16, train_wall=11, gb_free=2.8, wall=147200 2021-06-20 11:32:17 | INFO | train_inner | epoch 005: 845 / 3002 loss=2.359, ppl=5.13, wps=5975, ups=0.09, wpb=64841, bsz=128, num_updates=12779, lr=9.99058e-05, gnorm=1.885, loss_scale=16, train_wall=10, gb_free=2.8, wall=147211 2021-06-20 11:32:28 | INFO | train_inner | epoch 005: 846 / 3002 loss=2.583, ppl=5.99, wps=5839.6, ups=0.09, wpb=64874, bsz=128, num_updates=12780, lr=9.99058e-05, gnorm=2.337, loss_scale=16, train_wall=11, gb_free=2.8, wall=147222 2021-06-20 11:32:39 | INFO | train_inner | epoch 005: 847 / 3002 loss=2.452, ppl=5.47, wps=5894.8, ups=0.09, wpb=64903, bsz=128, num_updates=12781, lr=9.99057e-05, gnorm=2.136, loss_scale=16, train_wall=11, gb_free=2.8, wall=147233 2021-06-20 11:32:50 | INFO | train_inner | epoch 005: 848 / 3002 loss=2.566, ppl=5.92, wps=5935.6, ups=0.09, wpb=64792, bsz=128, num_updates=12782, lr=9.99057e-05, gnorm=1.955, loss_scale=16, train_wall=10, gb_free=2.8, wall=147244 2021-06-20 11:33:01 | INFO | train_inner | epoch 005: 849 / 3002 loss=2.528, ppl=5.77, wps=5817.4, ups=0.09, wpb=64855, bsz=128, num_updates=12783, lr=9.99057e-05, gnorm=2.059, loss_scale=16, train_wall=11, gb_free=2.8, wall=147255 2021-06-20 11:33:12 | INFO | train_inner | epoch 005: 850 / 3002 loss=2.559, ppl=5.89, wps=5805.9, ups=0.09, wpb=64877, bsz=128, num_updates=12784, lr=9.99057e-05, gnorm=2.058, loss_scale=16, train_wall=11, gb_free=2.8, wall=147266 2021-06-20 11:33:23 | INFO | train_inner | epoch 005: 851 / 3002 loss=2.517, ppl=5.73, wps=5819.2, ups=0.09, wpb=64745, bsz=128, num_updates=12785, lr=9.99057e-05, gnorm=1.962, loss_scale=16, train_wall=11, gb_free=2.8, wall=147277 2021-06-20 11:33:34 | INFO | train_inner | epoch 005: 852 / 3002 loss=2.52, ppl=5.73, wps=5818, ups=0.09, wpb=64762, bsz=128, num_updates=12786, lr=9.99057e-05, gnorm=1.94, loss_scale=16, train_wall=11, gb_free=2.8, wall=147288 2021-06-20 11:33:45 | INFO | train_inner | epoch 005: 853 / 3002 loss=2.484, ppl=5.59, wps=5814.5, ups=0.09, wpb=64831, bsz=128, num_updates=12787, lr=9.99057e-05, gnorm=2.016, loss_scale=16, train_wall=11, gb_free=2.8, wall=147300 2021-06-20 11:33:56 | INFO | train_inner | epoch 005: 854 / 3002 loss=2.435, ppl=5.41, wps=5805.1, ups=0.09, wpb=64836, bsz=128, num_updates=12788, lr=9.99057e-05, gnorm=1.952, loss_scale=16, train_wall=11, gb_free=2.8, wall=147311 2021-06-20 11:34:07 | INFO | train_inner | epoch 005: 855 / 3002 loss=2.454, ppl=5.48, wps=5902.6, ups=0.09, wpb=64873, bsz=128, num_updates=12789, lr=9.99057e-05, gnorm=1.967, loss_scale=16, train_wall=11, gb_free=2.8, wall=147322 2021-06-20 11:34:19 | INFO | train_inner | epoch 005: 856 / 3002 loss=2.472, ppl=5.55, wps=5821.6, ups=0.09, wpb=64785, bsz=128, num_updates=12790, lr=9.99057e-05, gnorm=1.939, loss_scale=16, train_wall=11, gb_free=2.8, wall=147333 2021-06-20 11:34:30 | INFO | train_inner | epoch 005: 857 / 3002 loss=2.632, ppl=6.2, wps=5799.8, ups=0.09, wpb=64788, bsz=128, num_updates=12791, lr=9.99057e-05, gnorm=3.171, loss_scale=16, train_wall=11, gb_free=2.8, wall=147344 2021-06-20 11:34:41 | INFO | train_inner | epoch 005: 858 / 3002 loss=2.335, ppl=5.05, wps=5748.7, ups=0.09, wpb=64761, bsz=128, num_updates=12792, lr=9.99057e-05, gnorm=1.949, loss_scale=16, train_wall=11, gb_free=2.8, wall=147355 2021-06-20 11:34:52 | INFO | train_inner | epoch 005: 859 / 3002 loss=2.399, ppl=5.28, wps=5707.2, ups=0.09, wpb=64769, bsz=128, num_updates=12793, lr=9.99056e-05, gnorm=1.979, loss_scale=16, train_wall=11, gb_free=2.8, wall=147367 2021-06-20 11:35:04 | INFO | train_inner | epoch 005: 860 / 3002 loss=2.29, ppl=4.89, wps=5815.5, ups=0.09, wpb=64761, bsz=128, num_updates=12794, lr=9.99056e-05, gnorm=1.922, loss_scale=16, train_wall=11, gb_free=2.8, wall=147378 2021-06-20 11:35:14 | INFO | train_inner | epoch 005: 861 / 3002 loss=2.659, ppl=6.32, wps=5982.8, ups=0.09, wpb=64806, bsz=128, num_updates=12795, lr=9.99056e-05, gnorm=2.054, loss_scale=16, train_wall=10, gb_free=2.8, wall=147389 2021-06-20 11:35:25 | INFO | train_inner | epoch 005: 862 / 3002 loss=2.547, ppl=5.84, wps=5875.3, ups=0.09, wpb=64828, bsz=128, num_updates=12796, lr=9.99056e-05, gnorm=2.011, loss_scale=16, train_wall=11, gb_free=2.8, wall=147400 2021-06-20 11:35:36 | INFO | train_inner | epoch 005: 863 / 3002 loss=2.497, ppl=5.65, wps=5822.6, ups=0.09, wpb=64783, bsz=128, num_updates=12797, lr=9.99056e-05, gnorm=1.976, loss_scale=16, train_wall=11, gb_free=2.8, wall=147411 2021-06-20 11:35:48 | INFO | train_inner | epoch 005: 864 / 3002 loss=2.622, ppl=6.16, wps=5848.2, ups=0.09, wpb=64755, bsz=128, num_updates=12798, lr=9.99056e-05, gnorm=1.976, loss_scale=16, train_wall=11, gb_free=2.8, wall=147422 2021-06-20 11:35:59 | INFO | train_inner | epoch 005: 865 / 3002 loss=2.495, ppl=5.64, wps=5890.4, ups=0.09, wpb=64865, bsz=128, num_updates=12799, lr=9.99056e-05, gnorm=1.969, loss_scale=16, train_wall=11, gb_free=2.8, wall=147433 2021-06-20 11:36:09 | INFO | train_inner | epoch 005: 866 / 3002 loss=2.517, ppl=5.72, wps=5968.4, ups=0.09, wpb=64759, bsz=128, num_updates=12800, lr=9.99056e-05, gnorm=2.001, loss_scale=16, train_wall=10, gb_free=2.8, wall=147444 2021-06-20 11:36:20 | INFO | train_inner | epoch 005: 867 / 3002 loss=2.276, ppl=4.84, wps=5919, ups=0.09, wpb=64915, bsz=128, num_updates=12801, lr=9.99056e-05, gnorm=1.958, loss_scale=16, train_wall=11, gb_free=2.8, wall=147455 2021-06-20 11:36:32 | INFO | train_inner | epoch 005: 868 / 3002 loss=2.494, ppl=5.63, wps=5830.2, ups=0.09, wpb=64870, bsz=128, num_updates=12802, lr=9.99056e-05, gnorm=1.988, loss_scale=16, train_wall=11, gb_free=2.8, wall=147466 2021-06-20 11:36:43 | INFO | train_inner | epoch 005: 869 / 3002 loss=2.56, ppl=5.9, wps=5751.6, ups=0.09, wpb=64788, bsz=128, num_updates=12803, lr=9.99056e-05, gnorm=1.974, loss_scale=16, train_wall=11, gb_free=2.8, wall=147477 2021-06-20 11:36:54 | INFO | train_inner | epoch 005: 870 / 3002 loss=2.443, ppl=5.44, wps=5805.6, ups=0.09, wpb=64920, bsz=128, num_updates=12804, lr=9.99056e-05, gnorm=1.989, loss_scale=16, train_wall=11, gb_free=2.8, wall=147488 2021-06-20 11:37:05 | INFO | train_inner | epoch 005: 871 / 3002 loss=2.669, ppl=6.36, wps=5844.4, ups=0.09, wpb=64797, bsz=128, num_updates=12805, lr=9.99056e-05, gnorm=2.069, loss_scale=16, train_wall=11, gb_free=2.8, wall=147499 2021-06-20 11:37:16 | INFO | train_inner | epoch 005: 872 / 3002 loss=2.682, ppl=6.42, wps=5827.7, ups=0.09, wpb=64791, bsz=128, num_updates=12806, lr=9.99055e-05, gnorm=2.044, loss_scale=16, train_wall=11, gb_free=2.8, wall=147511 2021-06-20 11:37:27 | INFO | train_inner | epoch 005: 873 / 3002 loss=2.574, ppl=5.96, wps=5861.8, ups=0.09, wpb=64941, bsz=128, num_updates=12807, lr=9.99055e-05, gnorm=2.228, loss_scale=16, train_wall=11, gb_free=2.8, wall=147522 2021-06-20 11:37:38 | INFO | train_inner | epoch 005: 874 / 3002 loss=2.809, ppl=7.01, wps=5794.4, ups=0.09, wpb=64835, bsz=128, num_updates=12808, lr=9.99055e-05, gnorm=1.923, loss_scale=16, train_wall=11, gb_free=2.8, wall=147533 2021-06-20 11:37:49 | INFO | train_inner | epoch 005: 875 / 3002 loss=2.515, ppl=5.71, wps=6012.9, ups=0.09, wpb=64908, bsz=128, num_updates=12809, lr=9.99055e-05, gnorm=1.895, loss_scale=16, train_wall=10, gb_free=2.8, wall=147544 2021-06-20 11:38:00 | INFO | train_inner | epoch 005: 876 / 3002 loss=2.47, ppl=5.54, wps=5877.8, ups=0.09, wpb=64828, bsz=128, num_updates=12810, lr=9.99055e-05, gnorm=1.932, loss_scale=16, train_wall=11, gb_free=2.8, wall=147555 2021-06-20 11:38:12 | INFO | train_inner | epoch 005: 877 / 3002 loss=2.455, ppl=5.48, wps=5721.2, ups=0.09, wpb=64833, bsz=128, num_updates=12811, lr=9.99055e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=147566 2021-06-20 11:38:23 | INFO | train_inner | epoch 005: 878 / 3002 loss=2.522, ppl=5.74, wps=5876.1, ups=0.09, wpb=64815, bsz=128, num_updates=12812, lr=9.99055e-05, gnorm=2.004, loss_scale=16, train_wall=11, gb_free=2.8, wall=147577 2021-06-20 11:38:34 | INFO | train_inner | epoch 005: 879 / 3002 loss=2.46, ppl=5.5, wps=5895.8, ups=0.09, wpb=64875, bsz=128, num_updates=12813, lr=9.99055e-05, gnorm=1.969, loss_scale=16, train_wall=11, gb_free=2.8, wall=147588 2021-06-20 11:38:45 | INFO | train_inner | epoch 005: 880 / 3002 loss=2.602, ppl=6.07, wps=5753.6, ups=0.09, wpb=64876, bsz=128, num_updates=12814, lr=9.99055e-05, gnorm=2.026, loss_scale=16, train_wall=11, gb_free=2.8, wall=147599 2021-06-20 11:38:56 | INFO | train_inner | epoch 005: 881 / 3002 loss=2.418, ppl=5.34, wps=5924.6, ups=0.09, wpb=64861, bsz=128, num_updates=12815, lr=9.99055e-05, gnorm=1.964, loss_scale=16, train_wall=10, gb_free=2.8, wall=147610 2021-06-20 11:39:07 | INFO | train_inner | epoch 005: 882 / 3002 loss=2.442, ppl=5.44, wps=5805.9, ups=0.09, wpb=64786, bsz=128, num_updates=12816, lr=9.99055e-05, gnorm=1.905, loss_scale=16, train_wall=11, gb_free=2.8, wall=147621 2021-06-20 11:39:18 | INFO | train_inner | epoch 005: 883 / 3002 loss=2.627, ppl=6.18, wps=5826.9, ups=0.09, wpb=64734, bsz=128, num_updates=12817, lr=9.99055e-05, gnorm=1.94, loss_scale=16, train_wall=11, gb_free=2.8, wall=147632 2021-06-20 11:39:29 | INFO | train_inner | epoch 005: 884 / 3002 loss=2.496, ppl=5.64, wps=5821.5, ups=0.09, wpb=64921, bsz=128, num_updates=12818, lr=9.99054e-05, gnorm=1.962, loss_scale=16, train_wall=11, gb_free=2.8, wall=147644 2021-06-20 11:39:40 | INFO | train_inner | epoch 005: 885 / 3002 loss=2.587, ppl=6.01, wps=5811.5, ups=0.09, wpb=64779, bsz=128, num_updates=12819, lr=9.99054e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=147655 2021-06-20 11:39:52 | INFO | train_inner | epoch 005: 886 / 3002 loss=2.308, ppl=4.95, wps=5808.1, ups=0.09, wpb=64841, bsz=128, num_updates=12820, lr=9.99054e-05, gnorm=1.873, loss_scale=16, train_wall=11, gb_free=2.8, wall=147666 2021-06-20 11:40:03 | INFO | train_inner | epoch 005: 887 / 3002 loss=2.447, ppl=5.45, wps=5855.1, ups=0.09, wpb=64809, bsz=128, num_updates=12821, lr=9.99054e-05, gnorm=1.941, loss_scale=16, train_wall=11, gb_free=2.8, wall=147677 2021-06-20 11:40:14 | INFO | train_inner | epoch 005: 888 / 3002 loss=2.681, ppl=6.41, wps=5736.8, ups=0.09, wpb=64780, bsz=128, num_updates=12822, lr=9.99054e-05, gnorm=2.141, loss_scale=16, train_wall=11, gb_free=2.8, wall=147688 2021-06-20 11:40:25 | INFO | train_inner | epoch 005: 889 / 3002 loss=2.496, ppl=5.64, wps=5876.9, ups=0.09, wpb=64821, bsz=128, num_updates=12823, lr=9.99054e-05, gnorm=1.987, loss_scale=16, train_wall=11, gb_free=2.8, wall=147699 2021-06-20 11:40:36 | INFO | train_inner | epoch 005: 890 / 3002 loss=2.481, ppl=5.58, wps=5809.1, ups=0.09, wpb=64774, bsz=128, num_updates=12824, lr=9.99054e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=147710 2021-06-20 11:40:47 | INFO | train_inner | epoch 005: 891 / 3002 loss=2.538, ppl=5.81, wps=5927.9, ups=0.09, wpb=64804, bsz=128, num_updates=12825, lr=9.99054e-05, gnorm=2.005, loss_scale=16, train_wall=10, gb_free=2.8, wall=147721 2021-06-20 11:40:58 | INFO | train_inner | epoch 005: 892 / 3002 loss=2.603, ppl=6.08, wps=5960.9, ups=0.09, wpb=64907, bsz=128, num_updates=12826, lr=9.99054e-05, gnorm=1.996, loss_scale=16, train_wall=10, gb_free=2.8, wall=147732 2021-06-20 11:41:09 | INFO | train_inner | epoch 005: 893 / 3002 loss=2.585, ppl=6, wps=5874.6, ups=0.09, wpb=64750, bsz=128, num_updates=12827, lr=9.99054e-05, gnorm=1.893, loss_scale=16, train_wall=11, gb_free=2.8, wall=147743 2021-06-20 11:41:20 | INFO | train_inner | epoch 005: 894 / 3002 loss=2.591, ppl=6.03, wps=5941.1, ups=0.09, wpb=64853, bsz=128, num_updates=12828, lr=9.99054e-05, gnorm=2.022, loss_scale=16, train_wall=10, gb_free=2.8, wall=147754 2021-06-20 11:41:31 | INFO | train_inner | epoch 005: 895 / 3002 loss=2.457, ppl=5.49, wps=5955.7, ups=0.09, wpb=64890, bsz=128, num_updates=12829, lr=9.99054e-05, gnorm=2.05, loss_scale=16, train_wall=10, gb_free=2.8, wall=147765 2021-06-20 11:41:42 | INFO | train_inner | epoch 005: 896 / 3002 loss=2.579, ppl=5.97, wps=5928, ups=0.09, wpb=64929, bsz=128, num_updates=12830, lr=9.99054e-05, gnorm=2.08, loss_scale=16, train_wall=11, gb_free=2.8, wall=147776 2021-06-20 11:41:53 | INFO | train_inner | epoch 005: 897 / 3002 loss=2.432, ppl=5.4, wps=5786.8, ups=0.09, wpb=64786, bsz=128, num_updates=12831, lr=9.99053e-05, gnorm=2.057, loss_scale=16, train_wall=11, gb_free=2.8, wall=147787 2021-06-20 11:42:04 | INFO | train_inner | epoch 005: 898 / 3002 loss=2.427, ppl=5.38, wps=5820.7, ups=0.09, wpb=64912, bsz=128, num_updates=12832, lr=9.99053e-05, gnorm=2.034, loss_scale=16, train_wall=11, gb_free=2.8, wall=147798 2021-06-20 11:42:15 | INFO | train_inner | epoch 005: 899 / 3002 loss=2.528, ppl=5.77, wps=5817.2, ups=0.09, wpb=64835, bsz=128, num_updates=12833, lr=9.99053e-05, gnorm=2.042, loss_scale=16, train_wall=11, gb_free=2.8, wall=147810 2021-06-20 11:42:26 | INFO | train_inner | epoch 005: 900 / 3002 loss=2.392, ppl=5.25, wps=5810.3, ups=0.09, wpb=64858, bsz=128, num_updates=12834, lr=9.99053e-05, gnorm=1.907, loss_scale=16, train_wall=11, gb_free=2.8, wall=147821 2021-06-20 11:42:37 | INFO | train_inner | epoch 005: 901 / 3002 loss=2.566, ppl=5.92, wps=5926.9, ups=0.09, wpb=64828, bsz=128, num_updates=12835, lr=9.99053e-05, gnorm=1.95, loss_scale=16, train_wall=11, gb_free=2.8, wall=147832 2021-06-20 11:42:48 | INFO | train_inner | epoch 005: 902 / 3002 loss=2.469, ppl=5.54, wps=5918.7, ups=0.09, wpb=64814, bsz=128, num_updates=12836, lr=9.99053e-05, gnorm=1.873, loss_scale=16, train_wall=10, gb_free=2.8, wall=147843 2021-06-20 11:42:59 | INFO | train_inner | epoch 005: 903 / 3002 loss=2.576, ppl=5.96, wps=5965.4, ups=0.09, wpb=64874, bsz=128, num_updates=12837, lr=9.99053e-05, gnorm=1.934, loss_scale=16, train_wall=10, gb_free=2.8, wall=147854 2021-06-20 11:43:10 | INFO | train_inner | epoch 005: 904 / 3002 loss=2.503, ppl=5.67, wps=5899.3, ups=0.09, wpb=64792, bsz=128, num_updates=12838, lr=9.99053e-05, gnorm=1.897, loss_scale=16, train_wall=11, gb_free=2.8, wall=147864 2021-06-20 11:43:21 | INFO | train_inner | epoch 005: 905 / 3002 loss=2.582, ppl=5.99, wps=5831.2, ups=0.09, wpb=64836, bsz=128, num_updates=12839, lr=9.99053e-05, gnorm=2.025, loss_scale=16, train_wall=11, gb_free=2.8, wall=147876 2021-06-20 11:43:32 | INFO | train_inner | epoch 005: 906 / 3002 loss=2.455, ppl=5.48, wps=5814, ups=0.09, wpb=64818, bsz=128, num_updates=12840, lr=9.99053e-05, gnorm=1.914, loss_scale=16, train_wall=11, gb_free=2.8, wall=147887 2021-06-20 11:43:43 | INFO | train_inner | epoch 005: 907 / 3002 loss=2.463, ppl=5.51, wps=5885.1, ups=0.09, wpb=64849, bsz=128, num_updates=12841, lr=9.99053e-05, gnorm=1.987, loss_scale=32, train_wall=11, gb_free=2.8, wall=147898 2021-06-20 11:43:55 | INFO | train_inner | epoch 005: 908 / 3002 loss=2.499, ppl=5.65, wps=5808.1, ups=0.09, wpb=64833, bsz=128, num_updates=12842, lr=9.99053e-05, gnorm=1.958, loss_scale=32, train_wall=11, gb_free=2.8, wall=147909 2021-06-20 11:44:06 | INFO | train_inner | epoch 005: 909 / 3002 loss=2.52, ppl=5.74, wps=5776.9, ups=0.09, wpb=64821, bsz=128, num_updates=12843, lr=9.99052e-05, gnorm=1.97, loss_scale=32, train_wall=11, gb_free=2.8, wall=147920 2021-06-20 11:44:17 | INFO | train_inner | epoch 005: 910 / 3002 loss=2.433, ppl=5.4, wps=5846.4, ups=0.09, wpb=64833, bsz=128, num_updates=12844, lr=9.99052e-05, gnorm=1.874, loss_scale=32, train_wall=11, gb_free=2.8, wall=147931 2021-06-20 11:44:28 | INFO | train_inner | epoch 005: 911 / 3002 loss=2.461, ppl=5.5, wps=5770.2, ups=0.09, wpb=64838, bsz=128, num_updates=12845, lr=9.99052e-05, gnorm=1.93, loss_scale=32, train_wall=11, gb_free=2.8, wall=147942 2021-06-20 11:44:39 | INFO | train_inner | epoch 005: 912 / 3002 loss=2.528, ppl=5.77, wps=5721.9, ups=0.09, wpb=64814, bsz=128, num_updates=12846, lr=9.99052e-05, gnorm=1.91, loss_scale=32, train_wall=11, gb_free=2.8, wall=147954 2021-06-20 11:44:51 | INFO | train_inner | epoch 005: 913 / 3002 loss=2.463, ppl=5.51, wps=5785.9, ups=0.09, wpb=64850, bsz=128, num_updates=12847, lr=9.99052e-05, gnorm=2.001, loss_scale=32, train_wall=11, gb_free=2.8, wall=147965 2021-06-20 11:45:02 | INFO | train_inner | epoch 005: 914 / 3002 loss=2.515, ppl=5.72, wps=5712.1, ups=0.09, wpb=64786, bsz=128, num_updates=12848, lr=9.99052e-05, gnorm=1.972, loss_scale=32, train_wall=11, gb_free=2.8, wall=147976 2021-06-20 11:45:13 | INFO | train_inner | epoch 005: 915 / 3002 loss=2.551, ppl=5.86, wps=5898.8, ups=0.09, wpb=64856, bsz=128, num_updates=12849, lr=9.99052e-05, gnorm=2.013, loss_scale=32, train_wall=10, gb_free=2.8, wall=147987 2021-06-20 11:45:24 | INFO | train_inner | epoch 005: 916 / 3002 loss=2.564, ppl=5.91, wps=5913.9, ups=0.09, wpb=64868, bsz=128, num_updates=12850, lr=9.99052e-05, gnorm=1.936, loss_scale=32, train_wall=10, gb_free=2.8, wall=147998 2021-06-20 11:45:35 | INFO | train_inner | epoch 005: 917 / 3002 loss=2.6, ppl=6.06, wps=5821.9, ups=0.09, wpb=64824, bsz=128, num_updates=12851, lr=9.99052e-05, gnorm=1.996, loss_scale=32, train_wall=11, gb_free=2.8, wall=148009 2021-06-20 11:45:46 | INFO | train_inner | epoch 005: 918 / 3002 loss=2.477, ppl=5.57, wps=5910.9, ups=0.09, wpb=64796, bsz=128, num_updates=12852, lr=9.99052e-05, gnorm=1.894, loss_scale=32, train_wall=11, gb_free=2.8, wall=148020 2021-06-20 11:45:57 | INFO | train_inner | epoch 005: 919 / 3002 loss=2.397, ppl=5.27, wps=5811, ups=0.09, wpb=64878, bsz=128, num_updates=12853, lr=9.99052e-05, gnorm=1.966, loss_scale=32, train_wall=11, gb_free=2.8, wall=148032 2021-06-20 11:46:08 | INFO | train_inner | epoch 005: 920 / 3002 loss=2.388, ppl=5.23, wps=5828.7, ups=0.09, wpb=64914, bsz=128, num_updates=12854, lr=9.99052e-05, gnorm=1.961, loss_scale=32, train_wall=11, gb_free=2.8, wall=148043 2021-06-20 11:46:19 | INFO | train_inner | epoch 005: 921 / 3002 loss=2.493, ppl=5.63, wps=5879.2, ups=0.09, wpb=64778, bsz=128, num_updates=12855, lr=9.99052e-05, gnorm=1.979, loss_scale=32, train_wall=11, gb_free=2.8, wall=148054 2021-06-20 11:46:30 | INFO | train_inner | epoch 005: 922 / 3002 loss=2.714, ppl=6.56, wps=5927.5, ups=0.09, wpb=64874, bsz=128, num_updates=12856, lr=9.99051e-05, gnorm=2.063, loss_scale=32, train_wall=10, gb_free=2.8, wall=148065 2021-06-20 11:46:42 | INFO | train_inner | epoch 005: 923 / 3002 loss=2.439, ppl=5.42, wps=5804.4, ups=0.09, wpb=64799, bsz=128, num_updates=12857, lr=9.99051e-05, gnorm=2.014, loss_scale=32, train_wall=11, gb_free=2.8, wall=148076 2021-06-20 11:46:53 | INFO | train_inner | epoch 005: 924 / 3002 loss=2.601, ppl=6.07, wps=5875.5, ups=0.09, wpb=64815, bsz=128, num_updates=12858, lr=9.99051e-05, gnorm=2.033, loss_scale=32, train_wall=11, gb_free=2.8, wall=148087 2021-06-20 11:47:04 | INFO | train_inner | epoch 005: 925 / 3002 loss=2.563, ppl=5.91, wps=5827, ups=0.09, wpb=64893, bsz=128, num_updates=12859, lr=9.99051e-05, gnorm=2.08, loss_scale=32, train_wall=11, gb_free=2.8, wall=148098 2021-06-20 11:47:15 | INFO | train_inner | epoch 005: 926 / 3002 loss=2.439, ppl=5.42, wps=5690.6, ups=0.09, wpb=64809, bsz=128, num_updates=12860, lr=9.99051e-05, gnorm=1.966, loss_scale=32, train_wall=11, gb_free=2.8, wall=148109 2021-06-20 11:47:26 | INFO | train_inner | epoch 005: 927 / 3002 loss=2.558, ppl=5.89, wps=5859.4, ups=0.09, wpb=64887, bsz=128, num_updates=12861, lr=9.99051e-05, gnorm=1.989, loss_scale=32, train_wall=11, gb_free=2.8, wall=148120 2021-06-20 11:47:37 | INFO | train_inner | epoch 005: 928 / 3002 loss=2.657, ppl=6.31, wps=5895.1, ups=0.09, wpb=64882, bsz=128, num_updates=12862, lr=9.99051e-05, gnorm=1.947, loss_scale=32, train_wall=11, gb_free=2.8, wall=148131 2021-06-20 11:47:48 | INFO | train_inner | epoch 005: 929 / 3002 loss=2.714, ppl=6.56, wps=5814.8, ups=0.09, wpb=64815, bsz=128, num_updates=12863, lr=9.99051e-05, gnorm=1.976, loss_scale=32, train_wall=11, gb_free=2.8, wall=148143 2021-06-20 11:48:00 | INFO | train_inner | epoch 005: 930 / 3002 loss=2.304, ppl=4.94, wps=5772.3, ups=0.09, wpb=64820, bsz=128, num_updates=12864, lr=9.99051e-05, gnorm=1.959, loss_scale=32, train_wall=11, gb_free=2.8, wall=148154 2021-06-20 11:48:11 | INFO | train_inner | epoch 005: 931 / 3002 loss=2.396, ppl=5.26, wps=5834.9, ups=0.09, wpb=64814, bsz=128, num_updates=12865, lr=9.99051e-05, gnorm=1.925, loss_scale=32, train_wall=11, gb_free=2.8, wall=148165 2021-06-20 11:48:22 | INFO | train_inner | epoch 005: 932 / 3002 loss=2.531, ppl=5.78, wps=5802.8, ups=0.09, wpb=64733, bsz=128, num_updates=12866, lr=9.99051e-05, gnorm=1.968, loss_scale=32, train_wall=11, gb_free=2.8, wall=148176 2021-06-20 11:48:33 | INFO | train_inner | epoch 005: 933 / 3002 loss=2.612, ppl=6.11, wps=5854, ups=0.09, wpb=64832, bsz=128, num_updates=12867, lr=9.99051e-05, gnorm=2.17, loss_scale=32, train_wall=11, gb_free=2.8, wall=148187 2021-06-20 11:48:44 | INFO | train_inner | epoch 005: 934 / 3002 loss=2.702, ppl=6.51, wps=5837.5, ups=0.09, wpb=64882, bsz=128, num_updates=12868, lr=9.9905e-05, gnorm=2.046, loss_scale=32, train_wall=11, gb_free=2.8, wall=148198 2021-06-20 11:48:55 | INFO | train_inner | epoch 005: 935 / 3002 loss=2.545, ppl=5.84, wps=5868.3, ups=0.09, wpb=64781, bsz=128, num_updates=12869, lr=9.9905e-05, gnorm=1.986, loss_scale=32, train_wall=11, gb_free=2.8, wall=148209 2021-06-20 11:49:06 | INFO | train_inner | epoch 005: 936 / 3002 loss=2.438, ppl=5.42, wps=5759.6, ups=0.09, wpb=64814, bsz=128, num_updates=12870, lr=9.9905e-05, gnorm=1.983, loss_scale=32, train_wall=11, gb_free=2.8, wall=148221 2021-06-20 11:49:18 | INFO | train_inner | epoch 005: 937 / 3002 loss=2.399, ppl=5.27, wps=5768.1, ups=0.09, wpb=64789, bsz=128, num_updates=12871, lr=9.9905e-05, gnorm=1.962, loss_scale=32, train_wall=11, gb_free=2.8, wall=148232 2021-06-20 11:49:29 | INFO | train_inner | epoch 005: 938 / 3002 loss=2.567, ppl=5.93, wps=5816.2, ups=0.09, wpb=64815, bsz=128, num_updates=12872, lr=9.9905e-05, gnorm=2.095, loss_scale=32, train_wall=11, gb_free=2.8, wall=148243 2021-06-20 11:49:40 | INFO | train_inner | epoch 005: 939 / 3002 loss=2.582, ppl=5.99, wps=5722.8, ups=0.09, wpb=64842, bsz=128, num_updates=12873, lr=9.9905e-05, gnorm=1.991, loss_scale=32, train_wall=11, gb_free=2.8, wall=148254 2021-06-20 11:49:51 | INFO | train_inner | epoch 005: 940 / 3002 loss=2.686, ppl=6.44, wps=5873.3, ups=0.09, wpb=64698, bsz=128, num_updates=12874, lr=9.9905e-05, gnorm=1.988, loss_scale=32, train_wall=11, gb_free=2.8, wall=148265 2021-06-20 11:50:02 | INFO | train_inner | epoch 005: 941 / 3002 loss=2.676, ppl=6.39, wps=5762, ups=0.09, wpb=64783, bsz=128, num_updates=12875, lr=9.9905e-05, gnorm=1.959, loss_scale=32, train_wall=11, gb_free=2.8, wall=148277 2021-06-20 11:50:13 | INFO | train_inner | epoch 005: 942 / 3002 loss=2.56, ppl=5.9, wps=5781.1, ups=0.09, wpb=64857, bsz=128, num_updates=12876, lr=9.9905e-05, gnorm=2.057, loss_scale=32, train_wall=11, gb_free=2.8, wall=148288 2021-06-20 11:50:25 | INFO | train_inner | epoch 005: 943 / 3002 loss=2.473, ppl=5.55, wps=5738.9, ups=0.09, wpb=64840, bsz=128, num_updates=12877, lr=9.9905e-05, gnorm=1.94, loss_scale=32, train_wall=11, gb_free=2.8, wall=148299 2021-06-20 11:50:36 | INFO | train_inner | epoch 005: 944 / 3002 loss=2.604, ppl=6.08, wps=5991.1, ups=0.09, wpb=64846, bsz=128, num_updates=12878, lr=9.9905e-05, gnorm=2, loss_scale=32, train_wall=10, gb_free=2.8, wall=148310 2021-06-20 11:50:47 | INFO | train_inner | epoch 005: 945 / 3002 loss=2.546, ppl=5.84, wps=5870.2, ups=0.09, wpb=64812, bsz=128, num_updates=12879, lr=9.9905e-05, gnorm=2.004, loss_scale=32, train_wall=11, gb_free=2.8, wall=148321 2021-06-20 11:50:58 | INFO | train_inner | epoch 005: 946 / 3002 loss=2.606, ppl=6.09, wps=5829.1, ups=0.09, wpb=64864, bsz=128, num_updates=12880, lr=9.9905e-05, gnorm=2.034, loss_scale=32, train_wall=11, gb_free=2.8, wall=148332 2021-06-20 11:51:09 | INFO | train_inner | epoch 005: 947 / 3002 loss=2.467, ppl=5.53, wps=5811.6, ups=0.09, wpb=64836, bsz=128, num_updates=12881, lr=9.99049e-05, gnorm=1.949, loss_scale=32, train_wall=11, gb_free=2.8, wall=148343 2021-06-20 11:51:20 | INFO | train_inner | epoch 005: 948 / 3002 loss=2.492, ppl=5.62, wps=5859.7, ups=0.09, wpb=64845, bsz=128, num_updates=12882, lr=9.99049e-05, gnorm=1.913, loss_scale=32, train_wall=11, gb_free=2.8, wall=148354 2021-06-20 11:51:31 | INFO | train_inner | epoch 005: 949 / 3002 loss=2.554, ppl=5.87, wps=5952.9, ups=0.09, wpb=64823, bsz=128, num_updates=12883, lr=9.99049e-05, gnorm=2.042, loss_scale=32, train_wall=10, gb_free=2.8, wall=148365 2021-06-20 11:51:42 | INFO | train_inner | epoch 005: 950 / 3002 loss=2.566, ppl=5.92, wps=5776.8, ups=0.09, wpb=64822, bsz=128, num_updates=12884, lr=9.99049e-05, gnorm=1.964, loss_scale=32, train_wall=11, gb_free=2.8, wall=148376 2021-06-20 11:51:53 | INFO | train_inner | epoch 005: 951 / 3002 loss=2.51, ppl=5.7, wps=5799.3, ups=0.09, wpb=64816, bsz=128, num_updates=12885, lr=9.99049e-05, gnorm=2.006, loss_scale=32, train_wall=11, gb_free=2.8, wall=148388 2021-06-20 11:52:04 | INFO | train_inner | epoch 005: 952 / 3002 loss=2.552, ppl=5.87, wps=5891.1, ups=0.09, wpb=64857, bsz=128, num_updates=12886, lr=9.99049e-05, gnorm=1.936, loss_scale=32, train_wall=11, gb_free=2.8, wall=148399 2021-06-20 11:52:15 | INFO | train_inner | epoch 005: 953 / 3002 loss=2.518, ppl=5.73, wps=5809.3, ups=0.09, wpb=64772, bsz=128, num_updates=12887, lr=9.99049e-05, gnorm=1.961, loss_scale=32, train_wall=11, gb_free=2.8, wall=148410 2021-06-20 11:52:27 | INFO | train_inner | epoch 005: 954 / 3002 loss=2.613, ppl=6.12, wps=5777.3, ups=0.09, wpb=64822, bsz=128, num_updates=12888, lr=9.99049e-05, gnorm=2.019, loss_scale=32, train_wall=11, gb_free=2.8, wall=148421 2021-06-20 11:52:38 | INFO | train_inner | epoch 005: 955 / 3002 loss=2.556, ppl=5.88, wps=5881.5, ups=0.09, wpb=64775, bsz=128, num_updates=12889, lr=9.99049e-05, gnorm=1.892, loss_scale=32, train_wall=11, gb_free=2.8, wall=148432 2021-06-20 11:52:49 | INFO | train_inner | epoch 005: 956 / 3002 loss=2.71, ppl=6.54, wps=5911.1, ups=0.09, wpb=64862, bsz=128, num_updates=12890, lr=9.99049e-05, gnorm=2.003, loss_scale=32, train_wall=11, gb_free=2.8, wall=148443 2021-06-20 11:53:00 | INFO | train_inner | epoch 005: 957 / 3002 loss=2.539, ppl=5.81, wps=5755.1, ups=0.09, wpb=64789, bsz=128, num_updates=12891, lr=9.99049e-05, gnorm=1.981, loss_scale=32, train_wall=11, gb_free=2.8, wall=148454 2021-06-20 11:53:11 | INFO | train_inner | epoch 005: 958 / 3002 loss=2.469, ppl=5.54, wps=5826.5, ups=0.09, wpb=64827, bsz=128, num_updates=12892, lr=9.99049e-05, gnorm=2.004, loss_scale=32, train_wall=11, gb_free=2.8, wall=148465 2021-06-20 11:53:22 | INFO | train_inner | epoch 005: 959 / 3002 loss=2.671, ppl=6.37, wps=5696.3, ups=0.09, wpb=64749, bsz=128, num_updates=12893, lr=9.99048e-05, gnorm=1.972, loss_scale=32, train_wall=11, gb_free=2.8, wall=148477 2021-06-20 11:53:33 | INFO | train_inner | epoch 005: 960 / 3002 loss=2.608, ppl=6.1, wps=5904, ups=0.09, wpb=64720, bsz=128, num_updates=12894, lr=9.99048e-05, gnorm=1.919, loss_scale=32, train_wall=10, gb_free=2.8, wall=148488 2021-06-20 11:53:44 | INFO | train_inner | epoch 005: 961 / 3002 loss=2.633, ppl=6.2, wps=5851, ups=0.09, wpb=64899, bsz=128, num_updates=12895, lr=9.99048e-05, gnorm=2.049, loss_scale=32, train_wall=11, gb_free=2.8, wall=148499 2021-06-20 11:53:55 | INFO | train_inner | epoch 005: 962 / 3002 loss=2.496, ppl=5.64, wps=5936.7, ups=0.09, wpb=64866, bsz=128, num_updates=12896, lr=9.99048e-05, gnorm=2.259, loss_scale=32, train_wall=10, gb_free=2.8, wall=148510 2021-06-20 11:54:07 | INFO | train_inner | epoch 005: 963 / 3002 loss=2.495, ppl=5.64, wps=5783.5, ups=0.09, wpb=64690, bsz=128, num_updates=12897, lr=9.99048e-05, gnorm=1.99, loss_scale=32, train_wall=11, gb_free=2.8, wall=148521 2021-06-20 11:54:18 | INFO | train_inner | epoch 005: 964 / 3002 loss=2.392, ppl=5.25, wps=5795.6, ups=0.09, wpb=64901, bsz=128, num_updates=12898, lr=9.99048e-05, gnorm=1.974, loss_scale=32, train_wall=11, gb_free=2.8, wall=148532 2021-06-20 11:54:29 | INFO | train_inner | epoch 005: 965 / 3002 loss=2.584, ppl=6, wps=5855.1, ups=0.09, wpb=64857, bsz=128, num_updates=12899, lr=9.99048e-05, gnorm=2.011, loss_scale=32, train_wall=11, gb_free=2.8, wall=148543 2021-06-20 11:54:40 | INFO | train_inner | epoch 005: 966 / 3002 loss=2.593, ppl=6.03, wps=5853.3, ups=0.09, wpb=64780, bsz=128, num_updates=12900, lr=9.99048e-05, gnorm=2, loss_scale=32, train_wall=11, gb_free=2.8, wall=148554 2021-06-20 11:54:51 | INFO | train_inner | epoch 005: 967 / 3002 loss=2.402, ppl=5.28, wps=5726.3, ups=0.09, wpb=64863, bsz=128, num_updates=12901, lr=9.99048e-05, gnorm=1.892, loss_scale=32, train_wall=11, gb_free=2.8, wall=148566 2021-06-20 11:55:02 | INFO | train_inner | epoch 005: 968 / 3002 loss=2.542, ppl=5.82, wps=5845.6, ups=0.09, wpb=64796, bsz=128, num_updates=12902, lr=9.99048e-05, gnorm=2.021, loss_scale=32, train_wall=11, gb_free=2.8, wall=148577 2021-06-20 11:55:13 | INFO | train_inner | epoch 005: 969 / 3002 loss=2.401, ppl=5.28, wps=5820.6, ups=0.09, wpb=64826, bsz=128, num_updates=12903, lr=9.99048e-05, gnorm=1.925, loss_scale=32, train_wall=11, gb_free=2.8, wall=148588 2021-06-20 11:55:25 | INFO | train_inner | epoch 005: 970 / 3002 loss=2.509, ppl=5.69, wps=5798.7, ups=0.09, wpb=64895, bsz=128, num_updates=12904, lr=9.99048e-05, gnorm=2.003, loss_scale=32, train_wall=11, gb_free=2.8, wall=148599 2021-06-20 11:55:36 | INFO | train_inner | epoch 005: 971 / 3002 loss=2.525, ppl=5.76, wps=5849.8, ups=0.09, wpb=64862, bsz=128, num_updates=12905, lr=9.99048e-05, gnorm=1.992, loss_scale=32, train_wall=11, gb_free=2.8, wall=148610 2021-06-20 11:55:47 | INFO | train_inner | epoch 005: 972 / 3002 loss=2.496, ppl=5.64, wps=5927.2, ups=0.09, wpb=64845, bsz=128, num_updates=12906, lr=9.99047e-05, gnorm=1.986, loss_scale=32, train_wall=10, gb_free=2.8, wall=148621 2021-06-20 11:55:58 | INFO | train_inner | epoch 005: 973 / 3002 loss=2.615, ppl=6.13, wps=5843.9, ups=0.09, wpb=64816, bsz=128, num_updates=12907, lr=9.99047e-05, gnorm=1.993, loss_scale=32, train_wall=11, gb_free=2.8, wall=148632 2021-06-20 11:56:09 | INFO | train_inner | epoch 005: 974 / 3002 loss=2.448, ppl=5.46, wps=5834.7, ups=0.09, wpb=64865, bsz=128, num_updates=12908, lr=9.99047e-05, gnorm=1.98, loss_scale=32, train_wall=11, gb_free=2.8, wall=148643 2021-06-20 11:56:20 | INFO | train_inner | epoch 005: 975 / 3002 loss=2.651, ppl=6.28, wps=5790.4, ups=0.09, wpb=64890, bsz=128, num_updates=12909, lr=9.99047e-05, gnorm=2.028, loss_scale=32, train_wall=11, gb_free=2.8, wall=148654 2021-06-20 11:56:31 | INFO | train_inner | epoch 005: 976 / 3002 loss=2.64, ppl=6.24, wps=5800.4, ups=0.09, wpb=64816, bsz=128, num_updates=12910, lr=9.99047e-05, gnorm=2.076, loss_scale=32, train_wall=11, gb_free=2.8, wall=148666 2021-06-20 11:56:42 | INFO | train_inner | epoch 005: 977 / 3002 loss=2.564, ppl=5.91, wps=5942.7, ups=0.09, wpb=64793, bsz=128, num_updates=12911, lr=9.99047e-05, gnorm=1.887, loss_scale=32, train_wall=10, gb_free=2.8, wall=148676 2021-06-20 11:56:53 | INFO | train_inner | epoch 005: 978 / 3002 loss=2.596, ppl=6.05, wps=5750.3, ups=0.09, wpb=64728, bsz=128, num_updates=12912, lr=9.99047e-05, gnorm=1.954, loss_scale=32, train_wall=11, gb_free=2.8, wall=148688 2021-06-20 11:57:05 | INFO | train_inner | epoch 005: 979 / 3002 loss=2.526, ppl=5.76, wps=5836.1, ups=0.09, wpb=64827, bsz=128, num_updates=12913, lr=9.99047e-05, gnorm=2.001, loss_scale=32, train_wall=11, gb_free=2.8, wall=148699 2021-06-20 11:57:16 | INFO | train_inner | epoch 005: 980 / 3002 loss=2.622, ppl=6.16, wps=5702.2, ups=0.09, wpb=64763, bsz=128, num_updates=12914, lr=9.99047e-05, gnorm=1.975, loss_scale=32, train_wall=11, gb_free=2.8, wall=148710 2021-06-20 11:57:27 | INFO | train_inner | epoch 005: 981 / 3002 loss=2.514, ppl=5.71, wps=5898.6, ups=0.09, wpb=64798, bsz=128, num_updates=12915, lr=9.99047e-05, gnorm=1.962, loss_scale=32, train_wall=11, gb_free=2.8, wall=148721 2021-06-20 11:57:38 | INFO | train_inner | epoch 005: 982 / 3002 loss=2.495, ppl=5.64, wps=5824.7, ups=0.09, wpb=64846, bsz=128, num_updates=12916, lr=9.99047e-05, gnorm=1.923, loss_scale=32, train_wall=11, gb_free=2.8, wall=148732 2021-06-20 11:57:49 | INFO | train_inner | epoch 005: 983 / 3002 loss=2.566, ppl=5.92, wps=5734.4, ups=0.09, wpb=64806, bsz=128, num_updates=12917, lr=9.99047e-05, gnorm=1.972, loss_scale=32, train_wall=11, gb_free=2.8, wall=148744 2021-06-20 11:58:01 | INFO | train_inner | epoch 005: 984 / 3002 loss=2.516, ppl=5.72, wps=5785.6, ups=0.09, wpb=64849, bsz=128, num_updates=12918, lr=9.99046e-05, gnorm=1.975, loss_scale=32, train_wall=11, gb_free=2.8, wall=148755 2021-06-20 11:58:12 | INFO | train_inner | epoch 005: 985 / 3002 loss=2.516, ppl=5.72, wps=5783.6, ups=0.09, wpb=64899, bsz=128, num_updates=12919, lr=9.99046e-05, gnorm=1.977, loss_scale=32, train_wall=11, gb_free=2.8, wall=148766 2021-06-20 11:58:23 | INFO | train_inner | epoch 005: 986 / 3002 loss=2.563, ppl=5.91, wps=5881.1, ups=0.09, wpb=64917, bsz=128, num_updates=12920, lr=9.99046e-05, gnorm=1.987, loss_scale=32, train_wall=11, gb_free=2.8, wall=148777 2021-06-20 11:58:34 | INFO | train_inner | epoch 005: 987 / 3002 loss=2.608, ppl=6.1, wps=5783.4, ups=0.09, wpb=64849, bsz=128, num_updates=12921, lr=9.99046e-05, gnorm=1.945, loss_scale=32, train_wall=11, gb_free=2.8, wall=148788 2021-06-20 11:58:45 | INFO | train_inner | epoch 005: 988 / 3002 loss=2.554, ppl=5.87, wps=5837.9, ups=0.09, wpb=64836, bsz=128, num_updates=12922, lr=9.99046e-05, gnorm=1.937, loss_scale=32, train_wall=11, gb_free=2.8, wall=148799 2021-06-20 11:58:56 | INFO | train_inner | epoch 005: 989 / 3002 loss=2.55, ppl=5.86, wps=5849.8, ups=0.09, wpb=64752, bsz=128, num_updates=12923, lr=9.99046e-05, gnorm=1.967, loss_scale=32, train_wall=11, gb_free=2.8, wall=148811 2021-06-20 11:59:07 | INFO | train_inner | epoch 005: 990 / 3002 loss=2.497, ppl=5.64, wps=5801.5, ups=0.09, wpb=64779, bsz=128, num_updates=12924, lr=9.99046e-05, gnorm=1.991, loss_scale=32, train_wall=11, gb_free=2.8, wall=148822 2021-06-20 11:59:18 | INFO | train_inner | epoch 005: 991 / 3002 loss=2.559, ppl=5.89, wps=6034.6, ups=0.09, wpb=64803, bsz=128, num_updates=12925, lr=9.99046e-05, gnorm=1.983, loss_scale=32, train_wall=10, gb_free=2.8, wall=148832 2021-06-20 11:59:29 | INFO | train_inner | epoch 005: 992 / 3002 loss=2.461, ppl=5.5, wps=5845.2, ups=0.09, wpb=64819, bsz=128, num_updates=12926, lr=9.99046e-05, gnorm=1.94, loss_scale=32, train_wall=11, gb_free=2.8, wall=148843 2021-06-20 11:59:40 | INFO | train_inner | epoch 005: 993 / 3002 loss=2.391, ppl=5.25, wps=5867.5, ups=0.09, wpb=64877, bsz=128, num_updates=12927, lr=9.99046e-05, gnorm=2.047, loss_scale=32, train_wall=11, gb_free=2.8, wall=148855 2021-06-20 11:59:51 | INFO | train_inner | epoch 005: 994 / 3002 loss=2.543, ppl=5.83, wps=5808.5, ups=0.09, wpb=64805, bsz=128, num_updates=12928, lr=9.99046e-05, gnorm=2.044, loss_scale=32, train_wall=11, gb_free=2.8, wall=148866 2021-06-20 12:00:02 | INFO | train_inner | epoch 005: 995 / 3002 loss=2.585, ppl=6, wps=5925.5, ups=0.09, wpb=64777, bsz=128, num_updates=12929, lr=9.99046e-05, gnorm=2.035, loss_scale=32, train_wall=10, gb_free=2.8, wall=148877 2021-06-20 12:00:13 | INFO | train_inner | epoch 005: 996 / 3002 loss=2.53, ppl=5.77, wps=5806.8, ups=0.09, wpb=64811, bsz=128, num_updates=12930, lr=9.99046e-05, gnorm=2.084, loss_scale=32, train_wall=11, gb_free=2.8, wall=148888 2021-06-20 12:00:25 | INFO | train_inner | epoch 005: 997 / 3002 loss=2.485, ppl=5.6, wps=5712.8, ups=0.09, wpb=64736, bsz=128, num_updates=12931, lr=9.99045e-05, gnorm=1.895, loss_scale=32, train_wall=11, gb_free=2.8, wall=148899 2021-06-20 12:00:36 | INFO | train_inner | epoch 005: 998 / 3002 loss=2.486, ppl=5.6, wps=5740.6, ups=0.09, wpb=64864, bsz=128, num_updates=12932, lr=9.99045e-05, gnorm=1.929, loss_scale=32, train_wall=11, gb_free=2.8, wall=148910 2021-06-20 12:00:47 | INFO | train_inner | epoch 005: 999 / 3002 loss=2.525, ppl=5.76, wps=5771.3, ups=0.09, wpb=64846, bsz=128, num_updates=12933, lr=9.99045e-05, gnorm=1.943, loss_scale=32, train_wall=11, gb_free=2.8, wall=148922 2021-06-20 12:00:59 | INFO | train_inner | epoch 005: 1000 / 3002 loss=2.562, ppl=5.91, wps=5798.6, ups=0.09, wpb=64736, bsz=128, num_updates=12934, lr=9.99045e-05, gnorm=1.973, loss_scale=32, train_wall=11, gb_free=2.8, wall=148933 2021-06-20 12:01:10 | INFO | train_inner | epoch 005: 1001 / 3002 loss=2.441, ppl=5.43, wps=5746.9, ups=0.09, wpb=64831, bsz=128, num_updates=12935, lr=9.99045e-05, gnorm=1.921, loss_scale=32, train_wall=11, gb_free=2.8, wall=148944 2021-06-20 12:01:21 | INFO | train_inner | epoch 005: 1002 / 3002 loss=2.497, ppl=5.65, wps=5861.7, ups=0.09, wpb=64827, bsz=128, num_updates=12936, lr=9.99045e-05, gnorm=1.973, loss_scale=32, train_wall=11, gb_free=2.8, wall=148955 2021-06-20 12:01:32 | INFO | train_inner | epoch 005: 1003 / 3002 loss=2.487, ppl=5.61, wps=5821.5, ups=0.09, wpb=64818, bsz=128, num_updates=12937, lr=9.99045e-05, gnorm=2.003, loss_scale=32, train_wall=11, gb_free=2.8, wall=148966 2021-06-20 12:01:43 | INFO | train_inner | epoch 005: 1004 / 3002 loss=2.548, ppl=5.85, wps=5871.3, ups=0.09, wpb=64827, bsz=128, num_updates=12938, lr=9.99045e-05, gnorm=2.007, loss_scale=32, train_wall=11, gb_free=2.8, wall=148977 2021-06-20 12:01:54 | INFO | train_inner | epoch 005: 1005 / 3002 loss=2.528, ppl=5.77, wps=5905, ups=0.09, wpb=64811, bsz=128, num_updates=12939, lr=9.99045e-05, gnorm=2.054, loss_scale=32, train_wall=11, gb_free=2.8, wall=148988 2021-06-20 12:02:05 | INFO | train_inner | epoch 005: 1006 / 3002 loss=2.552, ppl=5.87, wps=5747.7, ups=0.09, wpb=64804, bsz=128, num_updates=12940, lr=9.99045e-05, gnorm=2.042, loss_scale=32, train_wall=11, gb_free=2.8, wall=149000 2021-06-20 12:02:16 | INFO | train_inner | epoch 005: 1007 / 3002 loss=2.575, ppl=5.96, wps=5775.6, ups=0.09, wpb=64822, bsz=128, num_updates=12941, lr=9.99045e-05, gnorm=2.014, loss_scale=32, train_wall=11, gb_free=2.8, wall=149011 2021-06-20 12:02:28 | INFO | train_inner | epoch 005: 1008 / 3002 loss=2.484, ppl=5.6, wps=5848, ups=0.09, wpb=64834, bsz=128, num_updates=12942, lr=9.99045e-05, gnorm=2, loss_scale=32, train_wall=11, gb_free=2.8, wall=149022 2021-06-20 12:02:39 | INFO | train_inner | epoch 005: 1009 / 3002 loss=2.511, ppl=5.7, wps=5785.7, ups=0.09, wpb=64815, bsz=128, num_updates=12943, lr=9.99044e-05, gnorm=1.921, loss_scale=32, train_wall=11, gb_free=2.8, wall=149033 2021-06-20 12:02:50 | INFO | train_inner | epoch 005: 1010 / 3002 loss=2.572, ppl=5.95, wps=5744.2, ups=0.09, wpb=64755, bsz=128, num_updates=12944, lr=9.99044e-05, gnorm=2.008, loss_scale=32, train_wall=11, gb_free=2.8, wall=149044 2021-06-20 12:03:01 | INFO | train_inner | epoch 005: 1011 / 3002 loss=2.545, ppl=5.84, wps=5749.9, ups=0.09, wpb=64886, bsz=128, num_updates=12945, lr=9.99044e-05, gnorm=2.018, loss_scale=32, train_wall=11, gb_free=2.8, wall=149056 2021-06-20 12:03:13 | INFO | train_inner | epoch 005: 1012 / 3002 loss=2.375, ppl=5.19, wps=5684.4, ups=0.09, wpb=64801, bsz=128, num_updates=12946, lr=9.99044e-05, gnorm=1.968, loss_scale=32, train_wall=11, gb_free=2.8, wall=149067 2021-06-20 12:03:24 | INFO | train_inner | epoch 005: 1013 / 3002 loss=2.522, ppl=5.74, wps=5845.7, ups=0.09, wpb=64866, bsz=128, num_updates=12947, lr=9.99044e-05, gnorm=1.99, loss_scale=32, train_wall=11, gb_free=2.8, wall=149078 2021-06-20 12:03:35 | INFO | train_inner | epoch 005: 1014 / 3002 loss=2.666, ppl=6.35, wps=5762.9, ups=0.09, wpb=64791, bsz=128, num_updates=12948, lr=9.99044e-05, gnorm=1.988, loss_scale=32, train_wall=11, gb_free=2.8, wall=149089 2021-06-20 12:03:46 | INFO | train_inner | epoch 005: 1015 / 3002 loss=2.465, ppl=5.52, wps=5812.6, ups=0.09, wpb=64811, bsz=128, num_updates=12949, lr=9.99044e-05, gnorm=1.978, loss_scale=32, train_wall=11, gb_free=2.8, wall=149101 2021-06-20 12:03:57 | INFO | train_inner | epoch 005: 1016 / 3002 loss=2.411, ppl=5.32, wps=5839.6, ups=0.09, wpb=64777, bsz=128, num_updates=12950, lr=9.99044e-05, gnorm=1.976, loss_scale=32, train_wall=11, gb_free=2.8, wall=149112 2021-06-20 12:04:08 | INFO | train_inner | epoch 005: 1017 / 3002 loss=2.52, ppl=5.73, wps=5893.2, ups=0.09, wpb=64750, bsz=128, num_updates=12951, lr=9.99044e-05, gnorm=1.962, loss_scale=32, train_wall=11, gb_free=2.8, wall=149123 2021-06-20 12:04:19 | INFO | train_inner | epoch 005: 1018 / 3002 loss=2.534, ppl=5.79, wps=5903.8, ups=0.09, wpb=64831, bsz=128, num_updates=12952, lr=9.99044e-05, gnorm=2.045, loss_scale=32, train_wall=11, gb_free=2.8, wall=149134 2021-06-20 12:04:30 | INFO | train_inner | epoch 005: 1019 / 3002 loss=2.601, ppl=6.07, wps=5938.2, ups=0.09, wpb=64816, bsz=128, num_updates=12953, lr=9.99044e-05, gnorm=1.94, loss_scale=32, train_wall=10, gb_free=2.8, wall=149145 2021-06-20 12:04:41 | INFO | train_inner | epoch 005: 1020 / 3002 loss=2.651, ppl=6.28, wps=5837.5, ups=0.09, wpb=64850, bsz=128, num_updates=12954, lr=9.99044e-05, gnorm=2.113, loss_scale=32, train_wall=11, gb_free=2.8, wall=149156 2021-06-20 12:04:53 | INFO | train_inner | epoch 005: 1021 / 3002 loss=2.483, ppl=5.59, wps=5754.6, ups=0.09, wpb=64850, bsz=128, num_updates=12955, lr=9.99044e-05, gnorm=1.971, loss_scale=32, train_wall=11, gb_free=2.8, wall=149167 2021-06-20 12:05:04 | INFO | train_inner | epoch 005: 1022 / 3002 loss=2.456, ppl=5.49, wps=5838.9, ups=0.09, wpb=64784, bsz=128, num_updates=12956, lr=9.99043e-05, gnorm=1.948, loss_scale=32, train_wall=11, gb_free=2.8, wall=149178 2021-06-20 12:05:15 | INFO | train_inner | epoch 005: 1023 / 3002 loss=2.42, ppl=5.35, wps=5689.4, ups=0.09, wpb=64809, bsz=128, num_updates=12957, lr=9.99043e-05, gnorm=1.905, loss_scale=32, train_wall=11, gb_free=2.8, wall=149189 2021-06-20 12:05:26 | INFO | train_inner | epoch 005: 1024 / 3002 loss=2.577, ppl=5.97, wps=5709, ups=0.09, wpb=64852, bsz=128, num_updates=12958, lr=9.99043e-05, gnorm=2.062, loss_scale=32, train_wall=11, gb_free=2.8, wall=149201 2021-06-20 12:05:38 | INFO | train_inner | epoch 005: 1025 / 3002 loss=2.589, ppl=6.02, wps=5667.1, ups=0.09, wpb=64787, bsz=128, num_updates=12959, lr=9.99043e-05, gnorm=1.963, loss_scale=32, train_wall=11, gb_free=2.8, wall=149212 2021-06-20 12:05:49 | INFO | train_inner | epoch 005: 1026 / 3002 loss=2.677, ppl=6.39, wps=5830.2, ups=0.09, wpb=64761, bsz=128, num_updates=12960, lr=9.99043e-05, gnorm=1.982, loss_scale=32, train_wall=11, gb_free=2.8, wall=149223 2021-06-20 12:06:00 | INFO | train_inner | epoch 005: 1027 / 3002 loss=2.7, ppl=6.5, wps=5716.5, ups=0.09, wpb=64805, bsz=128, num_updates=12961, lr=9.99043e-05, gnorm=1.966, loss_scale=32, train_wall=11, gb_free=2.8, wall=149235 2021-06-20 12:06:11 | INFO | train_inner | epoch 005: 1028 / 3002 loss=2.592, ppl=6.03, wps=5856.7, ups=0.09, wpb=64748, bsz=128, num_updates=12962, lr=9.99043e-05, gnorm=2.019, loss_scale=32, train_wall=11, gb_free=2.8, wall=149246 2021-06-20 12:06:22 | INFO | train_inner | epoch 005: 1029 / 3002 loss=2.442, ppl=5.44, wps=5835.5, ups=0.09, wpb=64911, bsz=128, num_updates=12963, lr=9.99043e-05, gnorm=2.139, loss_scale=32, train_wall=11, gb_free=2.8, wall=149257 2021-06-20 12:06:34 | INFO | train_inner | epoch 005: 1030 / 3002 loss=2.488, ppl=5.61, wps=5749.9, ups=0.09, wpb=64845, bsz=128, num_updates=12964, lr=9.99043e-05, gnorm=1.959, loss_scale=32, train_wall=11, gb_free=2.8, wall=149268 2021-06-20 12:06:45 | INFO | train_inner | epoch 005: 1031 / 3002 loss=2.572, ppl=5.95, wps=5803.3, ups=0.09, wpb=64900, bsz=128, num_updates=12965, lr=9.99043e-05, gnorm=2.014, loss_scale=32, train_wall=11, gb_free=2.8, wall=149279 2021-06-20 12:06:56 | INFO | train_inner | epoch 005: 1032 / 3002 loss=2.615, ppl=6.13, wps=5897.3, ups=0.09, wpb=64844, bsz=128, num_updates=12966, lr=9.99043e-05, gnorm=2.07, loss_scale=32, train_wall=11, gb_free=2.8, wall=149290 2021-06-20 12:07:07 | INFO | train_inner | epoch 005: 1033 / 3002 loss=2.683, ppl=6.42, wps=5827.4, ups=0.09, wpb=64856, bsz=128, num_updates=12967, lr=9.99043e-05, gnorm=2.007, loss_scale=32, train_wall=11, gb_free=2.8, wall=149301 2021-06-20 12:07:18 | INFO | train_inner | epoch 005: 1034 / 3002 loss=2.499, ppl=5.65, wps=5925.3, ups=0.09, wpb=64803, bsz=128, num_updates=12968, lr=9.99042e-05, gnorm=1.969, loss_scale=32, train_wall=10, gb_free=2.8, wall=149312 2021-06-20 12:07:29 | INFO | train_inner | epoch 005: 1035 / 3002 loss=2.5, ppl=5.66, wps=6016.6, ups=0.09, wpb=64860, bsz=128, num_updates=12969, lr=9.99042e-05, gnorm=1.894, loss_scale=64, train_wall=10, gb_free=2.8, wall=149323 2021-06-20 12:07:40 | INFO | train_inner | epoch 005: 1036 / 3002 loss=2.572, ppl=5.95, wps=5888.7, ups=0.09, wpb=64798, bsz=128, num_updates=12970, lr=9.99042e-05, gnorm=1.966, loss_scale=64, train_wall=11, gb_free=2.8, wall=149334 2021-06-20 12:07:51 | INFO | train_inner | epoch 005: 1037 / 3002 loss=2.396, ppl=5.26, wps=5733.6, ups=0.09, wpb=64832, bsz=128, num_updates=12971, lr=9.99042e-05, gnorm=1.828, loss_scale=64, train_wall=11, gb_free=2.8, wall=149345 2021-06-20 12:08:02 | INFO | train_inner | epoch 005: 1038 / 3002 loss=2.38, ppl=5.21, wps=5754.4, ups=0.09, wpb=64872, bsz=128, num_updates=12972, lr=9.99042e-05, gnorm=2.041, loss_scale=64, train_wall=11, gb_free=2.8, wall=149357 2021-06-20 12:08:13 | INFO | train_inner | epoch 005: 1039 / 3002 loss=2.499, ppl=5.65, wps=5838.5, ups=0.09, wpb=64831, bsz=128, num_updates=12973, lr=9.99042e-05, gnorm=2.002, loss_scale=64, train_wall=11, gb_free=2.8, wall=149368 2021-06-20 12:08:25 | INFO | train_inner | epoch 005: 1040 / 3002 loss=2.594, ppl=6.04, wps=5852.5, ups=0.09, wpb=64875, bsz=128, num_updates=12974, lr=9.99042e-05, gnorm=2.006, loss_scale=64, train_wall=11, gb_free=2.8, wall=149379 2021-06-20 12:08:36 | INFO | train_inner | epoch 005: 1041 / 3002 loss=2.391, ppl=5.25, wps=5755.4, ups=0.09, wpb=64736, bsz=128, num_updates=12975, lr=9.99042e-05, gnorm=1.962, loss_scale=64, train_wall=11, gb_free=2.8, wall=149390 2021-06-20 12:08:47 | INFO | train_inner | epoch 005: 1042 / 3002 loss=2.59, ppl=6.02, wps=5971.1, ups=0.09, wpb=64859, bsz=128, num_updates=12976, lr=9.99042e-05, gnorm=1.945, loss_scale=64, train_wall=10, gb_free=2.8, wall=149401 2021-06-20 12:08:58 | INFO | train_inner | epoch 005: 1043 / 3002 loss=2.602, ppl=6.07, wps=5738.6, ups=0.09, wpb=64809, bsz=128, num_updates=12977, lr=9.99042e-05, gnorm=2.022, loss_scale=64, train_wall=11, gb_free=2.8, wall=149412 2021-06-20 12:09:09 | INFO | train_inner | epoch 005: 1044 / 3002 loss=2.694, ppl=6.47, wps=5810.9, ups=0.09, wpb=64870, bsz=128, num_updates=12978, lr=9.99042e-05, gnorm=2.021, loss_scale=64, train_wall=11, gb_free=2.8, wall=149423 2021-06-20 12:09:20 | INFO | train_inner | epoch 005: 1045 / 3002 loss=2.42, ppl=5.35, wps=5850.3, ups=0.09, wpb=64869, bsz=128, num_updates=12979, lr=9.99042e-05, gnorm=2.018, loss_scale=64, train_wall=11, gb_free=2.8, wall=149435 2021-06-20 12:09:31 | INFO | train_inner | epoch 005: 1046 / 3002 loss=2.645, ppl=6.26, wps=6000.5, ups=0.09, wpb=64818, bsz=128, num_updates=12980, lr=9.99042e-05, gnorm=1.949, loss_scale=64, train_wall=10, gb_free=2.8, wall=149445 2021-06-20 12:09:42 | INFO | train_inner | epoch 005: 1047 / 3002 loss=2.531, ppl=5.78, wps=5772, ups=0.09, wpb=64866, bsz=128, num_updates=12981, lr=9.99041e-05, gnorm=1.938, loss_scale=64, train_wall=11, gb_free=2.8, wall=149457 2021-06-20 12:09:53 | INFO | train_inner | epoch 005: 1048 / 3002 loss=2.559, ppl=5.89, wps=6022.3, ups=0.09, wpb=64836, bsz=128, num_updates=12982, lr=9.99041e-05, gnorm=1.957, loss_scale=64, train_wall=10, gb_free=2.8, wall=149467 2021-06-20 12:10:04 | INFO | train_inner | epoch 005: 1049 / 3002 loss=2.51, ppl=5.7, wps=5875.5, ups=0.09, wpb=64920, bsz=128, num_updates=12983, lr=9.99041e-05, gnorm=2.039, loss_scale=64, train_wall=11, gb_free=2.8, wall=149478 2021-06-20 12:10:15 | INFO | train_inner | epoch 005: 1050 / 3002 loss=2.509, ppl=5.69, wps=5870.9, ups=0.09, wpb=64804, bsz=128, num_updates=12984, lr=9.99041e-05, gnorm=2.043, loss_scale=64, train_wall=11, gb_free=2.8, wall=149489 2021-06-20 12:10:26 | INFO | train_inner | epoch 005: 1051 / 3002 loss=2.429, ppl=5.38, wps=5929.1, ups=0.09, wpb=64863, bsz=128, num_updates=12985, lr=9.99041e-05, gnorm=1.914, loss_scale=64, train_wall=10, gb_free=2.8, wall=149500 2021-06-20 12:10:37 | INFO | train_inner | epoch 005: 1052 / 3002 loss=2.509, ppl=5.69, wps=5741.5, ups=0.09, wpb=64830, bsz=128, num_updates=12986, lr=9.99041e-05, gnorm=1.99, loss_scale=64, train_wall=11, gb_free=2.8, wall=149512 2021-06-20 12:10:48 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 2021-06-20 12:11:00 | INFO | train_inner | epoch 005: 1054 / 3002 loss=2.507, ppl=5.68, wps=2894.5, ups=0.04, wpb=64745, bsz=128, num_updates=12987, lr=9.99041e-05, gnorm=1.891, loss_scale=32, train_wall=21, gb_free=2.8, wall=149534 2021-06-20 12:11:11 | INFO | train_inner | epoch 005: 1055 / 3002 loss=2.529, ppl=5.77, wps=5938.4, ups=0.09, wpb=64918, bsz=128, num_updates=12988, lr=9.99041e-05, gnorm=1.988, loss_scale=32, train_wall=10, gb_free=2.8, wall=149545 2021-06-20 12:11:22 | INFO | train_inner | epoch 005: 1056 / 3002 loss=2.539, ppl=5.81, wps=5746.4, ups=0.09, wpb=64792, bsz=128, num_updates=12989, lr=9.99041e-05, gnorm=2.024, loss_scale=32, train_wall=11, gb_free=2.8, wall=149556 2021-06-20 12:11:33 | INFO | train_inner | epoch 005: 1057 / 3002 loss=2.425, ppl=5.37, wps=5808.5, ups=0.09, wpb=64835, bsz=128, num_updates=12990, lr=9.99041e-05, gnorm=1.955, loss_scale=32, train_wall=11, gb_free=2.8, wall=149567 2021-06-20 12:11:44 | INFO | train_inner | epoch 005: 1058 / 3002 loss=2.482, ppl=5.59, wps=5829, ups=0.09, wpb=64761, bsz=128, num_updates=12991, lr=9.99041e-05, gnorm=2.068, loss_scale=32, train_wall=11, gb_free=2.8, wall=149579 2021-06-20 12:11:55 | INFO | train_inner | epoch 005: 1059 / 3002 loss=2.503, ppl=5.67, wps=5823.1, ups=0.09, wpb=64812, bsz=128, num_updates=12992, lr=9.99041e-05, gnorm=2.085, loss_scale=32, train_wall=11, gb_free=2.8, wall=149590 2021-06-20 12:12:06 | INFO | train_inner | epoch 005: 1060 / 3002 loss=2.392, ppl=5.25, wps=5897.3, ups=0.09, wpb=64880, bsz=128, num_updates=12993, lr=9.9904e-05, gnorm=1.945, loss_scale=32, train_wall=11, gb_free=2.8, wall=149601 2021-06-20 12:12:17 | INFO | train_inner | epoch 005: 1061 / 3002 loss=2.557, ppl=5.89, wps=5944.5, ups=0.09, wpb=64808, bsz=128, num_updates=12994, lr=9.9904e-05, gnorm=2.005, loss_scale=32, train_wall=10, gb_free=2.8, wall=149612 2021-06-20 12:12:28 | INFO | train_inner | epoch 005: 1062 / 3002 loss=2.532, ppl=5.78, wps=5867.4, ups=0.09, wpb=64813, bsz=128, num_updates=12995, lr=9.9904e-05, gnorm=1.937, loss_scale=32, train_wall=11, gb_free=2.8, wall=149623 2021-06-20 12:12:39 | INFO | train_inner | epoch 005: 1063 / 3002 loss=2.508, ppl=5.69, wps=5890.5, ups=0.09, wpb=64817, bsz=128, num_updates=12996, lr=9.9904e-05, gnorm=1.939, loss_scale=32, train_wall=11, gb_free=2.8, wall=149634 2021-06-20 12:12:50 | INFO | train_inner | epoch 005: 1064 / 3002 loss=2.504, ppl=5.67, wps=5867.7, ups=0.09, wpb=64766, bsz=128, num_updates=12997, lr=9.9904e-05, gnorm=1.98, loss_scale=32, train_wall=11, gb_free=2.8, wall=149645 2021-06-20 12:13:02 | INFO | train_inner | epoch 005: 1065 / 3002 loss=2.536, ppl=5.8, wps=5731.7, ups=0.09, wpb=64855, bsz=128, num_updates=12998, lr=9.9904e-05, gnorm=1.893, loss_scale=32, train_wall=11, gb_free=2.8, wall=149656 2021-06-20 12:13:12 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 2021-06-20 12:13:23 | INFO | train_inner | epoch 005: 1067 / 3002 loss=2.581, ppl=5.98, wps=2968.6, ups=0.05, wpb=64870, bsz=128, num_updates=12999, lr=9.9904e-05, gnorm=1.984, loss_scale=16, train_wall=21, gb_free=2.8, wall=149678 2021-06-20 12:13:35 | INFO | train_inner | epoch 005: 1068 / 3002 loss=2.614, ppl=6.12, wps=5869.9, ups=0.09, wpb=64822, bsz=128, num_updates=13000, lr=9.9904e-05, gnorm=2.026, loss_scale=16, train_wall=11, gb_free=2.8, wall=149689 2021-06-20 12:13:45 | INFO | train_inner | epoch 005: 1069 / 3002 loss=2.559, ppl=5.89, wps=5969.7, ups=0.09, wpb=64845, bsz=128, num_updates=13001, lr=9.9904e-05, gnorm=1.999, loss_scale=16, train_wall=10, gb_free=2.8, wall=149700 2021-06-20 12:13:57 | INFO | train_inner | epoch 005: 1070 / 3002 loss=2.642, ppl=6.24, wps=5703, ups=0.09, wpb=64813, bsz=128, num_updates=13002, lr=9.9904e-05, gnorm=1.966, loss_scale=16, train_wall=11, gb_free=2.8, wall=149711 2021-06-20 12:14:08 | INFO | train_inner | epoch 005: 1071 / 3002 loss=2.486, ppl=5.6, wps=5815.4, ups=0.09, wpb=64879, bsz=128, num_updates=13003, lr=9.9904e-05, gnorm=1.974, loss_scale=16, train_wall=11, gb_free=2.8, wall=149722 2021-06-20 12:14:19 | INFO | train_inner | epoch 005: 1072 / 3002 loss=2.51, ppl=5.7, wps=5928.7, ups=0.09, wpb=64813, bsz=128, num_updates=13004, lr=9.9904e-05, gnorm=1.943, loss_scale=16, train_wall=10, gb_free=2.8, wall=149733 2021-06-20 12:14:30 | INFO | train_inner | epoch 005: 1073 / 3002 loss=2.608, ppl=6.1, wps=5867, ups=0.09, wpb=64829, bsz=128, num_updates=13005, lr=9.9904e-05, gnorm=1.903, loss_scale=16, train_wall=11, gb_free=2.8, wall=149744 2021-06-20 12:14:41 | INFO | train_inner | epoch 005: 1074 / 3002 loss=2.365, ppl=5.15, wps=5809.6, ups=0.09, wpb=64751, bsz=128, num_updates=13006, lr=9.99039e-05, gnorm=1.898, loss_scale=16, train_wall=11, gb_free=2.8, wall=149755 2021-06-20 12:14:52 | INFO | train_inner | epoch 005: 1075 / 3002 loss=2.701, ppl=6.5, wps=5835.4, ups=0.09, wpb=64874, bsz=128, num_updates=13007, lr=9.99039e-05, gnorm=1.957, loss_scale=16, train_wall=11, gb_free=2.8, wall=149767 2021-06-20 12:15:03 | INFO | train_inner | epoch 005: 1076 / 3002 loss=2.577, ppl=5.97, wps=5875.6, ups=0.09, wpb=64888, bsz=128, num_updates=13008, lr=9.99039e-05, gnorm=1.983, loss_scale=16, train_wall=11, gb_free=2.8, wall=149778 2021-06-20 12:15:14 | INFO | train_inner | epoch 005: 1077 / 3002 loss=2.561, ppl=5.9, wps=5948, ups=0.09, wpb=64910, bsz=128, num_updates=13009, lr=9.99039e-05, gnorm=2.058, loss_scale=16, train_wall=10, gb_free=2.8, wall=149788 2021-06-20 12:15:25 | INFO | train_inner | epoch 005: 1078 / 3002 loss=2.447, ppl=5.45, wps=5766.9, ups=0.09, wpb=64848, bsz=128, num_updates=13010, lr=9.99039e-05, gnorm=1.944, loss_scale=16, train_wall=11, gb_free=2.8, wall=149800 2021-06-20 12:15:36 | INFO | train_inner | epoch 005: 1079 / 3002 loss=2.658, ppl=6.31, wps=5873, ups=0.09, wpb=64807, bsz=128, num_updates=13011, lr=9.99039e-05, gnorm=2.034, loss_scale=16, train_wall=11, gb_free=2.8, wall=149811 2021-06-20 12:15:47 | INFO | train_inner | epoch 005: 1080 / 3002 loss=2.67, ppl=6.36, wps=5897.4, ups=0.09, wpb=64883, bsz=128, num_updates=13012, lr=9.99039e-05, gnorm=2.092, loss_scale=16, train_wall=11, gb_free=2.8, wall=149822 2021-06-20 12:15:58 | INFO | train_inner | epoch 005: 1081 / 3002 loss=2.465, ppl=5.52, wps=5851.9, ups=0.09, wpb=64731, bsz=128, num_updates=13013, lr=9.99039e-05, gnorm=2.026, loss_scale=16, train_wall=11, gb_free=2.8, wall=149833 2021-06-20 12:16:10 | INFO | train_inner | epoch 005: 1082 / 3002 loss=2.508, ppl=5.69, wps=5859.5, ups=0.09, wpb=64872, bsz=128, num_updates=13014, lr=9.99039e-05, gnorm=2.034, loss_scale=16, train_wall=11, gb_free=2.8, wall=149844 2021-06-20 12:16:21 | INFO | train_inner | epoch 005: 1083 / 3002 loss=2.441, ppl=5.43, wps=5821.3, ups=0.09, wpb=64791, bsz=128, num_updates=13015, lr=9.99039e-05, gnorm=2.005, loss_scale=16, train_wall=11, gb_free=2.8, wall=149855 2021-06-20 12:16:32 | INFO | train_inner | epoch 005: 1084 / 3002 loss=2.404, ppl=5.29, wps=5849.7, ups=0.09, wpb=64860, bsz=128, num_updates=13016, lr=9.99039e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=149866 2021-06-20 12:16:43 | INFO | train_inner | epoch 005: 1085 / 3002 loss=2.346, ppl=5.08, wps=5957.5, ups=0.09, wpb=64792, bsz=128, num_updates=13017, lr=9.99039e-05, gnorm=2.006, loss_scale=16, train_wall=10, gb_free=2.8, wall=149877 2021-06-20 12:16:54 | INFO | train_inner | epoch 005: 1086 / 3002 loss=2.424, ppl=5.37, wps=5758.3, ups=0.09, wpb=64839, bsz=128, num_updates=13018, lr=9.99038e-05, gnorm=2.119, loss_scale=16, train_wall=11, gb_free=2.8, wall=149888 2021-06-20 12:17:05 | INFO | train_inner | epoch 005: 1087 / 3002 loss=2.673, ppl=6.38, wps=5807.2, ups=0.09, wpb=64804, bsz=128, num_updates=13019, lr=9.99038e-05, gnorm=2.1, loss_scale=16, train_wall=11, gb_free=2.8, wall=149899 2021-06-20 12:17:16 | INFO | train_inner | epoch 005: 1088 / 3002 loss=2.634, ppl=6.21, wps=5823.6, ups=0.09, wpb=64779, bsz=128, num_updates=13020, lr=9.99038e-05, gnorm=2.083, loss_scale=16, train_wall=11, gb_free=2.8, wall=149911 2021-06-20 12:17:27 | INFO | train_inner | epoch 005: 1089 / 3002 loss=2.428, ppl=5.38, wps=5809.9, ups=0.09, wpb=64845, bsz=128, num_updates=13021, lr=9.99038e-05, gnorm=1.996, loss_scale=16, train_wall=11, gb_free=2.8, wall=149922 2021-06-20 12:17:38 | INFO | train_inner | epoch 005: 1090 / 3002 loss=2.529, ppl=5.77, wps=5873, ups=0.09, wpb=64809, bsz=128, num_updates=13022, lr=9.99038e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=149933 2021-06-20 12:17:49 | INFO | train_inner | epoch 005: 1091 / 3002 loss=2.441, ppl=5.43, wps=5896.7, ups=0.09, wpb=64845, bsz=128, num_updates=13023, lr=9.99038e-05, gnorm=1.93, loss_scale=16, train_wall=11, gb_free=2.8, wall=149944 2021-06-20 12:18:00 | INFO | train_inner | epoch 005: 1092 / 3002 loss=2.732, ppl=6.64, wps=5868.2, ups=0.09, wpb=64799, bsz=128, num_updates=13024, lr=9.99038e-05, gnorm=2.035, loss_scale=16, train_wall=11, gb_free=2.8, wall=149955 2021-06-20 12:18:11 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-20 12:18:23 | INFO | train_inner | epoch 005: 1094 / 3002 loss=2.368, ppl=5.16, wps=2932.5, ups=0.05, wpb=64844, bsz=128, num_updates=13025, lr=9.99038e-05, gnorm=2.24, loss_scale=8, train_wall=21, gb_free=2.8, wall=149977 2021-06-20 12:18:34 | INFO | train_inner | epoch 005: 1095 / 3002 loss=2.675, ppl=6.39, wps=5776.7, ups=0.09, wpb=64860, bsz=128, num_updates=13026, lr=9.99038e-05, gnorm=1.995, loss_scale=8, train_wall=11, gb_free=2.8, wall=149988 2021-06-20 12:18:45 | INFO | train_inner | epoch 005: 1096 / 3002 loss=2.438, ppl=5.42, wps=5970.7, ups=0.09, wpb=64861, bsz=128, num_updates=13027, lr=9.99038e-05, gnorm=1.974, loss_scale=8, train_wall=10, gb_free=2.8, wall=149999 2021-06-20 12:18:56 | INFO | train_inner | epoch 005: 1097 / 3002 loss=2.578, ppl=5.97, wps=5733.9, ups=0.09, wpb=64778, bsz=128, num_updates=13028, lr=9.99038e-05, gnorm=2.12, loss_scale=8, train_wall=11, gb_free=2.8, wall=150010 2021-06-20 12:19:07 | INFO | train_inner | epoch 005: 1098 / 3002 loss=2.521, ppl=5.74, wps=5814.4, ups=0.09, wpb=64772, bsz=128, num_updates=13029, lr=9.99038e-05, gnorm=2.551, loss_scale=8, train_wall=11, gb_free=2.8, wall=150021 2021-06-20 12:19:18 | INFO | train_inner | epoch 005: 1099 / 3002 loss=2.46, ppl=5.5, wps=5756.3, ups=0.09, wpb=64741, bsz=128, num_updates=13030, lr=9.99038e-05, gnorm=2.04, loss_scale=8, train_wall=11, gb_free=2.8, wall=150033 2021-06-20 12:19:29 | INFO | train_inner | epoch 005: 1100 / 3002 loss=2.516, ppl=5.72, wps=5881.4, ups=0.09, wpb=64886, bsz=128, num_updates=13031, lr=9.99037e-05, gnorm=2.138, loss_scale=8, train_wall=11, gb_free=2.8, wall=150044 2021-06-20 12:19:41 | INFO | train_inner | epoch 005: 1101 / 3002 loss=2.426, ppl=5.37, wps=5792.4, ups=0.09, wpb=64834, bsz=128, num_updates=13032, lr=9.99037e-05, gnorm=2.08, loss_scale=8, train_wall=11, gb_free=2.8, wall=150055 2021-06-20 12:19:52 | INFO | train_inner | epoch 005: 1102 / 3002 loss=2.613, ppl=6.12, wps=5844.9, ups=0.09, wpb=64894, bsz=128, num_updates=13033, lr=9.99037e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=150066 2021-06-20 12:20:03 | INFO | train_inner | epoch 005: 1103 / 3002 loss=2.56, ppl=5.9, wps=5833.2, ups=0.09, wpb=64808, bsz=128, num_updates=13034, lr=9.99037e-05, gnorm=2.524, loss_scale=8, train_wall=11, gb_free=2.8, wall=150077 2021-06-20 12:20:14 | INFO | train_inner | epoch 005: 1104 / 3002 loss=2.684, ppl=6.43, wps=5929.5, ups=0.09, wpb=64824, bsz=128, num_updates=13035, lr=9.99037e-05, gnorm=2.058, loss_scale=8, train_wall=10, gb_free=2.8, wall=150088 2021-06-20 12:20:25 | INFO | train_inner | epoch 005: 1105 / 3002 loss=2.644, ppl=6.25, wps=5981.2, ups=0.09, wpb=64929, bsz=128, num_updates=13036, lr=9.99037e-05, gnorm=2.104, loss_scale=8, train_wall=10, gb_free=2.8, wall=150099 2021-06-20 12:20:35 | INFO | train_inner | epoch 005: 1106 / 3002 loss=2.536, ppl=5.8, wps=5964.4, ups=0.09, wpb=64867, bsz=128, num_updates=13037, lr=9.99037e-05, gnorm=2.089, loss_scale=8, train_wall=10, gb_free=2.8, wall=150110 2021-06-20 12:20:47 | INFO | train_inner | epoch 005: 1107 / 3002 loss=2.585, ppl=6, wps=5818.8, ups=0.09, wpb=64856, bsz=128, num_updates=13038, lr=9.99037e-05, gnorm=2.026, loss_scale=8, train_wall=11, gb_free=2.8, wall=150121 2021-06-20 12:20:58 | INFO | train_inner | epoch 005: 1108 / 3002 loss=2.549, ppl=5.85, wps=5768.9, ups=0.09, wpb=64842, bsz=128, num_updates=13039, lr=9.99037e-05, gnorm=1.949, loss_scale=8, train_wall=11, gb_free=2.8, wall=150132 2021-06-20 12:21:09 | INFO | train_inner | epoch 005: 1109 / 3002 loss=2.429, ppl=5.38, wps=5692.6, ups=0.09, wpb=64889, bsz=128, num_updates=13040, lr=9.99037e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=150144 2021-06-20 12:21:20 | INFO | train_inner | epoch 005: 1110 / 3002 loss=2.787, ppl=6.9, wps=5833.9, ups=0.09, wpb=64770, bsz=128, num_updates=13041, lr=9.99037e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=150155 2021-06-20 12:21:31 | INFO | train_inner | epoch 005: 1111 / 3002 loss=2.568, ppl=5.93, wps=5824.9, ups=0.09, wpb=64875, bsz=128, num_updates=13042, lr=9.99037e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=150166 2021-06-20 12:21:43 | INFO | train_inner | epoch 005: 1112 / 3002 loss=2.578, ppl=5.97, wps=5836, ups=0.09, wpb=64762, bsz=128, num_updates=13043, lr=9.99036e-05, gnorm=2.003, loss_scale=8, train_wall=11, gb_free=2.8, wall=150177 2021-06-20 12:21:53 | INFO | train_inner | epoch 005: 1113 / 3002 loss=2.372, ppl=5.18, wps=6007.7, ups=0.09, wpb=64803, bsz=128, num_updates=13044, lr=9.99036e-05, gnorm=1.966, loss_scale=8, train_wall=10, gb_free=2.8, wall=150188 2021-06-20 12:22:04 | INFO | train_inner | epoch 005: 1114 / 3002 loss=2.578, ppl=5.97, wps=5877.6, ups=0.09, wpb=64842, bsz=128, num_updates=13045, lr=9.99036e-05, gnorm=2.093, loss_scale=8, train_wall=11, gb_free=2.8, wall=150199 2021-06-20 12:22:15 | INFO | train_inner | epoch 005: 1115 / 3002 loss=2.542, ppl=5.82, wps=6007, ups=0.09, wpb=64885, bsz=128, num_updates=13046, lr=9.99036e-05, gnorm=2.049, loss_scale=8, train_wall=10, gb_free=2.8, wall=150209 2021-06-20 12:22:26 | INFO | train_inner | epoch 005: 1116 / 3002 loss=2.508, ppl=5.69, wps=5768.3, ups=0.09, wpb=64783, bsz=128, num_updates=13047, lr=9.99036e-05, gnorm=2.011, loss_scale=8, train_wall=11, gb_free=2.8, wall=150221 2021-06-20 12:22:37 | INFO | train_inner | epoch 005: 1117 / 3002 loss=2.449, ppl=5.46, wps=5867.4, ups=0.09, wpb=64828, bsz=128, num_updates=13048, lr=9.99036e-05, gnorm=2.033, loss_scale=8, train_wall=11, gb_free=2.8, wall=150232 2021-06-20 12:22:48 | INFO | train_inner | epoch 005: 1118 / 3002 loss=2.559, ppl=5.89, wps=5908.2, ups=0.09, wpb=64934, bsz=128, num_updates=13049, lr=9.99036e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=150243 2021-06-20 12:23:00 | INFO | train_inner | epoch 005: 1119 / 3002 loss=2.414, ppl=5.33, wps=5830, ups=0.09, wpb=64890, bsz=128, num_updates=13050, lr=9.99036e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=150254 2021-06-20 12:23:11 | INFO | train_inner | epoch 005: 1120 / 3002 loss=2.497, ppl=5.64, wps=5879.9, ups=0.09, wpb=64868, bsz=128, num_updates=13051, lr=9.99036e-05, gnorm=2.05, loss_scale=8, train_wall=11, gb_free=2.8, wall=150265 2021-06-20 12:23:21 | INFO | train_inner | epoch 005: 1121 / 3002 loss=2.498, ppl=5.65, wps=5994.5, ups=0.09, wpb=64913, bsz=128, num_updates=13052, lr=9.99036e-05, gnorm=1.997, loss_scale=8, train_wall=10, gb_free=2.8, wall=150276 2021-06-20 12:23:32 | INFO | train_inner | epoch 005: 1122 / 3002 loss=2.423, ppl=5.36, wps=5896.5, ups=0.09, wpb=64803, bsz=128, num_updates=13053, lr=9.99036e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=150287 2021-06-20 12:23:43 | INFO | train_inner | epoch 005: 1123 / 3002 loss=2.529, ppl=5.77, wps=5863.8, ups=0.09, wpb=64838, bsz=128, num_updates=13054, lr=9.99036e-05, gnorm=2.008, loss_scale=8, train_wall=11, gb_free=2.8, wall=150298 2021-06-20 12:23:54 | INFO | train_inner | epoch 005: 1124 / 3002 loss=2.499, ppl=5.65, wps=5891.2, ups=0.09, wpb=64767, bsz=128, num_updates=13055, lr=9.99036e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=150309 2021-06-20 12:24:06 | INFO | train_inner | epoch 005: 1125 / 3002 loss=2.587, ppl=6.01, wps=5757.8, ups=0.09, wpb=64840, bsz=128, num_updates=13056, lr=9.99035e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=150320 2021-06-20 12:24:17 | INFO | train_inner | epoch 005: 1126 / 3002 loss=2.564, ppl=5.91, wps=5802.3, ups=0.09, wpb=64839, bsz=128, num_updates=13057, lr=9.99035e-05, gnorm=2.589, loss_scale=8, train_wall=11, gb_free=2.8, wall=150331 2021-06-20 12:24:28 | INFO | train_inner | epoch 005: 1127 / 3002 loss=2.411, ppl=5.32, wps=5953, ups=0.09, wpb=64810, bsz=128, num_updates=13058, lr=9.99035e-05, gnorm=2.044, loss_scale=8, train_wall=10, gb_free=2.8, wall=150342 2021-06-20 12:24:39 | INFO | train_inner | epoch 005: 1128 / 3002 loss=2.587, ppl=6.01, wps=5741.6, ups=0.09, wpb=64787, bsz=128, num_updates=13059, lr=9.99035e-05, gnorm=2.084, loss_scale=8, train_wall=11, gb_free=2.8, wall=150353 2021-06-20 12:24:50 | INFO | train_inner | epoch 005: 1129 / 3002 loss=2.55, ppl=5.86, wps=5897.7, ups=0.09, wpb=64928, bsz=128, num_updates=13060, lr=9.99035e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=150364 2021-06-20 12:25:01 | INFO | train_inner | epoch 005: 1130 / 3002 loss=2.641, ppl=6.24, wps=5846.5, ups=0.09, wpb=64787, bsz=128, num_updates=13061, lr=9.99035e-05, gnorm=2.26, loss_scale=8, train_wall=11, gb_free=2.8, wall=150375 2021-06-20 12:25:12 | INFO | train_inner | epoch 005: 1131 / 3002 loss=2.437, ppl=5.41, wps=5943.4, ups=0.09, wpb=64816, bsz=128, num_updates=13062, lr=9.99035e-05, gnorm=2.042, loss_scale=8, train_wall=10, gb_free=2.8, wall=150386 2021-06-20 12:25:23 | INFO | train_inner | epoch 005: 1132 / 3002 loss=2.47, ppl=5.54, wps=5862.1, ups=0.09, wpb=64765, bsz=128, num_updates=13063, lr=9.99035e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=150397 2021-06-20 12:25:34 | INFO | train_inner | epoch 005: 1133 / 3002 loss=2.557, ppl=5.89, wps=5791.3, ups=0.09, wpb=64886, bsz=128, num_updates=13064, lr=9.99035e-05, gnorm=2.038, loss_scale=8, train_wall=11, gb_free=2.8, wall=150409 2021-06-20 12:25:46 | INFO | train_inner | epoch 005: 1134 / 3002 loss=2.526, ppl=5.76, wps=5791.5, ups=0.09, wpb=64764, bsz=128, num_updates=13065, lr=9.99035e-05, gnorm=2.017, loss_scale=8, train_wall=11, gb_free=2.8, wall=150420 2021-06-20 12:25:57 | INFO | train_inner | epoch 005: 1135 / 3002 loss=2.632, ppl=6.2, wps=5746.5, ups=0.09, wpb=64849, bsz=128, num_updates=13066, lr=9.99035e-05, gnorm=2.192, loss_scale=8, train_wall=11, gb_free=2.8, wall=150431 2021-06-20 12:26:08 | INFO | train_inner | epoch 005: 1136 / 3002 loss=2.605, ppl=6.08, wps=5789.6, ups=0.09, wpb=64852, bsz=128, num_updates=13067, lr=9.99035e-05, gnorm=2.051, loss_scale=8, train_wall=11, gb_free=2.8, wall=150442 2021-06-20 12:26:19 | INFO | train_inner | epoch 005: 1137 / 3002 loss=2.467, ppl=5.53, wps=5796, ups=0.09, wpb=64862, bsz=128, num_updates=13068, lr=9.99034e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=150454 2021-06-20 12:26:30 | INFO | train_inner | epoch 005: 1138 / 3002 loss=2.416, ppl=5.34, wps=5872.7, ups=0.09, wpb=64865, bsz=128, num_updates=13069, lr=9.99034e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=150465 2021-06-20 12:26:41 | INFO | train_inner | epoch 005: 1139 / 3002 loss=2.57, ppl=5.94, wps=5911, ups=0.09, wpb=64844, bsz=128, num_updates=13070, lr=9.99034e-05, gnorm=2.321, loss_scale=8, train_wall=11, gb_free=2.8, wall=150476 2021-06-20 12:26:52 | INFO | train_inner | epoch 005: 1140 / 3002 loss=2.579, ppl=5.97, wps=5844.8, ups=0.09, wpb=64884, bsz=128, num_updates=13071, lr=9.99034e-05, gnorm=2.195, loss_scale=8, train_wall=11, gb_free=2.8, wall=150487 2021-06-20 12:27:03 | INFO | train_inner | epoch 005: 1141 / 3002 loss=2.5, ppl=5.66, wps=5837, ups=0.09, wpb=64912, bsz=128, num_updates=13072, lr=9.99034e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=150498 2021-06-20 12:27:15 | INFO | train_inner | epoch 005: 1142 / 3002 loss=2.682, ppl=6.42, wps=5744.2, ups=0.09, wpb=64758, bsz=128, num_updates=13073, lr=9.99034e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=150509 2021-06-20 12:27:26 | INFO | train_inner | epoch 005: 1143 / 3002 loss=2.387, ppl=5.23, wps=5823.6, ups=0.09, wpb=64872, bsz=128, num_updates=13074, lr=9.99034e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=150520 2021-06-20 12:27:37 | INFO | train_inner | epoch 005: 1144 / 3002 loss=2.475, ppl=5.56, wps=5885.8, ups=0.09, wpb=64876, bsz=128, num_updates=13075, lr=9.99034e-05, gnorm=1.886, loss_scale=8, train_wall=11, gb_free=2.8, wall=150531 2021-06-20 12:27:48 | INFO | train_inner | epoch 005: 1145 / 3002 loss=2.603, ppl=6.08, wps=5798.2, ups=0.09, wpb=64842, bsz=128, num_updates=13076, lr=9.99034e-05, gnorm=2.028, loss_scale=8, train_wall=11, gb_free=2.8, wall=150542 2021-06-20 12:27:59 | INFO | train_inner | epoch 005: 1146 / 3002 loss=2.382, ppl=5.21, wps=5775.2, ups=0.09, wpb=64842, bsz=128, num_updates=13077, lr=9.99034e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=150554 2021-06-20 12:28:10 | INFO | train_inner | epoch 005: 1147 / 3002 loss=2.508, ppl=5.69, wps=5872.7, ups=0.09, wpb=64829, bsz=128, num_updates=13078, lr=9.99034e-05, gnorm=1.924, loss_scale=8, train_wall=11, gb_free=2.8, wall=150565 2021-06-20 12:28:21 | INFO | train_inner | epoch 005: 1148 / 3002 loss=2.501, ppl=5.66, wps=5964.1, ups=0.09, wpb=64728, bsz=128, num_updates=13079, lr=9.99034e-05, gnorm=2.044, loss_scale=8, train_wall=10, gb_free=2.8, wall=150575 2021-06-20 12:28:32 | INFO | train_inner | epoch 005: 1149 / 3002 loss=2.299, ppl=4.92, wps=5848.1, ups=0.09, wpb=64934, bsz=128, num_updates=13080, lr=9.99034e-05, gnorm=1.952, loss_scale=8, train_wall=11, gb_free=2.8, wall=150587 2021-06-20 12:28:43 | INFO | train_inner | epoch 005: 1150 / 3002 loss=2.579, ppl=5.98, wps=5776.2, ups=0.09, wpb=64886, bsz=128, num_updates=13081, lr=9.99033e-05, gnorm=2.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=150598 2021-06-20 12:28:55 | INFO | train_inner | epoch 005: 1151 / 3002 loss=2.536, ppl=5.8, wps=5777, ups=0.09, wpb=64749, bsz=128, num_updates=13082, lr=9.99033e-05, gnorm=2.144, loss_scale=8, train_wall=11, gb_free=2.8, wall=150609 2021-06-20 12:29:06 | INFO | train_inner | epoch 005: 1152 / 3002 loss=2.549, ppl=5.85, wps=5911.6, ups=0.09, wpb=64814, bsz=128, num_updates=13083, lr=9.99033e-05, gnorm=2.04, loss_scale=8, train_wall=11, gb_free=2.8, wall=150620 2021-06-20 12:29:17 | INFO | train_inner | epoch 005: 1153 / 3002 loss=2.609, ppl=6.1, wps=5797.6, ups=0.09, wpb=64836, bsz=128, num_updates=13084, lr=9.99033e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=150631 2021-06-20 12:29:28 | INFO | train_inner | epoch 005: 1154 / 3002 loss=2.417, ppl=5.34, wps=5934.8, ups=0.09, wpb=64943, bsz=128, num_updates=13085, lr=9.99033e-05, gnorm=1.998, loss_scale=8, train_wall=11, gb_free=2.8, wall=150642 2021-06-20 12:29:39 | INFO | train_inner | epoch 005: 1155 / 3002 loss=2.448, ppl=5.46, wps=5852.5, ups=0.09, wpb=64884, bsz=128, num_updates=13086, lr=9.99033e-05, gnorm=2.056, loss_scale=8, train_wall=11, gb_free=2.8, wall=150653 2021-06-20 12:29:50 | INFO | train_inner | epoch 005: 1156 / 3002 loss=2.452, ppl=5.47, wps=5883.4, ups=0.09, wpb=64754, bsz=128, num_updates=13087, lr=9.99033e-05, gnorm=1.934, loss_scale=8, train_wall=11, gb_free=2.8, wall=150664 2021-06-20 12:30:01 | INFO | train_inner | epoch 005: 1157 / 3002 loss=2.469, ppl=5.54, wps=5839.9, ups=0.09, wpb=64864, bsz=128, num_updates=13088, lr=9.99033e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=150675 2021-06-20 12:30:12 | INFO | train_inner | epoch 005: 1158 / 3002 loss=2.369, ppl=5.17, wps=5803.8, ups=0.09, wpb=64831, bsz=128, num_updates=13089, lr=9.99033e-05, gnorm=2.038, loss_scale=8, train_wall=11, gb_free=2.8, wall=150687 2021-06-20 12:30:23 | INFO | train_inner | epoch 005: 1159 / 3002 loss=2.473, ppl=5.55, wps=5893.8, ups=0.09, wpb=64871, bsz=128, num_updates=13090, lr=9.99033e-05, gnorm=2.022, loss_scale=8, train_wall=11, gb_free=2.8, wall=150698 2021-06-20 12:30:34 | INFO | train_inner | epoch 005: 1160 / 3002 loss=2.522, ppl=5.75, wps=5749.3, ups=0.09, wpb=64770, bsz=128, num_updates=13091, lr=9.99033e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=150709 2021-06-20 12:30:46 | INFO | train_inner | epoch 005: 1161 / 3002 loss=2.544, ppl=5.83, wps=5778.2, ups=0.09, wpb=64831, bsz=128, num_updates=13092, lr=9.99033e-05, gnorm=2.091, loss_scale=8, train_wall=11, gb_free=2.8, wall=150720 2021-06-20 12:30:57 | INFO | train_inner | epoch 005: 1162 / 3002 loss=2.5, ppl=5.66, wps=5828.8, ups=0.09, wpb=64791, bsz=128, num_updates=13093, lr=9.99032e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=150731 2021-06-20 12:31:08 | INFO | train_inner | epoch 005: 1163 / 3002 loss=2.544, ppl=5.83, wps=5836.5, ups=0.09, wpb=64844, bsz=128, num_updates=13094, lr=9.99032e-05, gnorm=1.953, loss_scale=8, train_wall=11, gb_free=2.8, wall=150742 2021-06-20 12:31:19 | INFO | train_inner | epoch 005: 1164 / 3002 loss=2.548, ppl=5.85, wps=5802, ups=0.09, wpb=64800, bsz=128, num_updates=13095, lr=9.99032e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=150753 2021-06-20 12:31:30 | INFO | train_inner | epoch 005: 1165 / 3002 loss=2.471, ppl=5.55, wps=5914.4, ups=0.09, wpb=64965, bsz=128, num_updates=13096, lr=9.99032e-05, gnorm=1.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=150764 2021-06-20 12:31:41 | INFO | train_inner | epoch 005: 1166 / 3002 loss=2.432, ppl=5.4, wps=5788.2, ups=0.09, wpb=64766, bsz=128, num_updates=13097, lr=9.99032e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=150776 2021-06-20 12:31:52 | INFO | train_inner | epoch 005: 1167 / 3002 loss=2.499, ppl=5.65, wps=5818.6, ups=0.09, wpb=64794, bsz=128, num_updates=13098, lr=9.99032e-05, gnorm=1.945, loss_scale=8, train_wall=11, gb_free=2.8, wall=150787 2021-06-20 12:32:04 | INFO | train_inner | epoch 005: 1168 / 3002 loss=2.697, ppl=6.49, wps=5790, ups=0.09, wpb=64848, bsz=128, num_updates=13099, lr=9.99032e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=150798 2021-06-20 12:32:15 | INFO | train_inner | epoch 005: 1169 / 3002 loss=2.518, ppl=5.73, wps=5903.1, ups=0.09, wpb=64928, bsz=128, num_updates=13100, lr=9.99032e-05, gnorm=1.972, loss_scale=8, train_wall=11, gb_free=2.8, wall=150809 2021-06-20 12:32:26 | INFO | train_inner | epoch 005: 1170 / 3002 loss=2.593, ppl=6.03, wps=5863.5, ups=0.09, wpb=64938, bsz=128, num_updates=13101, lr=9.99032e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=150820 2021-06-20 12:32:36 | INFO | train_inner | epoch 005: 1171 / 3002 loss=2.531, ppl=5.78, wps=6044, ups=0.09, wpb=64852, bsz=128, num_updates=13102, lr=9.99032e-05, gnorm=2.012, loss_scale=8, train_wall=10, gb_free=2.8, wall=150831 2021-06-20 12:32:47 | INFO | train_inner | epoch 005: 1172 / 3002 loss=2.526, ppl=5.76, wps=5921.3, ups=0.09, wpb=64760, bsz=128, num_updates=13103, lr=9.99032e-05, gnorm=1.956, loss_scale=8, train_wall=10, gb_free=2.8, wall=150842 2021-06-20 12:32:59 | INFO | train_inner | epoch 005: 1173 / 3002 loss=2.622, ppl=6.16, wps=5739.9, ups=0.09, wpb=64705, bsz=128, num_updates=13104, lr=9.99032e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=150853 2021-06-20 12:33:10 | INFO | train_inner | epoch 005: 1174 / 3002 loss=2.647, ppl=6.27, wps=5909.4, ups=0.09, wpb=64776, bsz=128, num_updates=13105, lr=9.99032e-05, gnorm=2.021, loss_scale=8, train_wall=11, gb_free=2.8, wall=150864 2021-06-20 12:33:20 | INFO | train_inner | epoch 005: 1175 / 3002 loss=2.51, ppl=5.7, wps=5966.6, ups=0.09, wpb=64853, bsz=128, num_updates=13106, lr=9.99031e-05, gnorm=1.892, loss_scale=8, train_wall=10, gb_free=2.8, wall=150875 2021-06-20 12:33:31 | INFO | train_inner | epoch 005: 1176 / 3002 loss=2.393, ppl=5.25, wps=5919.8, ups=0.09, wpb=64890, bsz=128, num_updates=13107, lr=9.99031e-05, gnorm=1.97, loss_scale=8, train_wall=11, gb_free=2.8, wall=150886 2021-06-20 12:33:43 | INFO | train_inner | epoch 005: 1177 / 3002 loss=2.432, ppl=5.4, wps=5821.8, ups=0.09, wpb=64840, bsz=128, num_updates=13108, lr=9.99031e-05, gnorm=2.249, loss_scale=8, train_wall=11, gb_free=2.8, wall=150897 2021-06-20 12:33:54 | INFO | train_inner | epoch 005: 1178 / 3002 loss=2.582, ppl=5.99, wps=5858.1, ups=0.09, wpb=64793, bsz=128, num_updates=13109, lr=9.99031e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=150908 2021-06-20 12:34:05 | INFO | train_inner | epoch 005: 1179 / 3002 loss=2.562, ppl=5.9, wps=5926.6, ups=0.09, wpb=64814, bsz=128, num_updates=13110, lr=9.99031e-05, gnorm=2.111, loss_scale=8, train_wall=10, gb_free=2.8, wall=150919 2021-06-20 12:34:15 | INFO | train_inner | epoch 005: 1180 / 3002 loss=2.246, ppl=4.74, wps=6048, ups=0.09, wpb=64812, bsz=128, num_updates=13111, lr=9.99031e-05, gnorm=1.97, loss_scale=8, train_wall=10, gb_free=2.8, wall=150930 2021-06-20 12:34:26 | INFO | train_inner | epoch 005: 1181 / 3002 loss=2.446, ppl=5.45, wps=5859, ups=0.09, wpb=64853, bsz=128, num_updates=13112, lr=9.99031e-05, gnorm=1.924, loss_scale=8, train_wall=11, gb_free=2.8, wall=150941 2021-06-20 12:34:37 | INFO | train_inner | epoch 005: 1182 / 3002 loss=2.341, ppl=5.07, wps=5870.2, ups=0.09, wpb=64835, bsz=128, num_updates=13113, lr=9.99031e-05, gnorm=1.932, loss_scale=8, train_wall=11, gb_free=2.8, wall=150952 2021-06-20 12:34:48 | INFO | train_inner | epoch 005: 1183 / 3002 loss=2.75, ppl=6.73, wps=5826, ups=0.09, wpb=64841, bsz=128, num_updates=13114, lr=9.99031e-05, gnorm=2.058, loss_scale=8, train_wall=11, gb_free=2.8, wall=150963 2021-06-20 12:34:59 | INFO | train_inner | epoch 005: 1184 / 3002 loss=2.505, ppl=5.67, wps=5914.9, ups=0.09, wpb=64888, bsz=128, num_updates=13115, lr=9.99031e-05, gnorm=1.95, loss_scale=8, train_wall=11, gb_free=2.8, wall=150974 2021-06-20 12:35:11 | INFO | train_inner | epoch 005: 1185 / 3002 loss=2.564, ppl=5.91, wps=5842.6, ups=0.09, wpb=64873, bsz=128, num_updates=13116, lr=9.99031e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=150985 2021-06-20 12:35:21 | INFO | train_inner | epoch 005: 1186 / 3002 loss=2.504, ppl=5.67, wps=6059.7, ups=0.09, wpb=64783, bsz=128, num_updates=13117, lr=9.99031e-05, gnorm=1.985, loss_scale=8, train_wall=10, gb_free=2.8, wall=150996 2021-06-20 12:35:32 | INFO | train_inner | epoch 005: 1187 / 3002 loss=2.554, ppl=5.87, wps=5795.4, ups=0.09, wpb=64876, bsz=128, num_updates=13118, lr=9.9903e-05, gnorm=1.902, loss_scale=8, train_wall=11, gb_free=2.8, wall=151007 2021-06-20 12:35:43 | INFO | train_inner | epoch 005: 1188 / 3002 loss=2.388, ppl=5.24, wps=5872.2, ups=0.09, wpb=64808, bsz=128, num_updates=13119, lr=9.9903e-05, gnorm=1.997, loss_scale=8, train_wall=11, gb_free=2.8, wall=151018 2021-06-20 12:35:54 | INFO | train_inner | epoch 005: 1189 / 3002 loss=2.426, ppl=5.37, wps=5930.1, ups=0.09, wpb=64902, bsz=128, num_updates=13120, lr=9.9903e-05, gnorm=1.87, loss_scale=8, train_wall=10, gb_free=2.8, wall=151029 2021-06-20 12:36:06 | INFO | train_inner | epoch 005: 1190 / 3002 loss=2.644, ppl=6.25, wps=5678.9, ups=0.09, wpb=64728, bsz=128, num_updates=13121, lr=9.9903e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=151040 2021-06-20 12:36:17 | INFO | train_inner | epoch 005: 1191 / 3002 loss=2.483, ppl=5.59, wps=5876, ups=0.09, wpb=64743, bsz=128, num_updates=13122, lr=9.9903e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=151051 2021-06-20 12:36:28 | INFO | train_inner | epoch 005: 1192 / 3002 loss=2.471, ppl=5.55, wps=5863.3, ups=0.09, wpb=64861, bsz=128, num_updates=13123, lr=9.9903e-05, gnorm=2.031, loss_scale=8, train_wall=11, gb_free=2.8, wall=151062 2021-06-20 12:36:39 | INFO | train_inner | epoch 005: 1193 / 3002 loss=2.463, ppl=5.51, wps=5737.3, ups=0.09, wpb=64854, bsz=128, num_updates=13124, lr=9.9903e-05, gnorm=2.087, loss_scale=8, train_wall=11, gb_free=2.8, wall=151074 2021-06-20 12:36:50 | INFO | train_inner | epoch 005: 1194 / 3002 loss=2.363, ppl=5.14, wps=5763, ups=0.09, wpb=64804, bsz=128, num_updates=13125, lr=9.9903e-05, gnorm=2.022, loss_scale=8, train_wall=11, gb_free=2.8, wall=151085 2021-06-20 12:37:02 | INFO | train_inner | epoch 005: 1195 / 3002 loss=2.528, ppl=5.77, wps=5847.8, ups=0.09, wpb=64806, bsz=128, num_updates=13126, lr=9.9903e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=151096 2021-06-20 12:37:13 | INFO | train_inner | epoch 005: 1196 / 3002 loss=2.475, ppl=5.56, wps=5889.8, ups=0.09, wpb=64790, bsz=128, num_updates=13127, lr=9.9903e-05, gnorm=2.008, loss_scale=8, train_wall=11, gb_free=2.8, wall=151107 2021-06-20 12:37:24 | INFO | train_inner | epoch 005: 1197 / 3002 loss=2.475, ppl=5.56, wps=5857.2, ups=0.09, wpb=64869, bsz=128, num_updates=13128, lr=9.9903e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=151118 2021-06-20 12:37:35 | INFO | train_inner | epoch 005: 1198 / 3002 loss=2.46, ppl=5.5, wps=5891.1, ups=0.09, wpb=64904, bsz=128, num_updates=13129, lr=9.9903e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=151129 2021-06-20 12:37:46 | INFO | train_inner | epoch 005: 1199 / 3002 loss=2.603, ppl=6.08, wps=5816.4, ups=0.09, wpb=64871, bsz=128, num_updates=13130, lr=9.9903e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=151140 2021-06-20 12:37:57 | INFO | train_inner | epoch 005: 1200 / 3002 loss=2.632, ppl=6.2, wps=5769.1, ups=0.09, wpb=64812, bsz=128, num_updates=13131, lr=9.99029e-05, gnorm=1.953, loss_scale=8, train_wall=11, gb_free=2.8, wall=151151 2021-06-20 12:38:08 | INFO | train_inner | epoch 005: 1201 / 3002 loss=2.47, ppl=5.54, wps=5866.5, ups=0.09, wpb=64876, bsz=128, num_updates=13132, lr=9.99029e-05, gnorm=1.972, loss_scale=8, train_wall=11, gb_free=2.8, wall=151162 2021-06-20 12:38:19 | INFO | train_inner | epoch 005: 1202 / 3002 loss=2.473, ppl=5.55, wps=5730.5, ups=0.09, wpb=64858, bsz=128, num_updates=13133, lr=9.99029e-05, gnorm=1.922, loss_scale=8, train_wall=11, gb_free=2.8, wall=151174 2021-06-20 12:38:30 | INFO | train_inner | epoch 005: 1203 / 3002 loss=2.702, ppl=6.51, wps=5913.8, ups=0.09, wpb=64817, bsz=128, num_updates=13134, lr=9.99029e-05, gnorm=2.012, loss_scale=8, train_wall=10, gb_free=2.8, wall=151185 2021-06-20 12:38:41 | INFO | train_inner | epoch 005: 1204 / 3002 loss=2.604, ppl=6.08, wps=5874.3, ups=0.09, wpb=64794, bsz=128, num_updates=13135, lr=9.99029e-05, gnorm=2.003, loss_scale=8, train_wall=11, gb_free=2.8, wall=151196 2021-06-20 12:38:52 | INFO | train_inner | epoch 005: 1205 / 3002 loss=2.504, ppl=5.67, wps=5839.8, ups=0.09, wpb=64832, bsz=128, num_updates=13136, lr=9.99029e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=151207 2021-06-20 12:39:04 | INFO | train_inner | epoch 005: 1206 / 3002 loss=2.432, ppl=5.4, wps=5694.9, ups=0.09, wpb=64883, bsz=128, num_updates=13137, lr=9.99029e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=151218 2021-06-20 12:39:15 | INFO | train_inner | epoch 005: 1207 / 3002 loss=2.652, ppl=6.28, wps=5802.9, ups=0.09, wpb=64786, bsz=128, num_updates=13138, lr=9.99029e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=151229 2021-06-20 12:39:26 | INFO | train_inner | epoch 005: 1208 / 3002 loss=2.454, ppl=5.48, wps=5814.1, ups=0.09, wpb=64846, bsz=128, num_updates=13139, lr=9.99029e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=151241 2021-06-20 12:39:37 | INFO | train_inner | epoch 005: 1209 / 3002 loss=2.368, ppl=5.16, wps=5776.9, ups=0.09, wpb=64804, bsz=128, num_updates=13140, lr=9.99029e-05, gnorm=1.892, loss_scale=8, train_wall=11, gb_free=2.8, wall=151252 2021-06-20 12:39:49 | INFO | train_inner | epoch 005: 1210 / 3002 loss=2.516, ppl=5.72, wps=5801, ups=0.09, wpb=64792, bsz=128, num_updates=13141, lr=9.99029e-05, gnorm=1.92, loss_scale=8, train_wall=11, gb_free=2.8, wall=151263 2021-06-20 12:40:00 | INFO | train_inner | epoch 005: 1211 / 3002 loss=2.553, ppl=5.87, wps=5864, ups=0.09, wpb=64793, bsz=128, num_updates=13142, lr=9.99029e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=151274 2021-06-20 12:40:10 | INFO | train_inner | epoch 005: 1212 / 3002 loss=2.574, ppl=5.95, wps=6003.2, ups=0.09, wpb=64913, bsz=128, num_updates=13143, lr=9.99028e-05, gnorm=1.954, loss_scale=8, train_wall=10, gb_free=2.8, wall=151285 2021-06-20 12:40:21 | INFO | train_inner | epoch 005: 1213 / 3002 loss=2.751, ppl=6.73, wps=5907.4, ups=0.09, wpb=64815, bsz=128, num_updates=13144, lr=9.99028e-05, gnorm=2.042, loss_scale=8, train_wall=11, gb_free=2.8, wall=151296 2021-06-20 12:40:33 | INFO | train_inner | epoch 005: 1214 / 3002 loss=2.461, ppl=5.51, wps=5808.3, ups=0.09, wpb=64818, bsz=128, num_updates=13145, lr=9.99028e-05, gnorm=2.034, loss_scale=8, train_wall=11, gb_free=2.8, wall=151307 2021-06-20 12:40:44 | INFO | train_inner | epoch 005: 1215 / 3002 loss=2.523, ppl=5.75, wps=5802.4, ups=0.09, wpb=64816, bsz=128, num_updates=13146, lr=9.99028e-05, gnorm=1.919, loss_scale=8, train_wall=11, gb_free=2.8, wall=151318 2021-06-20 12:40:55 | INFO | train_inner | epoch 005: 1216 / 3002 loss=2.46, ppl=5.5, wps=5813.5, ups=0.09, wpb=64863, bsz=128, num_updates=13147, lr=9.99028e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=151329 2021-06-20 12:41:06 | INFO | train_inner | epoch 005: 1217 / 3002 loss=2.468, ppl=5.53, wps=5737.3, ups=0.09, wpb=64813, bsz=128, num_updates=13148, lr=9.99028e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=151341 2021-06-20 12:41:17 | INFO | train_inner | epoch 005: 1218 / 3002 loss=2.455, ppl=5.48, wps=5804.7, ups=0.09, wpb=64797, bsz=128, num_updates=13149, lr=9.99028e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=151352 2021-06-20 12:41:29 | INFO | train_inner | epoch 005: 1219 / 3002 loss=2.505, ppl=5.68, wps=5755.4, ups=0.09, wpb=64859, bsz=128, num_updates=13150, lr=9.99028e-05, gnorm=2.015, loss_scale=8, train_wall=11, gb_free=2.8, wall=151363 2021-06-20 12:41:40 | INFO | train_inner | epoch 005: 1220 / 3002 loss=2.457, ppl=5.49, wps=5891.7, ups=0.09, wpb=64816, bsz=128, num_updates=13151, lr=9.99028e-05, gnorm=2.082, loss_scale=8, train_wall=11, gb_free=2.8, wall=151374 2021-06-20 12:41:51 | INFO | train_inner | epoch 005: 1221 / 3002 loss=2.416, ppl=5.34, wps=5796.4, ups=0.09, wpb=64811, bsz=128, num_updates=13152, lr=9.99028e-05, gnorm=2.024, loss_scale=16, train_wall=11, gb_free=2.8, wall=151385 2021-06-20 12:42:02 | INFO | train_inner | epoch 005: 1222 / 3002 loss=2.404, ppl=5.29, wps=5857.9, ups=0.09, wpb=64781, bsz=128, num_updates=13153, lr=9.99028e-05, gnorm=2.016, loss_scale=16, train_wall=11, gb_free=2.8, wall=151396 2021-06-20 12:42:13 | INFO | train_inner | epoch 005: 1223 / 3002 loss=2.618, ppl=6.14, wps=5774, ups=0.09, wpb=64803, bsz=128, num_updates=13154, lr=9.99028e-05, gnorm=2.139, loss_scale=16, train_wall=11, gb_free=2.8, wall=151407 2021-06-20 12:42:24 | INFO | train_inner | epoch 005: 1224 / 3002 loss=2.616, ppl=6.13, wps=5908.6, ups=0.09, wpb=64850, bsz=128, num_updates=13155, lr=9.99028e-05, gnorm=1.889, loss_scale=16, train_wall=11, gb_free=2.8, wall=151418 2021-06-20 12:42:35 | INFO | train_inner | epoch 005: 1225 / 3002 loss=2.664, ppl=6.34, wps=5860.6, ups=0.09, wpb=64854, bsz=128, num_updates=13156, lr=9.99027e-05, gnorm=1.981, loss_scale=16, train_wall=11, gb_free=2.8, wall=151429 2021-06-20 12:42:46 | INFO | train_inner | epoch 005: 1226 / 3002 loss=2.448, ppl=5.46, wps=5942.4, ups=0.09, wpb=64845, bsz=128, num_updates=13157, lr=9.99027e-05, gnorm=1.944, loss_scale=16, train_wall=10, gb_free=2.8, wall=151440 2021-06-20 12:42:57 | INFO | train_inner | epoch 005: 1227 / 3002 loss=2.65, ppl=6.28, wps=5872.1, ups=0.09, wpb=64833, bsz=128, num_updates=13158, lr=9.99027e-05, gnorm=1.94, loss_scale=16, train_wall=11, gb_free=2.8, wall=151451 2021-06-20 12:43:08 | INFO | train_inner | epoch 005: 1228 / 3002 loss=2.548, ppl=5.85, wps=5842.5, ups=0.09, wpb=64773, bsz=128, num_updates=13159, lr=9.99027e-05, gnorm=1.979, loss_scale=16, train_wall=11, gb_free=2.8, wall=151463 2021-06-20 12:43:19 | INFO | train_inner | epoch 005: 1229 / 3002 loss=2.488, ppl=5.61, wps=5828.7, ups=0.09, wpb=64820, bsz=128, num_updates=13160, lr=9.99027e-05, gnorm=2.013, loss_scale=16, train_wall=11, gb_free=2.8, wall=151474 2021-06-20 12:43:30 | INFO | train_inner | epoch 005: 1230 / 3002 loss=2.507, ppl=5.69, wps=5830.8, ups=0.09, wpb=64837, bsz=128, num_updates=13161, lr=9.99027e-05, gnorm=1.961, loss_scale=16, train_wall=11, gb_free=2.8, wall=151485 2021-06-20 12:43:41 | INFO | train_inner | epoch 005: 1231 / 3002 loss=2.462, ppl=5.51, wps=6009.4, ups=0.09, wpb=64855, bsz=128, num_updates=13162, lr=9.99027e-05, gnorm=2.117, loss_scale=16, train_wall=10, gb_free=2.8, wall=151496 2021-06-20 12:43:52 | INFO | train_inner | epoch 005: 1232 / 3002 loss=2.493, ppl=5.63, wps=5941.8, ups=0.09, wpb=64904, bsz=128, num_updates=13163, lr=9.99027e-05, gnorm=2.07, loss_scale=16, train_wall=10, gb_free=2.8, wall=151506 2021-06-20 12:44:03 | INFO | train_inner | epoch 005: 1233 / 3002 loss=2.429, ppl=5.39, wps=5933.7, ups=0.09, wpb=64824, bsz=128, num_updates=13164, lr=9.99027e-05, gnorm=2.068, loss_scale=16, train_wall=10, gb_free=2.8, wall=151517 2021-06-20 12:44:14 | INFO | train_inner | epoch 005: 1234 / 3002 loss=2.457, ppl=5.49, wps=5938.4, ups=0.09, wpb=64781, bsz=128, num_updates=13165, lr=9.99027e-05, gnorm=2.014, loss_scale=16, train_wall=10, gb_free=2.8, wall=151528 2021-06-20 12:44:25 | INFO | train_inner | epoch 005: 1235 / 3002 loss=2.598, ppl=6.05, wps=5785.3, ups=0.09, wpb=64779, bsz=128, num_updates=13166, lr=9.99027e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=151539 2021-06-20 12:44:36 | INFO | train_inner | epoch 005: 1236 / 3002 loss=2.809, ppl=7.01, wps=5774.8, ups=0.09, wpb=64814, bsz=128, num_updates=13167, lr=9.99027e-05, gnorm=1.949, loss_scale=16, train_wall=11, gb_free=2.8, wall=151551 2021-06-20 12:44:48 | INFO | train_inner | epoch 005: 1237 / 3002 loss=2.442, ppl=5.43, wps=5812.9, ups=0.09, wpb=64814, bsz=128, num_updates=13168, lr=9.99026e-05, gnorm=2.122, loss_scale=16, train_wall=11, gb_free=2.8, wall=151562 2021-06-20 12:44:59 | INFO | train_inner | epoch 005: 1238 / 3002 loss=2.432, ppl=5.39, wps=5740.5, ups=0.09, wpb=64784, bsz=128, num_updates=13169, lr=9.99026e-05, gnorm=1.962, loss_scale=16, train_wall=11, gb_free=2.8, wall=151573 2021-06-20 12:45:10 | INFO | train_inner | epoch 005: 1239 / 3002 loss=2.494, ppl=5.63, wps=5844.5, ups=0.09, wpb=64797, bsz=128, num_updates=13170, lr=9.99026e-05, gnorm=2.112, loss_scale=16, train_wall=11, gb_free=2.8, wall=151584 2021-06-20 12:45:21 | INFO | train_inner | epoch 005: 1240 / 3002 loss=2.551, ppl=5.86, wps=5873.6, ups=0.09, wpb=64829, bsz=128, num_updates=13171, lr=9.99026e-05, gnorm=2.085, loss_scale=16, train_wall=11, gb_free=2.8, wall=151595 2021-06-20 12:45:32 | INFO | train_inner | epoch 005: 1241 / 3002 loss=2.454, ppl=5.48, wps=5820.8, ups=0.09, wpb=64923, bsz=128, num_updates=13172, lr=9.99026e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=151606 2021-06-20 12:45:43 | INFO | train_inner | epoch 005: 1242 / 3002 loss=2.514, ppl=5.71, wps=5842.4, ups=0.09, wpb=64798, bsz=128, num_updates=13173, lr=9.99026e-05, gnorm=2.033, loss_scale=16, train_wall=11, gb_free=2.8, wall=151618 2021-06-20 12:45:54 | INFO | train_inner | epoch 005: 1243 / 3002 loss=2.513, ppl=5.71, wps=5805.8, ups=0.09, wpb=64870, bsz=128, num_updates=13174, lr=9.99026e-05, gnorm=1.986, loss_scale=16, train_wall=11, gb_free=2.8, wall=151629 2021-06-20 12:46:05 | INFO | train_inner | epoch 005: 1244 / 3002 loss=2.503, ppl=5.67, wps=5852.4, ups=0.09, wpb=64840, bsz=128, num_updates=13175, lr=9.99026e-05, gnorm=1.975, loss_scale=16, train_wall=11, gb_free=2.8, wall=151640 2021-06-20 12:46:17 | INFO | train_inner | epoch 005: 1245 / 3002 loss=2.448, ppl=5.46, wps=5841.3, ups=0.09, wpb=64856, bsz=128, num_updates=13176, lr=9.99026e-05, gnorm=1.995, loss_scale=16, train_wall=11, gb_free=2.8, wall=151651 2021-06-20 12:46:28 | INFO | train_inner | epoch 005: 1246 / 3002 loss=2.473, ppl=5.55, wps=5742.3, ups=0.09, wpb=64919, bsz=128, num_updates=13177, lr=9.99026e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=151662 2021-06-20 12:46:39 | INFO | train_inner | epoch 005: 1247 / 3002 loss=2.489, ppl=5.61, wps=5791.8, ups=0.09, wpb=64801, bsz=128, num_updates=13178, lr=9.99026e-05, gnorm=1.968, loss_scale=16, train_wall=11, gb_free=2.8, wall=151673 2021-06-20 12:46:50 | INFO | train_inner | epoch 005: 1248 / 3002 loss=2.594, ppl=6.04, wps=5874.3, ups=0.09, wpb=64784, bsz=128, num_updates=13179, lr=9.99026e-05, gnorm=1.958, loss_scale=16, train_wall=11, gb_free=2.8, wall=151684 2021-06-20 12:47:01 | INFO | train_inner | epoch 005: 1249 / 3002 loss=2.382, ppl=5.21, wps=5798.7, ups=0.09, wpb=64878, bsz=128, num_updates=13180, lr=9.99026e-05, gnorm=2.039, loss_scale=16, train_wall=11, gb_free=2.8, wall=151696 2021-06-20 12:47:12 | INFO | train_inner | epoch 005: 1250 / 3002 loss=2.642, ppl=6.24, wps=5800.8, ups=0.09, wpb=64780, bsz=128, num_updates=13181, lr=9.99025e-05, gnorm=1.913, loss_scale=16, train_wall=11, gb_free=2.8, wall=151707 2021-06-20 12:47:23 | INFO | train_inner | epoch 005: 1251 / 3002 loss=2.553, ppl=5.87, wps=5951.7, ups=0.09, wpb=64864, bsz=128, num_updates=13182, lr=9.99025e-05, gnorm=2.062, loss_scale=16, train_wall=10, gb_free=2.8, wall=151718 2021-06-20 12:47:34 | INFO | train_inner | epoch 005: 1252 / 3002 loss=2.53, ppl=5.77, wps=5851.9, ups=0.09, wpb=64771, bsz=128, num_updates=13183, lr=9.99025e-05, gnorm=2.01, loss_scale=16, train_wall=11, gb_free=2.8, wall=151729 2021-06-20 12:47:46 | INFO | train_inner | epoch 005: 1253 / 3002 loss=2.385, ppl=5.22, wps=5822, ups=0.09, wpb=64871, bsz=128, num_updates=13184, lr=9.99025e-05, gnorm=1.909, loss_scale=16, train_wall=11, gb_free=2.8, wall=151740 2021-06-20 12:47:57 | INFO | train_inner | epoch 005: 1254 / 3002 loss=2.669, ppl=6.36, wps=5820.7, ups=0.09, wpb=64786, bsz=128, num_updates=13185, lr=9.99025e-05, gnorm=2.12, loss_scale=16, train_wall=11, gb_free=2.8, wall=151751 2021-06-20 12:48:08 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-20 12:48:19 | INFO | train_inner | epoch 005: 1256 / 3002 loss=2.54, ppl=5.82, wps=2894.8, ups=0.04, wpb=64853, bsz=128, num_updates=13186, lr=9.99025e-05, gnorm=1.961, loss_scale=8, train_wall=21, gb_free=2.8, wall=151773 2021-06-20 12:48:30 | INFO | train_inner | epoch 005: 1257 / 3002 loss=2.459, ppl=5.5, wps=5906.3, ups=0.09, wpb=64814, bsz=128, num_updates=13187, lr=9.99025e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=151784 2021-06-20 12:48:41 | INFO | train_inner | epoch 005: 1258 / 3002 loss=2.458, ppl=5.49, wps=5938.7, ups=0.09, wpb=64889, bsz=128, num_updates=13188, lr=9.99025e-05, gnorm=1.98, loss_scale=8, train_wall=10, gb_free=2.8, wall=151795 2021-06-20 12:48:52 | INFO | train_inner | epoch 005: 1259 / 3002 loss=2.497, ppl=5.65, wps=5765.8, ups=0.09, wpb=64863, bsz=128, num_updates=13189, lr=9.99025e-05, gnorm=1.914, loss_scale=8, train_wall=11, gb_free=2.8, wall=151807 2021-06-20 12:49:03 | INFO | train_inner | epoch 005: 1260 / 3002 loss=2.622, ppl=6.16, wps=5790.9, ups=0.09, wpb=64785, bsz=128, num_updates=13190, lr=9.99025e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=151818 2021-06-20 12:49:15 | INFO | train_inner | epoch 005: 1261 / 3002 loss=2.508, ppl=5.69, wps=5727.2, ups=0.09, wpb=64788, bsz=128, num_updates=13191, lr=9.99025e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=151829 2021-06-20 12:49:26 | INFO | train_inner | epoch 005: 1262 / 3002 loss=2.577, ppl=5.97, wps=5769.3, ups=0.09, wpb=64886, bsz=128, num_updates=13192, lr=9.99025e-05, gnorm=2.047, loss_scale=8, train_wall=11, gb_free=2.8, wall=151840 2021-06-20 12:49:37 | INFO | train_inner | epoch 005: 1263 / 3002 loss=2.44, ppl=5.43, wps=5926, ups=0.09, wpb=64927, bsz=128, num_updates=13193, lr=9.99024e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=151851 2021-06-20 12:49:48 | INFO | train_inner | epoch 005: 1264 / 3002 loss=2.636, ppl=6.22, wps=5792.3, ups=0.09, wpb=64838, bsz=128, num_updates=13194, lr=9.99024e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=151862 2021-06-20 12:49:59 | INFO | train_inner | epoch 005: 1265 / 3002 loss=2.4, ppl=5.28, wps=5811.9, ups=0.09, wpb=64745, bsz=128, num_updates=13195, lr=9.99024e-05, gnorm=2.077, loss_scale=8, train_wall=11, gb_free=2.8, wall=151874 2021-06-20 12:50:10 | INFO | train_inner | epoch 005: 1266 / 3002 loss=2.575, ppl=5.96, wps=5888.5, ups=0.09, wpb=64837, bsz=128, num_updates=13196, lr=9.99024e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=151885 2021-06-20 12:50:21 | INFO | train_inner | epoch 005: 1267 / 3002 loss=2.705, ppl=6.52, wps=5866.6, ups=0.09, wpb=64856, bsz=128, num_updates=13197, lr=9.99024e-05, gnorm=2.078, loss_scale=8, train_wall=11, gb_free=2.8, wall=151896 2021-06-20 12:50:32 | INFO | train_inner | epoch 005: 1268 / 3002 loss=2.538, ppl=5.81, wps=5896.6, ups=0.09, wpb=64809, bsz=128, num_updates=13198, lr=9.99024e-05, gnorm=2.082, loss_scale=8, train_wall=11, gb_free=2.8, wall=151907 2021-06-20 12:50:43 | INFO | train_inner | epoch 005: 1269 / 3002 loss=2.369, ppl=5.16, wps=5832.7, ups=0.09, wpb=64846, bsz=128, num_updates=13199, lr=9.99024e-05, gnorm=1.871, loss_scale=8, train_wall=11, gb_free=2.8, wall=151918 2021-06-20 12:50:55 | INFO | train_inner | epoch 005: 1270 / 3002 loss=2.456, ppl=5.49, wps=5853.3, ups=0.09, wpb=64856, bsz=128, num_updates=13200, lr=9.99024e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=151929 2021-06-20 12:51:05 | INFO | train_inner | epoch 005: 1271 / 3002 loss=2.47, ppl=5.54, wps=5939.6, ups=0.09, wpb=64762, bsz=128, num_updates=13201, lr=9.99024e-05, gnorm=1.98, loss_scale=8, train_wall=10, gb_free=2.8, wall=151940 2021-06-20 12:51:17 | INFO | train_inner | epoch 005: 1272 / 3002 loss=2.54, ppl=5.81, wps=5848.2, ups=0.09, wpb=64859, bsz=128, num_updates=13202, lr=9.99024e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=151951 2021-06-20 12:51:28 | INFO | train_inner | epoch 005: 1273 / 3002 loss=2.572, ppl=5.95, wps=5768, ups=0.09, wpb=64877, bsz=128, num_updates=13203, lr=9.99024e-05, gnorm=1.905, loss_scale=8, train_wall=11, gb_free=2.8, wall=151962 2021-06-20 12:51:39 | INFO | train_inner | epoch 005: 1274 / 3002 loss=2.385, ppl=5.22, wps=5725.9, ups=0.09, wpb=64877, bsz=128, num_updates=13204, lr=9.99024e-05, gnorm=1.888, loss_scale=8, train_wall=11, gb_free=2.8, wall=151973 2021-06-20 12:51:50 | INFO | train_inner | epoch 005: 1275 / 3002 loss=2.561, ppl=5.9, wps=5804.1, ups=0.09, wpb=64769, bsz=128, num_updates=13205, lr=9.99024e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=151985 2021-06-20 12:52:01 | INFO | train_inner | epoch 005: 1276 / 3002 loss=2.45, ppl=5.46, wps=5918.3, ups=0.09, wpb=64812, bsz=128, num_updates=13206, lr=9.99023e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=151996 2021-06-20 12:52:12 | INFO | train_inner | epoch 005: 1277 / 3002 loss=2.637, ppl=6.22, wps=5766.1, ups=0.09, wpb=64845, bsz=128, num_updates=13207, lr=9.99023e-05, gnorm=1.976, loss_scale=8, train_wall=11, gb_free=2.8, wall=152007 2021-06-20 12:52:24 | INFO | train_inner | epoch 005: 1278 / 3002 loss=2.714, ppl=6.56, wps=5805.2, ups=0.09, wpb=64820, bsz=128, num_updates=13208, lr=9.99023e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=152018 2021-06-20 12:52:34 | INFO | train_inner | epoch 005: 1279 / 3002 loss=2.6, ppl=6.06, wps=5998.2, ups=0.09, wpb=64866, bsz=128, num_updates=13209, lr=9.99023e-05, gnorm=1.901, loss_scale=8, train_wall=10, gb_free=2.8, wall=152029 2021-06-20 12:52:46 | INFO | train_inner | epoch 005: 1280 / 3002 loss=2.499, ppl=5.65, wps=5721.2, ups=0.09, wpb=64779, bsz=128, num_updates=13210, lr=9.99023e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=152040 2021-06-20 12:52:57 | INFO | train_inner | epoch 005: 1281 / 3002 loss=2.548, ppl=5.85, wps=5837.3, ups=0.09, wpb=64845, bsz=128, num_updates=13211, lr=9.99023e-05, gnorm=2.26, loss_scale=8, train_wall=11, gb_free=2.8, wall=152051 2021-06-20 12:53:08 | INFO | train_inner | epoch 005: 1282 / 3002 loss=2.547, ppl=5.84, wps=5932.1, ups=0.09, wpb=64893, bsz=128, num_updates=13212, lr=9.99023e-05, gnorm=2.034, loss_scale=8, train_wall=10, gb_free=2.8, wall=152062 2021-06-20 12:53:19 | INFO | train_inner | epoch 005: 1283 / 3002 loss=2.394, ppl=5.26, wps=5843.6, ups=0.09, wpb=64779, bsz=128, num_updates=13213, lr=9.99023e-05, gnorm=2.017, loss_scale=8, train_wall=11, gb_free=2.8, wall=152073 2021-06-20 12:53:30 | INFO | train_inner | epoch 005: 1284 / 3002 loss=2.442, ppl=5.44, wps=5831, ups=0.09, wpb=64721, bsz=128, num_updates=13214, lr=9.99023e-05, gnorm=1.97, loss_scale=8, train_wall=11, gb_free=2.8, wall=152084 2021-06-20 12:53:41 | INFO | train_inner | epoch 005: 1285 / 3002 loss=2.448, ppl=5.46, wps=6040.5, ups=0.09, wpb=64835, bsz=128, num_updates=13215, lr=9.99023e-05, gnorm=1.972, loss_scale=8, train_wall=10, gb_free=2.8, wall=152095 2021-06-20 12:53:52 | INFO | train_inner | epoch 005: 1286 / 3002 loss=2.458, ppl=5.49, wps=5937, ups=0.09, wpb=64882, bsz=128, num_updates=13216, lr=9.99023e-05, gnorm=1.979, loss_scale=8, train_wall=10, gb_free=2.8, wall=152106 2021-06-20 12:54:03 | INFO | train_inner | epoch 005: 1287 / 3002 loss=2.477, ppl=5.57, wps=5764.4, ups=0.09, wpb=64856, bsz=128, num_updates=13217, lr=9.99023e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=152117 2021-06-20 12:54:14 | INFO | train_inner | epoch 005: 1288 / 3002 loss=2.412, ppl=5.32, wps=5827.9, ups=0.09, wpb=64818, bsz=128, num_updates=13218, lr=9.99022e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=152128 2021-06-20 12:54:25 | INFO | train_inner | epoch 005: 1289 / 3002 loss=2.379, ppl=5.2, wps=5813.5, ups=0.09, wpb=64859, bsz=128, num_updates=13219, lr=9.99022e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=152140 2021-06-20 12:54:36 | INFO | train_inner | epoch 005: 1290 / 3002 loss=2.458, ppl=5.49, wps=5873.5, ups=0.09, wpb=64846, bsz=128, num_updates=13220, lr=9.99022e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=152151 2021-06-20 12:54:47 | INFO | train_inner | epoch 005: 1291 / 3002 loss=2.375, ppl=5.19, wps=5873.5, ups=0.09, wpb=64813, bsz=128, num_updates=13221, lr=9.99022e-05, gnorm=1.892, loss_scale=8, train_wall=11, gb_free=2.8, wall=152162 2021-06-20 12:54:59 | INFO | train_inner | epoch 005: 1292 / 3002 loss=2.416, ppl=5.34, wps=5756.3, ups=0.09, wpb=64796, bsz=128, num_updates=13222, lr=9.99022e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=152173 2021-06-20 12:55:10 | INFO | train_inner | epoch 005: 1293 / 3002 loss=2.582, ppl=5.99, wps=5827.7, ups=0.09, wpb=64842, bsz=128, num_updates=13223, lr=9.99022e-05, gnorm=2.005, loss_scale=8, train_wall=11, gb_free=2.8, wall=152184 2021-06-20 12:55:21 | INFO | train_inner | epoch 005: 1294 / 3002 loss=2.627, ppl=6.18, wps=5886.6, ups=0.09, wpb=64795, bsz=128, num_updates=13224, lr=9.99022e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=152195 2021-06-20 12:55:32 | INFO | train_inner | epoch 005: 1295 / 3002 loss=2.616, ppl=6.13, wps=5864.3, ups=0.09, wpb=64853, bsz=128, num_updates=13225, lr=9.99022e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=152206 2021-06-20 12:55:43 | INFO | train_inner | epoch 005: 1296 / 3002 loss=2.642, ppl=6.24, wps=5924.1, ups=0.09, wpb=64826, bsz=128, num_updates=13226, lr=9.99022e-05, gnorm=1.946, loss_scale=8, train_wall=10, gb_free=2.8, wall=152217 2021-06-20 12:55:53 | INFO | train_inner | epoch 005: 1297 / 3002 loss=2.489, ppl=5.62, wps=6012.1, ups=0.09, wpb=64741, bsz=128, num_updates=13227, lr=9.99022e-05, gnorm=2, loss_scale=8, train_wall=10, gb_free=2.8, wall=152228 2021-06-20 12:56:04 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 2021-06-20 12:56:16 | INFO | train_inner | epoch 005: 1299 / 3002 loss=2.601, ppl=6.07, wps=2925, ups=0.05, wpb=64724, bsz=128, num_updates=13228, lr=9.99022e-05, gnorm=2.033, loss_scale=4, train_wall=21, gb_free=2.8, wall=152250 2021-06-20 12:56:27 | INFO | train_inner | epoch 005: 1300 / 3002 loss=2.429, ppl=5.39, wps=5916.9, ups=0.09, wpb=64868, bsz=128, num_updates=13229, lr=9.99022e-05, gnorm=1.881, loss_scale=4, train_wall=10, gb_free=2.8, wall=152261 2021-06-20 12:56:38 | INFO | train_inner | epoch 005: 1301 / 3002 loss=2.642, ppl=6.24, wps=5825.5, ups=0.09, wpb=64891, bsz=128, num_updates=13230, lr=9.99022e-05, gnorm=1.97, loss_scale=4, train_wall=11, gb_free=2.8, wall=152272 2021-06-20 12:56:49 | INFO | train_inner | epoch 005: 1302 / 3002 loss=2.545, ppl=5.83, wps=5810, ups=0.09, wpb=64842, bsz=128, num_updates=13231, lr=9.99021e-05, gnorm=1.994, loss_scale=4, train_wall=11, gb_free=2.8, wall=152283 2021-06-20 12:57:00 | INFO | train_inner | epoch 005: 1303 / 3002 loss=2.425, ppl=5.37, wps=5901.2, ups=0.09, wpb=64855, bsz=128, num_updates=13232, lr=9.99021e-05, gnorm=2.406, loss_scale=4, train_wall=11, gb_free=2.8, wall=152294 2021-06-20 12:57:11 | INFO | train_inner | epoch 005: 1304 / 3002 loss=2.462, ppl=5.51, wps=5822.1, ups=0.09, wpb=64823, bsz=128, num_updates=13233, lr=9.99021e-05, gnorm=1.866, loss_scale=4, train_wall=11, gb_free=2.8, wall=152305 2021-06-20 12:57:22 | INFO | train_inner | epoch 005: 1305 / 3002 loss=2.541, ppl=5.82, wps=5854.5, ups=0.09, wpb=64823, bsz=128, num_updates=13234, lr=9.99021e-05, gnorm=1.953, loss_scale=4, train_wall=11, gb_free=2.8, wall=152316 2021-06-20 12:57:33 | INFO | train_inner | epoch 005: 1306 / 3002 loss=2.501, ppl=5.66, wps=5890.4, ups=0.09, wpb=64903, bsz=128, num_updates=13235, lr=9.99021e-05, gnorm=2.082, loss_scale=4, train_wall=11, gb_free=2.8, wall=152327 2021-06-20 12:57:44 | INFO | train_inner | epoch 005: 1307 / 3002 loss=2.467, ppl=5.53, wps=5827.9, ups=0.09, wpb=64922, bsz=128, num_updates=13236, lr=9.99021e-05, gnorm=1.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=152339 2021-06-20 12:57:55 | INFO | train_inner | epoch 005: 1308 / 3002 loss=2.415, ppl=5.33, wps=5893.8, ups=0.09, wpb=64884, bsz=128, num_updates=13237, lr=9.99021e-05, gnorm=2.005, loss_scale=4, train_wall=11, gb_free=2.8, wall=152350 2021-06-20 12:58:06 | INFO | train_inner | epoch 005: 1309 / 3002 loss=2.538, ppl=5.81, wps=5882.3, ups=0.09, wpb=64865, bsz=128, num_updates=13238, lr=9.99021e-05, gnorm=1.963, loss_scale=4, train_wall=11, gb_free=2.8, wall=152361 2021-06-20 12:58:17 | INFO | train_inner | epoch 005: 1310 / 3002 loss=2.482, ppl=5.59, wps=5901.2, ups=0.09, wpb=64848, bsz=128, num_updates=13239, lr=9.99021e-05, gnorm=1.971, loss_scale=4, train_wall=11, gb_free=2.8, wall=152372 2021-06-20 12:58:28 | INFO | train_inner | epoch 005: 1311 / 3002 loss=2.526, ppl=5.76, wps=5900.7, ups=0.09, wpb=64785, bsz=128, num_updates=13240, lr=9.99021e-05, gnorm=1.934, loss_scale=4, train_wall=11, gb_free=2.8, wall=152383 2021-06-20 12:58:39 | INFO | train_inner | epoch 005: 1312 / 3002 loss=2.442, ppl=5.44, wps=5823.4, ups=0.09, wpb=64865, bsz=128, num_updates=13241, lr=9.99021e-05, gnorm=1.914, loss_scale=4, train_wall=11, gb_free=2.8, wall=152394 2021-06-20 12:58:50 | INFO | train_inner | epoch 005: 1313 / 3002 loss=2.431, ppl=5.39, wps=5802.1, ups=0.09, wpb=64845, bsz=128, num_updates=13242, lr=9.99021e-05, gnorm=2.038, loss_scale=4, train_wall=11, gb_free=2.8, wall=152405 2021-06-20 12:59:01 | INFO | train_inner | epoch 005: 1314 / 3002 loss=2.57, ppl=5.94, wps=5893.5, ups=0.09, wpb=64835, bsz=128, num_updates=13243, lr=9.9902e-05, gnorm=2.018, loss_scale=4, train_wall=11, gb_free=2.8, wall=152416 2021-06-20 12:59:13 | INFO | train_inner | epoch 005: 1315 / 3002 loss=2.632, ppl=6.2, wps=5800.3, ups=0.09, wpb=64809, bsz=128, num_updates=13244, lr=9.9902e-05, gnorm=1.914, loss_scale=4, train_wall=11, gb_free=2.8, wall=152427 2021-06-20 12:59:24 | INFO | train_inner | epoch 005: 1316 / 3002 loss=2.425, ppl=5.37, wps=5771.5, ups=0.09, wpb=64771, bsz=128, num_updates=13245, lr=9.9902e-05, gnorm=1.996, loss_scale=4, train_wall=11, gb_free=2.8, wall=152438 2021-06-20 12:59:35 | INFO | train_inner | epoch 005: 1317 / 3002 loss=2.662, ppl=6.33, wps=5806.9, ups=0.09, wpb=64837, bsz=128, num_updates=13246, lr=9.9902e-05, gnorm=2.078, loss_scale=4, train_wall=11, gb_free=2.8, wall=152449 2021-06-20 12:59:46 | INFO | train_inner | epoch 005: 1318 / 3002 loss=2.431, ppl=5.39, wps=5840.6, ups=0.09, wpb=64842, bsz=128, num_updates=13247, lr=9.9902e-05, gnorm=2.015, loss_scale=4, train_wall=11, gb_free=2.8, wall=152460 2021-06-20 12:59:57 | INFO | train_inner | epoch 005: 1319 / 3002 loss=2.475, ppl=5.56, wps=5947, ups=0.09, wpb=64856, bsz=128, num_updates=13248, lr=9.9902e-05, gnorm=1.968, loss_scale=4, train_wall=10, gb_free=2.8, wall=152471 2021-06-20 13:00:08 | INFO | train_inner | epoch 005: 1320 / 3002 loss=2.574, ppl=5.96, wps=5769.9, ups=0.09, wpb=64736, bsz=128, num_updates=13249, lr=9.9902e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=152483 2021-06-20 13:00:19 | INFO | train_inner | epoch 005: 1321 / 3002 loss=2.562, ppl=5.9, wps=5917.1, ups=0.09, wpb=64841, bsz=128, num_updates=13250, lr=9.9902e-05, gnorm=2.049, loss_scale=4, train_wall=11, gb_free=2.8, wall=152494 2021-06-20 13:00:30 | INFO | train_inner | epoch 005: 1322 / 3002 loss=2.51, ppl=5.7, wps=5811, ups=0.09, wpb=64877, bsz=128, num_updates=13251, lr=9.9902e-05, gnorm=1.977, loss_scale=4, train_wall=11, gb_free=2.8, wall=152505 2021-06-20 13:00:42 | INFO | train_inner | epoch 005: 1323 / 3002 loss=2.391, ppl=5.25, wps=5754.7, ups=0.09, wpb=64819, bsz=128, num_updates=13252, lr=9.9902e-05, gnorm=1.981, loss_scale=4, train_wall=11, gb_free=2.8, wall=152516 2021-06-20 13:00:53 | INFO | train_inner | epoch 005: 1324 / 3002 loss=2.512, ppl=5.7, wps=5860.3, ups=0.09, wpb=64809, bsz=128, num_updates=13253, lr=9.9902e-05, gnorm=2.113, loss_scale=4, train_wall=11, gb_free=2.8, wall=152527 2021-06-20 13:01:04 | INFO | train_inner | epoch 005: 1325 / 3002 loss=2.469, ppl=5.54, wps=5687.5, ups=0.09, wpb=64821, bsz=128, num_updates=13254, lr=9.9902e-05, gnorm=1.963, loss_scale=4, train_wall=11, gb_free=2.8, wall=152538 2021-06-20 13:01:15 | INFO | train_inner | epoch 005: 1326 / 3002 loss=2.541, ppl=5.82, wps=5778.3, ups=0.09, wpb=64803, bsz=128, num_updates=13255, lr=9.9902e-05, gnorm=2.024, loss_scale=4, train_wall=11, gb_free=2.8, wall=152550 2021-06-20 13:01:26 | INFO | train_inner | epoch 005: 1327 / 3002 loss=2.533, ppl=5.79, wps=5952.4, ups=0.09, wpb=64853, bsz=128, num_updates=13256, lr=9.99019e-05, gnorm=1.989, loss_scale=4, train_wall=10, gb_free=2.8, wall=152561 2021-06-20 13:01:37 | INFO | train_inner | epoch 005: 1328 / 3002 loss=2.526, ppl=5.76, wps=5826.4, ups=0.09, wpb=64772, bsz=128, num_updates=13257, lr=9.99019e-05, gnorm=1.97, loss_scale=4, train_wall=11, gb_free=2.8, wall=152572 2021-06-20 13:01:49 | INFO | train_inner | epoch 005: 1329 / 3002 loss=2.33, ppl=5.03, wps=5784.6, ups=0.09, wpb=64886, bsz=128, num_updates=13258, lr=9.99019e-05, gnorm=1.857, loss_scale=4, train_wall=11, gb_free=2.8, wall=152583 2021-06-20 13:02:00 | INFO | train_inner | epoch 005: 1330 / 3002 loss=2.532, ppl=5.78, wps=5788.6, ups=0.09, wpb=64793, bsz=128, num_updates=13259, lr=9.99019e-05, gnorm=1.938, loss_scale=4, train_wall=11, gb_free=2.8, wall=152594 2021-06-20 13:02:11 | INFO | train_inner | epoch 005: 1331 / 3002 loss=2.671, ppl=6.37, wps=5831, ups=0.09, wpb=64872, bsz=128, num_updates=13260, lr=9.99019e-05, gnorm=2.323, loss_scale=4, train_wall=11, gb_free=2.8, wall=152605 2021-06-20 13:02:22 | INFO | train_inner | epoch 005: 1332 / 3002 loss=2.632, ppl=6.2, wps=5909.2, ups=0.09, wpb=64831, bsz=128, num_updates=13261, lr=9.99019e-05, gnorm=2.075, loss_scale=4, train_wall=11, gb_free=2.8, wall=152616 2021-06-20 13:02:33 | INFO | train_inner | epoch 005: 1333 / 3002 loss=2.586, ppl=6, wps=5848.2, ups=0.09, wpb=64892, bsz=128, num_updates=13262, lr=9.99019e-05, gnorm=1.899, loss_scale=4, train_wall=11, gb_free=2.8, wall=152627 2021-06-20 13:02:44 | INFO | train_inner | epoch 005: 1334 / 3002 loss=2.592, ppl=6.03, wps=5779.4, ups=0.09, wpb=64781, bsz=128, num_updates=13263, lr=9.99019e-05, gnorm=1.918, loss_scale=4, train_wall=11, gb_free=2.8, wall=152638 2021-06-20 13:02:55 | INFO | train_inner | epoch 005: 1335 / 3002 loss=2.473, ppl=5.55, wps=5970.2, ups=0.09, wpb=64836, bsz=128, num_updates=13264, lr=9.99019e-05, gnorm=1.973, loss_scale=4, train_wall=10, gb_free=2.8, wall=152649 2021-06-20 13:03:06 | INFO | train_inner | epoch 005: 1336 / 3002 loss=2.506, ppl=5.68, wps=5947, ups=0.09, wpb=64735, bsz=128, num_updates=13265, lr=9.99019e-05, gnorm=1.869, loss_scale=4, train_wall=10, gb_free=2.8, wall=152660 2021-06-20 13:03:17 | INFO | train_inner | epoch 005: 1337 / 3002 loss=2.585, ppl=6, wps=5831.5, ups=0.09, wpb=64832, bsz=128, num_updates=13266, lr=9.99019e-05, gnorm=2.095, loss_scale=4, train_wall=11, gb_free=2.8, wall=152671 2021-06-20 13:03:28 | INFO | train_inner | epoch 005: 1338 / 3002 loss=2.604, ppl=6.08, wps=5914, ups=0.09, wpb=64800, bsz=128, num_updates=13267, lr=9.99019e-05, gnorm=2.14, loss_scale=4, train_wall=11, gb_free=2.8, wall=152682 2021-06-20 13:03:39 | INFO | train_inner | epoch 005: 1339 / 3002 loss=2.532, ppl=5.78, wps=5825.5, ups=0.09, wpb=64852, bsz=128, num_updates=13268, lr=9.99018e-05, gnorm=1.884, loss_scale=4, train_wall=11, gb_free=2.8, wall=152693 2021-06-20 13:03:50 | INFO | train_inner | epoch 005: 1340 / 3002 loss=2.606, ppl=6.09, wps=5839.8, ups=0.09, wpb=64777, bsz=128, num_updates=13269, lr=9.99018e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=152705 2021-06-20 13:04:01 | INFO | train_inner | epoch 005: 1341 / 3002 loss=2.629, ppl=6.19, wps=5806.4, ups=0.09, wpb=64807, bsz=128, num_updates=13270, lr=9.99018e-05, gnorm=1.997, loss_scale=4, train_wall=11, gb_free=2.8, wall=152716 2021-06-20 13:04:12 | INFO | train_inner | epoch 005: 1342 / 3002 loss=2.557, ppl=5.88, wps=5885.5, ups=0.09, wpb=64838, bsz=128, num_updates=13271, lr=9.99018e-05, gnorm=2.029, loss_scale=4, train_wall=11, gb_free=2.8, wall=152727 2021-06-20 13:04:23 | INFO | train_inner | epoch 005: 1343 / 3002 loss=2.452, ppl=5.47, wps=5935.3, ups=0.09, wpb=64875, bsz=128, num_updates=13272, lr=9.99018e-05, gnorm=1.914, loss_scale=4, train_wall=10, gb_free=2.8, wall=152738 2021-06-20 13:04:35 | INFO | train_inner | epoch 005: 1344 / 3002 loss=2.469, ppl=5.54, wps=5743.7, ups=0.09, wpb=64792, bsz=128, num_updates=13273, lr=9.99018e-05, gnorm=2.197, loss_scale=4, train_wall=11, gb_free=2.8, wall=152749 2021-06-20 13:04:46 | INFO | train_inner | epoch 005: 1345 / 3002 loss=2.49, ppl=5.62, wps=5882.6, ups=0.09, wpb=64796, bsz=128, num_updates=13274, lr=9.99018e-05, gnorm=1.943, loss_scale=4, train_wall=11, gb_free=2.8, wall=152760 2021-06-20 13:04:57 | INFO | train_inner | epoch 005: 1346 / 3002 loss=2.589, ppl=6.02, wps=5939.6, ups=0.09, wpb=64902, bsz=128, num_updates=13275, lr=9.99018e-05, gnorm=1.993, loss_scale=4, train_wall=10, gb_free=2.8, wall=152771 2021-06-20 13:05:08 | INFO | train_inner | epoch 005: 1347 / 3002 loss=2.58, ppl=5.98, wps=5847.3, ups=0.09, wpb=64805, bsz=128, num_updates=13276, lr=9.99018e-05, gnorm=2.04, loss_scale=4, train_wall=11, gb_free=2.8, wall=152782 2021-06-20 13:05:19 | INFO | train_inner | epoch 005: 1348 / 3002 loss=2.489, ppl=5.61, wps=5812.1, ups=0.09, wpb=64878, bsz=128, num_updates=13277, lr=9.99018e-05, gnorm=2.171, loss_scale=4, train_wall=11, gb_free=2.8, wall=152793 2021-06-20 13:05:30 | INFO | train_inner | epoch 005: 1349 / 3002 loss=2.415, ppl=5.33, wps=5793.9, ups=0.09, wpb=64827, bsz=128, num_updates=13278, lr=9.99018e-05, gnorm=1.916, loss_scale=4, train_wall=11, gb_free=2.8, wall=152804 2021-06-20 13:05:41 | INFO | train_inner | epoch 005: 1350 / 3002 loss=2.44, ppl=5.43, wps=5755.7, ups=0.09, wpb=64830, bsz=128, num_updates=13279, lr=9.99018e-05, gnorm=1.947, loss_scale=4, train_wall=11, gb_free=2.8, wall=152816 2021-06-20 13:05:52 | INFO | train_inner | epoch 005: 1351 / 3002 loss=2.472, ppl=5.55, wps=5905.1, ups=0.09, wpb=64795, bsz=128, num_updates=13280, lr=9.99018e-05, gnorm=2.04, loss_scale=4, train_wall=11, gb_free=2.8, wall=152827 2021-06-20 13:06:03 | INFO | train_inner | epoch 005: 1352 / 3002 loss=2.592, ppl=6.03, wps=5789.7, ups=0.09, wpb=64754, bsz=128, num_updates=13281, lr=9.99017e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=152838 2021-06-20 13:06:14 | INFO | train_inner | epoch 005: 1353 / 3002 loss=2.473, ppl=5.55, wps=5877.9, ups=0.09, wpb=64850, bsz=128, num_updates=13282, lr=9.99017e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=152849 2021-06-20 13:06:26 | INFO | train_inner | epoch 005: 1354 / 3002 loss=2.466, ppl=5.52, wps=5799.4, ups=0.09, wpb=64827, bsz=128, num_updates=13283, lr=9.99017e-05, gnorm=1.963, loss_scale=4, train_wall=11, gb_free=2.8, wall=152860 2021-06-20 13:06:37 | INFO | train_inner | epoch 005: 1355 / 3002 loss=2.514, ppl=5.71, wps=5841.2, ups=0.09, wpb=64845, bsz=128, num_updates=13284, lr=9.99017e-05, gnorm=1.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=152871 2021-06-20 13:06:47 | INFO | train_inner | epoch 005: 1356 / 3002 loss=2.516, ppl=5.72, wps=6013.9, ups=0.09, wpb=64794, bsz=128, num_updates=13285, lr=9.99017e-05, gnorm=1.942, loss_scale=4, train_wall=10, gb_free=2.8, wall=152882 2021-06-20 13:06:59 | INFO | train_inner | epoch 005: 1357 / 3002 loss=2.479, ppl=5.58, wps=5729.2, ups=0.09, wpb=64810, bsz=128, num_updates=13286, lr=9.99017e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=152893 2021-06-20 13:07:10 | INFO | train_inner | epoch 005: 1358 / 3002 loss=2.392, ppl=5.25, wps=5776.9, ups=0.09, wpb=64838, bsz=128, num_updates=13287, lr=9.99017e-05, gnorm=1.97, loss_scale=4, train_wall=11, gb_free=2.8, wall=152904 2021-06-20 13:07:21 | INFO | train_inner | epoch 005: 1359 / 3002 loss=2.633, ppl=6.2, wps=5800.3, ups=0.09, wpb=64811, bsz=128, num_updates=13288, lr=9.99017e-05, gnorm=2.05, loss_scale=4, train_wall=11, gb_free=2.8, wall=152916 2021-06-20 13:07:32 | INFO | train_inner | epoch 005: 1360 / 3002 loss=2.433, ppl=5.4, wps=5842.5, ups=0.09, wpb=64809, bsz=128, num_updates=13289, lr=9.99017e-05, gnorm=2.01, loss_scale=4, train_wall=11, gb_free=2.8, wall=152927 2021-06-20 13:07:43 | INFO | train_inner | epoch 005: 1361 / 3002 loss=2.53, ppl=5.78, wps=5952.2, ups=0.09, wpb=64812, bsz=128, num_updates=13290, lr=9.99017e-05, gnorm=1.921, loss_scale=4, train_wall=10, gb_free=2.8, wall=152938 2021-06-20 13:07:54 | INFO | train_inner | epoch 005: 1362 / 3002 loss=2.565, ppl=5.92, wps=5878.5, ups=0.09, wpb=64901, bsz=128, num_updates=13291, lr=9.99017e-05, gnorm=1.928, loss_scale=4, train_wall=11, gb_free=2.8, wall=152949 2021-06-20 13:08:05 | INFO | train_inner | epoch 005: 1363 / 3002 loss=2.592, ppl=6.03, wps=5764.7, ups=0.09, wpb=64741, bsz=128, num_updates=13292, lr=9.99017e-05, gnorm=1.94, loss_scale=4, train_wall=11, gb_free=2.8, wall=152960 2021-06-20 13:08:16 | INFO | train_inner | epoch 005: 1364 / 3002 loss=2.408, ppl=5.31, wps=5926.5, ups=0.09, wpb=64865, bsz=128, num_updates=13293, lr=9.99016e-05, gnorm=1.947, loss_scale=4, train_wall=10, gb_free=2.8, wall=152971 2021-06-20 13:08:28 | INFO | train_inner | epoch 005: 1365 / 3002 loss=2.556, ppl=5.88, wps=5785.8, ups=0.09, wpb=64690, bsz=128, num_updates=13294, lr=9.99016e-05, gnorm=1.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=152982 2021-06-20 13:08:39 | INFO | train_inner | epoch 005: 1366 / 3002 loss=2.352, ppl=5.11, wps=5796.6, ups=0.09, wpb=64870, bsz=128, num_updates=13295, lr=9.99016e-05, gnorm=1.892, loss_scale=4, train_wall=11, gb_free=2.8, wall=152993 2021-06-20 13:08:50 | INFO | train_inner | epoch 005: 1367 / 3002 loss=2.569, ppl=5.94, wps=6000.7, ups=0.09, wpb=64870, bsz=128, num_updates=13296, lr=9.99016e-05, gnorm=2.078, loss_scale=4, train_wall=10, gb_free=2.8, wall=153004 2021-06-20 13:09:01 | INFO | train_inner | epoch 005: 1368 / 3002 loss=2.494, ppl=5.63, wps=5709.2, ups=0.09, wpb=64749, bsz=128, num_updates=13297, lr=9.99016e-05, gnorm=2.09, loss_scale=4, train_wall=11, gb_free=2.8, wall=153015 2021-06-20 13:09:12 | INFO | train_inner | epoch 005: 1369 / 3002 loss=2.677, ppl=6.39, wps=5801.1, ups=0.09, wpb=64778, bsz=128, num_updates=13298, lr=9.99016e-05, gnorm=1.965, loss_scale=4, train_wall=11, gb_free=2.8, wall=153026 2021-06-20 13:09:23 | INFO | train_inner | epoch 005: 1370 / 3002 loss=2.535, ppl=5.8, wps=5887, ups=0.09, wpb=64838, bsz=128, num_updates=13299, lr=9.99016e-05, gnorm=1.943, loss_scale=4, train_wall=11, gb_free=2.8, wall=153037 2021-06-20 13:09:34 | INFO | train_inner | epoch 005: 1371 / 3002 loss=2.565, ppl=5.92, wps=5892.2, ups=0.09, wpb=64818, bsz=128, num_updates=13300, lr=9.99016e-05, gnorm=1.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=153048 2021-06-20 13:09:45 | INFO | train_inner | epoch 005: 1372 / 3002 loss=2.446, ppl=5.45, wps=5878.2, ups=0.09, wpb=64866, bsz=128, num_updates=13301, lr=9.99016e-05, gnorm=1.986, loss_scale=4, train_wall=11, gb_free=2.8, wall=153059 2021-06-20 13:09:56 | INFO | train_inner | epoch 005: 1373 / 3002 loss=2.492, ppl=5.63, wps=5896.3, ups=0.09, wpb=64852, bsz=128, num_updates=13302, lr=9.99016e-05, gnorm=2.009, loss_scale=4, train_wall=11, gb_free=2.8, wall=153070 2021-06-20 13:10:07 | INFO | train_inner | epoch 005: 1374 / 3002 loss=2.458, ppl=5.5, wps=5898, ups=0.09, wpb=64829, bsz=128, num_updates=13303, lr=9.99016e-05, gnorm=2.011, loss_scale=4, train_wall=11, gb_free=2.8, wall=153081 2021-06-20 13:10:18 | INFO | train_inner | epoch 005: 1375 / 3002 loss=2.543, ppl=5.83, wps=5904.9, ups=0.09, wpb=64862, bsz=128, num_updates=13304, lr=9.99016e-05, gnorm=1.959, loss_scale=4, train_wall=11, gb_free=2.8, wall=153092 2021-06-20 13:10:29 | INFO | train_inner | epoch 005: 1376 / 3002 loss=2.5, ppl=5.66, wps=5750, ups=0.09, wpb=64838, bsz=128, num_updates=13305, lr=9.99016e-05, gnorm=2.063, loss_scale=4, train_wall=11, gb_free=2.8, wall=153104 2021-06-20 13:10:40 | INFO | train_inner | epoch 005: 1377 / 3002 loss=2.588, ppl=6.01, wps=5855.6, ups=0.09, wpb=64816, bsz=128, num_updates=13306, lr=9.99015e-05, gnorm=1.949, loss_scale=4, train_wall=11, gb_free=2.8, wall=153115 2021-06-20 13:10:52 | INFO | train_inner | epoch 005: 1378 / 3002 loss=2.51, ppl=5.7, wps=5844.8, ups=0.09, wpb=64846, bsz=128, num_updates=13307, lr=9.99015e-05, gnorm=1.946, loss_scale=4, train_wall=11, gb_free=2.8, wall=153126 2021-06-20 13:11:03 | INFO | train_inner | epoch 005: 1379 / 3002 loss=2.438, ppl=5.42, wps=5716.7, ups=0.09, wpb=64849, bsz=128, num_updates=13308, lr=9.99015e-05, gnorm=1.904, loss_scale=4, train_wall=11, gb_free=2.8, wall=153137 2021-06-20 13:11:14 | INFO | train_inner | epoch 005: 1380 / 3002 loss=2.428, ppl=5.38, wps=5906.9, ups=0.09, wpb=64863, bsz=128, num_updates=13309, lr=9.99015e-05, gnorm=1.961, loss_scale=4, train_wall=11, gb_free=2.8, wall=153148 2021-06-20 13:11:25 | INFO | train_inner | epoch 005: 1381 / 3002 loss=2.455, ppl=5.48, wps=5893.8, ups=0.09, wpb=64798, bsz=128, num_updates=13310, lr=9.99015e-05, gnorm=1.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=153159 2021-06-20 13:11:36 | INFO | train_inner | epoch 005: 1382 / 3002 loss=2.442, ppl=5.43, wps=5730.4, ups=0.09, wpb=64853, bsz=128, num_updates=13311, lr=9.99015e-05, gnorm=1.964, loss_scale=4, train_wall=11, gb_free=2.8, wall=153171 2021-06-20 13:11:47 | INFO | train_inner | epoch 005: 1383 / 3002 loss=2.543, ppl=5.83, wps=5892.5, ups=0.09, wpb=64819, bsz=128, num_updates=13312, lr=9.99015e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=153182 2021-06-20 13:11:59 | INFO | train_inner | epoch 005: 1384 / 3002 loss=2.681, ppl=6.41, wps=5704.2, ups=0.09, wpb=64800, bsz=128, num_updates=13313, lr=9.99015e-05, gnorm=2.036, loss_scale=4, train_wall=11, gb_free=2.8, wall=153193 2021-06-20 13:12:10 | INFO | train_inner | epoch 005: 1385 / 3002 loss=2.605, ppl=6.08, wps=5789.3, ups=0.09, wpb=64721, bsz=128, num_updates=13314, lr=9.99015e-05, gnorm=2.029, loss_scale=4, train_wall=11, gb_free=2.8, wall=153204 2021-06-20 13:12:21 | INFO | train_inner | epoch 005: 1386 / 3002 loss=2.433, ppl=5.4, wps=5775.8, ups=0.09, wpb=64839, bsz=128, num_updates=13315, lr=9.99015e-05, gnorm=2.045, loss_scale=4, train_wall=11, gb_free=2.8, wall=153215 2021-06-20 13:12:32 | INFO | train_inner | epoch 005: 1387 / 3002 loss=2.476, ppl=5.56, wps=5971.7, ups=0.09, wpb=64843, bsz=128, num_updates=13316, lr=9.99015e-05, gnorm=1.983, loss_scale=4, train_wall=10, gb_free=2.8, wall=153226 2021-06-20 13:12:43 | INFO | train_inner | epoch 005: 1388 / 3002 loss=2.599, ppl=6.06, wps=5826.7, ups=0.09, wpb=64824, bsz=128, num_updates=13317, lr=9.99015e-05, gnorm=1.961, loss_scale=4, train_wall=11, gb_free=2.8, wall=153237 2021-06-20 13:12:54 | INFO | train_inner | epoch 005: 1389 / 3002 loss=2.405, ppl=5.3, wps=5941.5, ups=0.09, wpb=64851, bsz=128, num_updates=13318, lr=9.99014e-05, gnorm=1.976, loss_scale=4, train_wall=10, gb_free=2.8, wall=153248 2021-06-20 13:13:05 | INFO | train_inner | epoch 005: 1390 / 3002 loss=2.567, ppl=5.93, wps=5696.5, ups=0.09, wpb=64757, bsz=128, num_updates=13319, lr=9.99014e-05, gnorm=1.962, loss_scale=4, train_wall=11, gb_free=2.8, wall=153260 2021-06-20 13:13:16 | INFO | train_inner | epoch 005: 1391 / 3002 loss=2.369, ppl=5.17, wps=5840.5, ups=0.09, wpb=64843, bsz=128, num_updates=13320, lr=9.99014e-05, gnorm=1.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=153271 2021-06-20 13:13:27 | INFO | train_inner | epoch 005: 1392 / 3002 loss=2.462, ppl=5.51, wps=5929.9, ups=0.09, wpb=64924, bsz=128, num_updates=13321, lr=9.99014e-05, gnorm=1.915, loss_scale=4, train_wall=11, gb_free=2.8, wall=153282 2021-06-20 13:13:38 | INFO | train_inner | epoch 005: 1393 / 3002 loss=2.483, ppl=5.59, wps=5992.5, ups=0.09, wpb=64834, bsz=128, num_updates=13322, lr=9.99014e-05, gnorm=2.031, loss_scale=4, train_wall=10, gb_free=2.8, wall=153292 2021-06-20 13:13:49 | INFO | train_inner | epoch 005: 1394 / 3002 loss=2.357, ppl=5.12, wps=5848, ups=0.09, wpb=64878, bsz=128, num_updates=13323, lr=9.99014e-05, gnorm=1.985, loss_scale=4, train_wall=11, gb_free=2.8, wall=153304 2021-06-20 13:14:00 | INFO | train_inner | epoch 005: 1395 / 3002 loss=2.451, ppl=5.47, wps=5792.3, ups=0.09, wpb=64873, bsz=128, num_updates=13324, lr=9.99014e-05, gnorm=2.047, loss_scale=4, train_wall=11, gb_free=2.8, wall=153315 2021-06-20 13:14:12 | INFO | train_inner | epoch 005: 1396 / 3002 loss=2.558, ppl=5.89, wps=5751.9, ups=0.09, wpb=64800, bsz=128, num_updates=13325, lr=9.99014e-05, gnorm=2.076, loss_scale=4, train_wall=11, gb_free=2.8, wall=153326 2021-06-20 13:14:23 | INFO | train_inner | epoch 005: 1397 / 3002 loss=2.593, ppl=6.03, wps=5853.7, ups=0.09, wpb=64857, bsz=128, num_updates=13326, lr=9.99014e-05, gnorm=2.076, loss_scale=4, train_wall=11, gb_free=2.8, wall=153337 2021-06-20 13:14:34 | INFO | train_inner | epoch 005: 1398 / 3002 loss=2.561, ppl=5.9, wps=6009.9, ups=0.09, wpb=64869, bsz=128, num_updates=13327, lr=9.99014e-05, gnorm=2.036, loss_scale=4, train_wall=10, gb_free=2.8, wall=153348 2021-06-20 13:14:45 | INFO | train_inner | epoch 005: 1399 / 3002 loss=2.487, ppl=5.6, wps=5877.9, ups=0.09, wpb=64771, bsz=128, num_updates=13328, lr=9.99014e-05, gnorm=2.033, loss_scale=4, train_wall=11, gb_free=2.8, wall=153359 2021-06-20 13:14:56 | INFO | train_inner | epoch 005: 1400 / 3002 loss=2.616, ppl=6.13, wps=5760.3, ups=0.09, wpb=64853, bsz=128, num_updates=13329, lr=9.99014e-05, gnorm=1.967, loss_scale=4, train_wall=11, gb_free=2.8, wall=153370 2021-06-20 13:15:07 | INFO | train_inner | epoch 005: 1401 / 3002 loss=2.387, ppl=5.23, wps=5881.7, ups=0.09, wpb=64906, bsz=128, num_updates=13330, lr=9.99014e-05, gnorm=1.974, loss_scale=4, train_wall=11, gb_free=2.8, wall=153381 2021-06-20 13:15:18 | INFO | train_inner | epoch 005: 1402 / 3002 loss=2.375, ppl=5.19, wps=5810, ups=0.09, wpb=64842, bsz=128, num_updates=13331, lr=9.99013e-05, gnorm=1.894, loss_scale=4, train_wall=11, gb_free=2.8, wall=153392 2021-06-20 13:15:29 | INFO | train_inner | epoch 005: 1403 / 3002 loss=2.393, ppl=5.25, wps=5823.1, ups=0.09, wpb=64863, bsz=128, num_updates=13332, lr=9.99013e-05, gnorm=1.875, loss_scale=4, train_wall=11, gb_free=2.8, wall=153403 2021-06-20 13:15:40 | INFO | train_inner | epoch 005: 1404 / 3002 loss=2.446, ppl=5.45, wps=5843.6, ups=0.09, wpb=64793, bsz=128, num_updates=13333, lr=9.99013e-05, gnorm=1.999, loss_scale=4, train_wall=11, gb_free=2.8, wall=153415 2021-06-20 13:15:51 | INFO | train_inner | epoch 005: 1405 / 3002 loss=2.405, ppl=5.3, wps=5866.3, ups=0.09, wpb=64911, bsz=128, num_updates=13334, lr=9.99013e-05, gnorm=1.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=153426 2021-06-20 13:16:02 | INFO | train_inner | epoch 005: 1406 / 3002 loss=2.573, ppl=5.95, wps=5839.3, ups=0.09, wpb=64867, bsz=128, num_updates=13335, lr=9.99013e-05, gnorm=2.03, loss_scale=4, train_wall=11, gb_free=2.8, wall=153437 2021-06-20 13:16:13 | INFO | train_inner | epoch 005: 1407 / 3002 loss=2.667, ppl=6.35, wps=5861.6, ups=0.09, wpb=64800, bsz=128, num_updates=13336, lr=9.99013e-05, gnorm=1.981, loss_scale=4, train_wall=11, gb_free=2.8, wall=153448 2021-06-20 13:16:25 | INFO | train_inner | epoch 005: 1408 / 3002 loss=2.528, ppl=5.77, wps=5786.3, ups=0.09, wpb=64822, bsz=128, num_updates=13337, lr=9.99013e-05, gnorm=1.954, loss_scale=4, train_wall=11, gb_free=2.8, wall=153459 2021-06-20 13:16:36 | INFO | train_inner | epoch 005: 1409 / 3002 loss=2.515, ppl=5.72, wps=5858, ups=0.09, wpb=64725, bsz=128, num_updates=13338, lr=9.99013e-05, gnorm=1.899, loss_scale=4, train_wall=11, gb_free=2.8, wall=153470 2021-06-20 13:16:47 | INFO | train_inner | epoch 005: 1410 / 3002 loss=2.478, ppl=5.57, wps=5695.7, ups=0.09, wpb=64709, bsz=128, num_updates=13339, lr=9.99013e-05, gnorm=1.967, loss_scale=4, train_wall=11, gb_free=2.8, wall=153481 2021-06-20 13:16:58 | INFO | train_inner | epoch 005: 1411 / 3002 loss=2.466, ppl=5.52, wps=5902.2, ups=0.09, wpb=64783, bsz=128, num_updates=13340, lr=9.99013e-05, gnorm=1.888, loss_scale=4, train_wall=11, gb_free=2.8, wall=153492 2021-06-20 13:17:09 | INFO | train_inner | epoch 005: 1412 / 3002 loss=2.459, ppl=5.5, wps=5826.1, ups=0.09, wpb=64816, bsz=128, num_updates=13341, lr=9.99013e-05, gnorm=1.874, loss_scale=4, train_wall=11, gb_free=2.8, wall=153504 2021-06-20 13:17:20 | INFO | train_inner | epoch 005: 1413 / 3002 loss=2.434, ppl=5.4, wps=5881.5, ups=0.09, wpb=64832, bsz=128, num_updates=13342, lr=9.99013e-05, gnorm=1.999, loss_scale=4, train_wall=11, gb_free=2.8, wall=153515 2021-06-20 13:17:31 | INFO | train_inner | epoch 005: 1414 / 3002 loss=2.556, ppl=5.88, wps=5885, ups=0.09, wpb=64917, bsz=128, num_updates=13343, lr=9.99012e-05, gnorm=2.122, loss_scale=4, train_wall=11, gb_free=2.8, wall=153526 2021-06-20 13:17:42 | INFO | train_inner | epoch 005: 1415 / 3002 loss=2.528, ppl=5.77, wps=5821, ups=0.09, wpb=64856, bsz=128, num_updates=13344, lr=9.99012e-05, gnorm=1.937, loss_scale=4, train_wall=11, gb_free=2.8, wall=153537 2021-06-20 13:17:54 | INFO | train_inner | epoch 005: 1416 / 3002 loss=2.703, ppl=6.51, wps=5803.5, ups=0.09, wpb=64757, bsz=128, num_updates=13345, lr=9.99012e-05, gnorm=2.109, loss_scale=4, train_wall=11, gb_free=2.8, wall=153548 2021-06-20 13:18:05 | INFO | train_inner | epoch 005: 1417 / 3002 loss=2.447, ppl=5.45, wps=5848.5, ups=0.09, wpb=64960, bsz=128, num_updates=13346, lr=9.99012e-05, gnorm=7.369, loss_scale=4, train_wall=11, gb_free=2.8, wall=153559 2021-06-20 13:18:16 | INFO | train_inner | epoch 005: 1418 / 3002 loss=2.635, ppl=6.21, wps=5852.7, ups=0.09, wpb=64854, bsz=128, num_updates=13347, lr=9.99012e-05, gnorm=1.89, loss_scale=4, train_wall=11, gb_free=2.8, wall=153570 2021-06-20 13:18:27 | INFO | train_inner | epoch 005: 1419 / 3002 loss=2.467, ppl=5.53, wps=5864.1, ups=0.09, wpb=64813, bsz=128, num_updates=13348, lr=9.99012e-05, gnorm=1.956, loss_scale=4, train_wall=11, gb_free=2.8, wall=153581 2021-06-20 13:18:38 | INFO | train_inner | epoch 005: 1420 / 3002 loss=2.528, ppl=5.77, wps=5750.8, ups=0.09, wpb=64786, bsz=128, num_updates=13349, lr=9.99012e-05, gnorm=2.013, loss_scale=4, train_wall=11, gb_free=2.8, wall=153592 2021-06-20 13:18:49 | INFO | train_inner | epoch 005: 1421 / 3002 loss=2.599, ppl=6.06, wps=5858.4, ups=0.09, wpb=64854, bsz=128, num_updates=13350, lr=9.99012e-05, gnorm=2.073, loss_scale=4, train_wall=11, gb_free=2.8, wall=153603 2021-06-20 13:19:00 | INFO | train_inner | epoch 005: 1422 / 3002 loss=2.549, ppl=5.85, wps=5882.6, ups=0.09, wpb=64845, bsz=128, num_updates=13351, lr=9.99012e-05, gnorm=2.023, loss_scale=4, train_wall=11, gb_free=2.8, wall=153614 2021-06-20 13:19:11 | INFO | train_inner | epoch 005: 1423 / 3002 loss=2.707, ppl=6.53, wps=5806.4, ups=0.09, wpb=64821, bsz=128, num_updates=13352, lr=9.99012e-05, gnorm=2.013, loss_scale=4, train_wall=11, gb_free=2.8, wall=153626 2021-06-20 13:19:22 | INFO | train_inner | epoch 005: 1424 / 3002 loss=2.626, ppl=6.17, wps=5852.5, ups=0.09, wpb=64822, bsz=128, num_updates=13353, lr=9.99012e-05, gnorm=2.041, loss_scale=4, train_wall=11, gb_free=2.8, wall=153637 2021-06-20 13:19:33 | INFO | train_inner | epoch 005: 1425 / 3002 loss=2.574, ppl=5.96, wps=5853.2, ups=0.09, wpb=64810, bsz=128, num_updates=13354, lr=9.99012e-05, gnorm=1.93, loss_scale=4, train_wall=11, gb_free=2.8, wall=153648 2021-06-20 13:19:45 | INFO | train_inner | epoch 005: 1426 / 3002 loss=2.472, ppl=5.55, wps=5759.3, ups=0.09, wpb=64870, bsz=128, num_updates=13355, lr=9.99012e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=153659 2021-06-20 13:19:56 | INFO | train_inner | epoch 005: 1427 / 3002 loss=2.565, ppl=5.92, wps=5942.6, ups=0.09, wpb=64815, bsz=128, num_updates=13356, lr=9.99011e-05, gnorm=1.993, loss_scale=8, train_wall=10, gb_free=2.8, wall=153670 2021-06-20 13:20:07 | INFO | train_inner | epoch 005: 1428 / 3002 loss=2.601, ppl=6.07, wps=5819, ups=0.09, wpb=64817, bsz=128, num_updates=13357, lr=9.99011e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=153681 2021-06-20 13:20:18 | INFO | train_inner | epoch 005: 1429 / 3002 loss=2.686, ppl=6.44, wps=5776.3, ups=0.09, wpb=64864, bsz=128, num_updates=13358, lr=9.99011e-05, gnorm=2.007, loss_scale=8, train_wall=11, gb_free=2.8, wall=153692 2021-06-20 13:20:29 | INFO | train_inner | epoch 005: 1430 / 3002 loss=2.708, ppl=6.53, wps=5830.6, ups=0.09, wpb=64814, bsz=128, num_updates=13359, lr=9.99011e-05, gnorm=2.039, loss_scale=8, train_wall=11, gb_free=2.8, wall=153703 2021-06-20 13:20:40 | INFO | train_inner | epoch 005: 1431 / 3002 loss=2.465, ppl=5.52, wps=6028.2, ups=0.09, wpb=64877, bsz=128, num_updates=13360, lr=9.99011e-05, gnorm=2.115, loss_scale=8, train_wall=10, gb_free=2.8, wall=153714 2021-06-20 13:20:51 | INFO | train_inner | epoch 005: 1432 / 3002 loss=2.397, ppl=5.27, wps=5796.3, ups=0.09, wpb=64845, bsz=128, num_updates=13361, lr=9.99011e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=153725 2021-06-20 13:21:02 | INFO | train_inner | epoch 005: 1433 / 3002 loss=2.649, ppl=6.27, wps=5899.4, ups=0.09, wpb=64863, bsz=128, num_updates=13362, lr=9.99011e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=153736 2021-06-20 13:21:13 | INFO | train_inner | epoch 005: 1434 / 3002 loss=2.528, ppl=5.77, wps=5908.1, ups=0.09, wpb=64862, bsz=128, num_updates=13363, lr=9.99011e-05, gnorm=1.96, loss_scale=8, train_wall=10, gb_free=2.8, wall=153747 2021-06-20 13:21:24 | INFO | train_inner | epoch 005: 1435 / 3002 loss=2.554, ppl=5.87, wps=5865.9, ups=0.09, wpb=64935, bsz=128, num_updates=13364, lr=9.99011e-05, gnorm=2.011, loss_scale=8, train_wall=11, gb_free=2.8, wall=153758 2021-06-20 13:21:35 | INFO | train_inner | epoch 005: 1436 / 3002 loss=2.639, ppl=6.23, wps=5886.6, ups=0.09, wpb=64888, bsz=128, num_updates=13365, lr=9.99011e-05, gnorm=2.449, loss_scale=8, train_wall=11, gb_free=2.8, wall=153769 2021-06-20 13:21:46 | INFO | train_inner | epoch 005: 1437 / 3002 loss=2.619, ppl=6.14, wps=5821.7, ups=0.09, wpb=64790, bsz=128, num_updates=13366, lr=9.99011e-05, gnorm=3.005, loss_scale=8, train_wall=11, gb_free=2.8, wall=153781 2021-06-20 13:21:57 | INFO | train_inner | epoch 005: 1438 / 3002 loss=2.591, ppl=6.03, wps=5879.8, ups=0.09, wpb=64767, bsz=128, num_updates=13367, lr=9.99011e-05, gnorm=2.055, loss_scale=8, train_wall=11, gb_free=2.8, wall=153792 2021-06-20 13:22:08 | INFO | train_inner | epoch 005: 1439 / 3002 loss=2.466, ppl=5.53, wps=5884.4, ups=0.09, wpb=64873, bsz=128, num_updates=13368, lr=9.9901e-05, gnorm=1.91, loss_scale=8, train_wall=11, gb_free=2.8, wall=153803 2021-06-20 13:22:20 | INFO | train_inner | epoch 005: 1440 / 3002 loss=2.485, ppl=5.6, wps=5778.1, ups=0.09, wpb=64846, bsz=128, num_updates=13369, lr=9.9901e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=153814 2021-06-20 13:22:31 | INFO | train_inner | epoch 005: 1441 / 3002 loss=2.652, ppl=6.28, wps=5839.5, ups=0.09, wpb=64788, bsz=128, num_updates=13370, lr=9.9901e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=153825 2021-06-20 13:22:42 | INFO | train_inner | epoch 005: 1442 / 3002 loss=2.499, ppl=5.65, wps=5802.6, ups=0.09, wpb=64905, bsz=128, num_updates=13371, lr=9.9901e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=153836 2021-06-20 13:22:53 | INFO | train_inner | epoch 005: 1443 / 3002 loss=2.54, ppl=5.81, wps=5973.4, ups=0.09, wpb=64885, bsz=128, num_updates=13372, lr=9.9901e-05, gnorm=3.715, loss_scale=8, train_wall=10, gb_free=2.8, wall=153847 2021-06-20 13:23:04 | INFO | train_inner | epoch 005: 1444 / 3002 loss=2.409, ppl=5.31, wps=5851.8, ups=0.09, wpb=64881, bsz=128, num_updates=13373, lr=9.9901e-05, gnorm=1.949, loss_scale=8, train_wall=11, gb_free=2.8, wall=153858 2021-06-20 13:23:15 | INFO | train_inner | epoch 005: 1445 / 3002 loss=2.447, ppl=5.45, wps=5821.1, ups=0.09, wpb=64840, bsz=128, num_updates=13374, lr=9.9901e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=153869 2021-06-20 13:23:26 | INFO | train_inner | epoch 005: 1446 / 3002 loss=2.424, ppl=5.37, wps=5953.6, ups=0.09, wpb=64862, bsz=128, num_updates=13375, lr=9.9901e-05, gnorm=2.002, loss_scale=8, train_wall=10, gb_free=2.8, wall=153880 2021-06-20 13:23:37 | INFO | train_inner | epoch 005: 1447 / 3002 loss=2.506, ppl=5.68, wps=5849, ups=0.09, wpb=64839, bsz=128, num_updates=13376, lr=9.9901e-05, gnorm=2.123, loss_scale=8, train_wall=11, gb_free=2.8, wall=153891 2021-06-20 13:23:48 | INFO | train_inner | epoch 005: 1448 / 3002 loss=2.578, ppl=5.97, wps=5769.9, ups=0.09, wpb=64885, bsz=128, num_updates=13377, lr=9.9901e-05, gnorm=2.085, loss_scale=8, train_wall=11, gb_free=2.8, wall=153902 2021-06-20 13:23:59 | INFO | train_inner | epoch 005: 1449 / 3002 loss=2.405, ppl=5.3, wps=6004.4, ups=0.09, wpb=64830, bsz=128, num_updates=13378, lr=9.9901e-05, gnorm=2.061, loss_scale=8, train_wall=10, gb_free=2.8, wall=153913 2021-06-20 13:24:10 | INFO | train_inner | epoch 005: 1450 / 3002 loss=2.544, ppl=5.83, wps=5810.1, ups=0.09, wpb=64836, bsz=128, num_updates=13379, lr=9.9901e-05, gnorm=1.927, loss_scale=8, train_wall=11, gb_free=2.8, wall=153924 2021-06-20 13:24:21 | INFO | train_inner | epoch 005: 1451 / 3002 loss=2.389, ppl=5.24, wps=5791.3, ups=0.09, wpb=64786, bsz=128, num_updates=13380, lr=9.9901e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=153936 2021-06-20 13:24:32 | INFO | train_inner | epoch 005: 1452 / 3002 loss=2.616, ppl=6.13, wps=5884, ups=0.09, wpb=64928, bsz=128, num_updates=13381, lr=9.99009e-05, gnorm=2.029, loss_scale=8, train_wall=11, gb_free=2.8, wall=153947 2021-06-20 13:24:43 | INFO | train_inner | epoch 005: 1453 / 3002 loss=2.5, ppl=5.66, wps=5856.4, ups=0.09, wpb=64824, bsz=128, num_updates=13382, lr=9.99009e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=153958 2021-06-20 13:24:54 | INFO | train_inner | epoch 005: 1454 / 3002 loss=2.568, ppl=5.93, wps=5944.5, ups=0.09, wpb=64880, bsz=128, num_updates=13383, lr=9.99009e-05, gnorm=1.946, loss_scale=8, train_wall=10, gb_free=2.8, wall=153969 2021-06-20 13:25:06 | INFO | train_inner | epoch 005: 1455 / 3002 loss=2.575, ppl=5.96, wps=5709, ups=0.09, wpb=64763, bsz=128, num_updates=13384, lr=9.99009e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=153980 2021-06-20 13:25:17 | INFO | train_inner | epoch 005: 1456 / 3002 loss=2.424, ppl=5.37, wps=5811.8, ups=0.09, wpb=64888, bsz=128, num_updates=13385, lr=9.99009e-05, gnorm=2.087, loss_scale=8, train_wall=11, gb_free=2.8, wall=153991 2021-06-20 13:25:28 | INFO | train_inner | epoch 005: 1457 / 3002 loss=2.594, ppl=6.04, wps=5926.5, ups=0.09, wpb=64831, bsz=128, num_updates=13386, lr=9.99009e-05, gnorm=1.995, loss_scale=8, train_wall=10, gb_free=2.8, wall=154002 2021-06-20 13:25:39 | INFO | train_inner | epoch 005: 1458 / 3002 loss=2.421, ppl=5.36, wps=6014.3, ups=0.09, wpb=64875, bsz=128, num_updates=13387, lr=9.99009e-05, gnorm=2.054, loss_scale=8, train_wall=10, gb_free=2.8, wall=154013 2021-06-20 13:25:50 | INFO | train_inner | epoch 005: 1459 / 3002 loss=2.43, ppl=5.39, wps=5799, ups=0.09, wpb=64844, bsz=128, num_updates=13388, lr=9.99009e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=154024 2021-06-20 13:26:01 | INFO | train_inner | epoch 005: 1460 / 3002 loss=2.499, ppl=5.65, wps=5922.6, ups=0.09, wpb=64868, bsz=128, num_updates=13389, lr=9.99009e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=154035 2021-06-20 13:26:12 | INFO | train_inner | epoch 005: 1461 / 3002 loss=2.448, ppl=5.46, wps=5840.8, ups=0.09, wpb=64800, bsz=128, num_updates=13390, lr=9.99009e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=154046 2021-06-20 13:26:23 | INFO | train_inner | epoch 005: 1462 / 3002 loss=2.548, ppl=5.85, wps=5869.4, ups=0.09, wpb=64780, bsz=128, num_updates=13391, lr=9.99009e-05, gnorm=1.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=154057 2021-06-20 13:26:34 | INFO | train_inner | epoch 005: 1463 / 3002 loss=2.491, ppl=5.62, wps=5826.7, ups=0.09, wpb=64915, bsz=128, num_updates=13392, lr=9.99009e-05, gnorm=1.996, loss_scale=8, train_wall=11, gb_free=2.8, wall=154068 2021-06-20 13:26:45 | INFO | train_inner | epoch 005: 1464 / 3002 loss=2.487, ppl=5.61, wps=5824.3, ups=0.09, wpb=64845, bsz=128, num_updates=13393, lr=9.99008e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=154079 2021-06-20 13:26:56 | INFO | train_inner | epoch 005: 1465 / 3002 loss=2.494, ppl=5.63, wps=5837.1, ups=0.09, wpb=64905, bsz=128, num_updates=13394, lr=9.99008e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=154091 2021-06-20 13:27:07 | INFO | train_inner | epoch 005: 1466 / 3002 loss=2.534, ppl=5.79, wps=5828.2, ups=0.09, wpb=64779, bsz=128, num_updates=13395, lr=9.99008e-05, gnorm=2.021, loss_scale=8, train_wall=11, gb_free=2.8, wall=154102 2021-06-20 13:27:18 | INFO | train_inner | epoch 005: 1467 / 3002 loss=2.399, ppl=5.27, wps=5921.2, ups=0.09, wpb=64824, bsz=128, num_updates=13396, lr=9.99008e-05, gnorm=1.972, loss_scale=8, train_wall=10, gb_free=2.8, wall=154113 2021-06-20 13:27:29 | INFO | train_inner | epoch 005: 1468 / 3002 loss=2.542, ppl=5.82, wps=5836.1, ups=0.09, wpb=64858, bsz=128, num_updates=13397, lr=9.99008e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=154124 2021-06-20 13:27:40 | INFO | train_inner | epoch 005: 1469 / 3002 loss=2.483, ppl=5.59, wps=5846.2, ups=0.09, wpb=64817, bsz=128, num_updates=13398, lr=9.99008e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=154135 2021-06-20 13:27:51 | INFO | train_inner | epoch 005: 1470 / 3002 loss=2.451, ppl=5.47, wps=5904.9, ups=0.09, wpb=64849, bsz=128, num_updates=13399, lr=9.99008e-05, gnorm=2.037, loss_scale=8, train_wall=11, gb_free=2.8, wall=154146 2021-06-20 13:28:03 | INFO | train_inner | epoch 005: 1471 / 3002 loss=2.522, ppl=5.74, wps=5776.2, ups=0.09, wpb=64825, bsz=128, num_updates=13400, lr=9.99008e-05, gnorm=2.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=154157 2021-06-20 13:28:14 | INFO | train_inner | epoch 005: 1472 / 3002 loss=2.495, ppl=5.64, wps=5821.8, ups=0.09, wpb=64813, bsz=128, num_updates=13401, lr=9.99008e-05, gnorm=1.878, loss_scale=8, train_wall=11, gb_free=2.8, wall=154168 2021-06-20 13:28:25 | INFO | train_inner | epoch 005: 1473 / 3002 loss=2.573, ppl=5.95, wps=5780.7, ups=0.09, wpb=64821, bsz=128, num_updates=13402, lr=9.99008e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=154179 2021-06-20 13:28:36 | INFO | train_inner | epoch 005: 1474 / 3002 loss=2.452, ppl=5.47, wps=5877.5, ups=0.09, wpb=64837, bsz=128, num_updates=13403, lr=9.99008e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=154190 2021-06-20 13:28:47 | INFO | train_inner | epoch 005: 1475 / 3002 loss=2.592, ppl=6.03, wps=5846.9, ups=0.09, wpb=64821, bsz=128, num_updates=13404, lr=9.99008e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=154201 2021-06-20 13:28:58 | INFO | train_inner | epoch 005: 1476 / 3002 loss=2.548, ppl=5.85, wps=5861.6, ups=0.09, wpb=64710, bsz=128, num_updates=13405, lr=9.99008e-05, gnorm=1.956, loss_scale=8, train_wall=11, gb_free=2.8, wall=154212 2021-06-20 13:29:09 | INFO | train_inner | epoch 005: 1477 / 3002 loss=2.647, ppl=6.26, wps=5921.2, ups=0.09, wpb=64818, bsz=128, num_updates=13406, lr=9.99007e-05, gnorm=1.962, loss_scale=8, train_wall=10, gb_free=2.8, wall=154223 2021-06-20 13:29:20 | INFO | train_inner | epoch 005: 1478 / 3002 loss=2.405, ppl=5.29, wps=5799.5, ups=0.09, wpb=64829, bsz=128, num_updates=13407, lr=9.99007e-05, gnorm=1.953, loss_scale=8, train_wall=11, gb_free=2.8, wall=154235 2021-06-20 13:29:31 | INFO | train_inner | epoch 005: 1479 / 3002 loss=2.68, ppl=6.41, wps=5851.6, ups=0.09, wpb=64827, bsz=128, num_updates=13408, lr=9.99007e-05, gnorm=2.04, loss_scale=8, train_wall=11, gb_free=2.8, wall=154246 2021-06-20 13:29:42 | INFO | train_inner | epoch 005: 1480 / 3002 loss=2.473, ppl=5.55, wps=5829.1, ups=0.09, wpb=64799, bsz=128, num_updates=13409, lr=9.99007e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=154257 2021-06-20 13:29:54 | INFO | train_inner | epoch 005: 1481 / 3002 loss=2.381, ppl=5.21, wps=5753.4, ups=0.09, wpb=64830, bsz=128, num_updates=13410, lr=9.99007e-05, gnorm=2.178, loss_scale=8, train_wall=11, gb_free=2.8, wall=154268 2021-06-20 13:30:05 | INFO | train_inner | epoch 005: 1482 / 3002 loss=2.582, ppl=5.99, wps=5785.7, ups=0.09, wpb=64814, bsz=128, num_updates=13411, lr=9.99007e-05, gnorm=1.994, loss_scale=8, train_wall=11, gb_free=2.8, wall=154279 2021-06-20 13:30:16 | INFO | train_inner | epoch 005: 1483 / 3002 loss=2.341, ppl=5.06, wps=5843.7, ups=0.09, wpb=64884, bsz=128, num_updates=13412, lr=9.99007e-05, gnorm=2.086, loss_scale=8, train_wall=11, gb_free=2.8, wall=154290 2021-06-20 13:30:27 | INFO | train_inner | epoch 005: 1484 / 3002 loss=2.613, ppl=6.12, wps=5864.5, ups=0.09, wpb=64851, bsz=128, num_updates=13413, lr=9.99007e-05, gnorm=2.01, loss_scale=8, train_wall=11, gb_free=2.8, wall=154301 2021-06-20 13:30:38 | INFO | train_inner | epoch 005: 1485 / 3002 loss=2.667, ppl=6.35, wps=5885.7, ups=0.09, wpb=64855, bsz=128, num_updates=13414, lr=9.99007e-05, gnorm=1.968, loss_scale=8, train_wall=11, gb_free=2.8, wall=154312 2021-06-20 13:30:49 | INFO | train_inner | epoch 005: 1486 / 3002 loss=2.481, ppl=5.58, wps=5811.3, ups=0.09, wpb=64803, bsz=128, num_updates=13415, lr=9.99007e-05, gnorm=1.887, loss_scale=8, train_wall=11, gb_free=2.8, wall=154324 2021-06-20 13:31:00 | INFO | train_inner | epoch 005: 1487 / 3002 loss=2.491, ppl=5.62, wps=5799.1, ups=0.09, wpb=64822, bsz=128, num_updates=13416, lr=9.99007e-05, gnorm=2.005, loss_scale=8, train_wall=11, gb_free=2.8, wall=154335 2021-06-20 13:31:11 | INFO | train_inner | epoch 005: 1488 / 3002 loss=2.545, ppl=5.83, wps=5885.7, ups=0.09, wpb=64837, bsz=128, num_updates=13417, lr=9.99007e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=154346 2021-06-20 13:31:23 | INFO | train_inner | epoch 005: 1489 / 3002 loss=2.548, ppl=5.85, wps=5848.3, ups=0.09, wpb=64808, bsz=128, num_updates=13418, lr=9.99006e-05, gnorm=2.01, loss_scale=8, train_wall=11, gb_free=2.8, wall=154357 2021-06-20 13:31:34 | INFO | train_inner | epoch 005: 1490 / 3002 loss=2.429, ppl=5.39, wps=5876.8, ups=0.09, wpb=64913, bsz=128, num_updates=13419, lr=9.99006e-05, gnorm=1.887, loss_scale=8, train_wall=11, gb_free=2.8, wall=154368 2021-06-20 13:31:45 | INFO | train_inner | epoch 005: 1491 / 3002 loss=2.452, ppl=5.47, wps=5746, ups=0.09, wpb=64846, bsz=128, num_updates=13420, lr=9.99006e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=154379 2021-06-20 13:31:56 | INFO | train_inner | epoch 005: 1492 / 3002 loss=2.542, ppl=5.82, wps=5905.5, ups=0.09, wpb=64828, bsz=128, num_updates=13421, lr=9.99006e-05, gnorm=1.944, loss_scale=8, train_wall=10, gb_free=2.8, wall=154390 2021-06-20 13:32:07 | INFO | train_inner | epoch 005: 1493 / 3002 loss=2.571, ppl=5.94, wps=5906.2, ups=0.09, wpb=64831, bsz=128, num_updates=13422, lr=9.99006e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=154401 2021-06-20 13:32:18 | INFO | train_inner | epoch 005: 1494 / 3002 loss=2.478, ppl=5.57, wps=5778.1, ups=0.09, wpb=64872, bsz=128, num_updates=13423, lr=9.99006e-05, gnorm=1.897, loss_scale=8, train_wall=11, gb_free=2.8, wall=154412 2021-06-20 13:32:29 | INFO | train_inner | epoch 005: 1495 / 3002 loss=2.297, ppl=4.92, wps=5875.9, ups=0.09, wpb=64847, bsz=128, num_updates=13424, lr=9.99006e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=154423 2021-06-20 13:32:40 | INFO | train_inner | epoch 005: 1496 / 3002 loss=2.555, ppl=5.88, wps=5790.1, ups=0.09, wpb=64831, bsz=128, num_updates=13425, lr=9.99006e-05, gnorm=1.918, loss_scale=8, train_wall=11, gb_free=2.8, wall=154435 2021-06-20 13:32:51 | INFO | train_inner | epoch 005: 1497 / 3002 loss=2.467, ppl=5.53, wps=5804.2, ups=0.09, wpb=64899, bsz=128, num_updates=13426, lr=9.99006e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=154446 2021-06-20 13:33:03 | INFO | train_inner | epoch 005: 1498 / 3002 loss=2.444, ppl=5.44, wps=5723.7, ups=0.09, wpb=64878, bsz=128, num_updates=13427, lr=9.99006e-05, gnorm=1.898, loss_scale=8, train_wall=11, gb_free=2.8, wall=154457 2021-06-20 13:33:14 | INFO | train_inner | epoch 005: 1499 / 3002 loss=2.456, ppl=5.49, wps=5786.4, ups=0.09, wpb=64804, bsz=128, num_updates=13428, lr=9.99006e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=154468 2021-06-20 13:33:25 | INFO | train_inner | epoch 005: 1500 / 3002 loss=2.458, ppl=5.5, wps=5869.9, ups=0.09, wpb=64818, bsz=128, num_updates=13429, lr=9.99006e-05, gnorm=2.025, loss_scale=8, train_wall=11, gb_free=2.8, wall=154479 2021-06-20 13:33:36 | INFO | train_inner | epoch 005: 1501 / 3002 loss=2.459, ppl=5.5, wps=5869.6, ups=0.09, wpb=64874, bsz=128, num_updates=13430, lr=9.99006e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=154490 2021-06-20 13:33:47 | INFO | train_inner | epoch 005: 1502 / 3002 loss=2.467, ppl=5.53, wps=5783, ups=0.09, wpb=64796, bsz=128, num_updates=13431, lr=9.99005e-05, gnorm=1.97, loss_scale=8, train_wall=11, gb_free=2.8, wall=154502 2021-06-20 13:33:59 | INFO | train_inner | epoch 005: 1503 / 3002 loss=2.651, ppl=6.28, wps=5791.7, ups=0.09, wpb=64859, bsz=128, num_updates=13432, lr=9.99005e-05, gnorm=2.023, loss_scale=8, train_wall=11, gb_free=2.8, wall=154513 2021-06-20 13:34:10 | INFO | train_inner | epoch 005: 1504 / 3002 loss=2.379, ppl=5.2, wps=5785.6, ups=0.09, wpb=64834, bsz=128, num_updates=13433, lr=9.99005e-05, gnorm=2.243, loss_scale=8, train_wall=11, gb_free=2.8, wall=154524 2021-06-20 13:34:21 | INFO | train_inner | epoch 005: 1505 / 3002 loss=2.459, ppl=5.5, wps=5771.2, ups=0.09, wpb=64848, bsz=128, num_updates=13434, lr=9.99005e-05, gnorm=1.985, loss_scale=8, train_wall=11, gb_free=2.8, wall=154535 2021-06-20 13:34:32 | INFO | train_inner | epoch 005: 1506 / 3002 loss=2.544, ppl=5.83, wps=5835.7, ups=0.09, wpb=64824, bsz=128, num_updates=13435, lr=9.99005e-05, gnorm=1.941, loss_scale=8, train_wall=11, gb_free=2.8, wall=154546 2021-06-20 13:34:43 | INFO | train_inner | epoch 005: 1507 / 3002 loss=2.635, ppl=6.21, wps=5901.4, ups=0.09, wpb=64855, bsz=128, num_updates=13436, lr=9.99005e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=154557 2021-06-20 13:34:54 | INFO | train_inner | epoch 005: 1508 / 3002 loss=2.682, ppl=6.42, wps=5813.4, ups=0.09, wpb=64826, bsz=128, num_updates=13437, lr=9.99005e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=154569 2021-06-20 13:35:05 | INFO | train_inner | epoch 005: 1509 / 3002 loss=2.387, ppl=5.23, wps=5842.3, ups=0.09, wpb=64803, bsz=128, num_updates=13438, lr=9.99005e-05, gnorm=1.912, loss_scale=8, train_wall=11, gb_free=2.8, wall=154580 2021-06-20 13:35:17 | INFO | train_inner | epoch 005: 1510 / 3002 loss=2.557, ppl=5.88, wps=5781.3, ups=0.09, wpb=64882, bsz=128, num_updates=13439, lr=9.99005e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=154591 2021-06-20 13:35:28 | INFO | train_inner | epoch 005: 1511 / 3002 loss=2.387, ppl=5.23, wps=5817.9, ups=0.09, wpb=64751, bsz=128, num_updates=13440, lr=9.99005e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=154602 2021-06-20 13:35:39 | INFO | train_inner | epoch 005: 1512 / 3002 loss=2.379, ppl=5.2, wps=5904.1, ups=0.09, wpb=64818, bsz=128, num_updates=13441, lr=9.99005e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=154613 2021-06-20 13:35:50 | INFO | train_inner | epoch 005: 1513 / 3002 loss=2.392, ppl=5.25, wps=5829.1, ups=0.09, wpb=64926, bsz=128, num_updates=13442, lr=9.99005e-05, gnorm=2.017, loss_scale=8, train_wall=11, gb_free=2.8, wall=154624 2021-06-20 13:36:01 | INFO | train_inner | epoch 005: 1514 / 3002 loss=2.444, ppl=5.44, wps=5823, ups=0.09, wpb=64815, bsz=128, num_updates=13443, lr=9.99004e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=154635 2021-06-20 13:36:12 | INFO | train_inner | epoch 005: 1515 / 3002 loss=2.45, ppl=5.46, wps=5861.4, ups=0.09, wpb=64865, bsz=128, num_updates=13444, lr=9.99004e-05, gnorm=1.97, loss_scale=8, train_wall=11, gb_free=2.8, wall=154646 2021-06-20 13:36:23 | INFO | train_inner | epoch 005: 1516 / 3002 loss=2.592, ppl=6.03, wps=5763.5, ups=0.09, wpb=64872, bsz=128, num_updates=13445, lr=9.99004e-05, gnorm=2.015, loss_scale=8, train_wall=11, gb_free=2.8, wall=154658 2021-06-20 13:36:34 | INFO | train_inner | epoch 005: 1517 / 3002 loss=2.514, ppl=5.71, wps=5781, ups=0.09, wpb=64818, bsz=128, num_updates=13446, lr=9.99004e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=154669 2021-06-20 13:36:46 | INFO | train_inner | epoch 005: 1518 / 3002 loss=2.403, ppl=5.29, wps=5754.2, ups=0.09, wpb=64807, bsz=128, num_updates=13447, lr=9.99004e-05, gnorm=1.937, loss_scale=8, train_wall=11, gb_free=2.8, wall=154680 2021-06-20 13:36:57 | INFO | train_inner | epoch 005: 1519 / 3002 loss=2.604, ppl=6.08, wps=5748.3, ups=0.09, wpb=64825, bsz=128, num_updates=13448, lr=9.99004e-05, gnorm=2.008, loss_scale=8, train_wall=11, gb_free=2.8, wall=154691 2021-06-20 13:37:08 | INFO | train_inner | epoch 005: 1520 / 3002 loss=2.581, ppl=5.99, wps=5831.9, ups=0.09, wpb=64826, bsz=128, num_updates=13449, lr=9.99004e-05, gnorm=1.888, loss_scale=8, train_wall=11, gb_free=2.8, wall=154702 2021-06-20 13:37:19 | INFO | train_inner | epoch 005: 1521 / 3002 loss=2.466, ppl=5.52, wps=5822.8, ups=0.09, wpb=64917, bsz=128, num_updates=13450, lr=9.99004e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=154714 2021-06-20 13:37:30 | INFO | train_inner | epoch 005: 1522 / 3002 loss=2.47, ppl=5.54, wps=5788.9, ups=0.09, wpb=64796, bsz=128, num_updates=13451, lr=9.99004e-05, gnorm=2.011, loss_scale=8, train_wall=11, gb_free=2.8, wall=154725 2021-06-20 13:37:42 | INFO | train_inner | epoch 005: 1523 / 3002 loss=2.579, ppl=5.97, wps=5808.3, ups=0.09, wpb=64833, bsz=128, num_updates=13452, lr=9.99004e-05, gnorm=2.026, loss_scale=8, train_wall=11, gb_free=2.8, wall=154736 2021-06-20 13:37:53 | INFO | train_inner | epoch 005: 1524 / 3002 loss=2.503, ppl=5.67, wps=5913.3, ups=0.09, wpb=64887, bsz=128, num_updates=13453, lr=9.99004e-05, gnorm=2.109, loss_scale=8, train_wall=11, gb_free=2.8, wall=154747 2021-06-20 13:38:04 | INFO | train_inner | epoch 005: 1525 / 3002 loss=2.694, ppl=6.47, wps=5920.1, ups=0.09, wpb=64905, bsz=128, num_updates=13454, lr=9.99004e-05, gnorm=2.051, loss_scale=8, train_wall=10, gb_free=2.8, wall=154758 2021-06-20 13:38:15 | INFO | train_inner | epoch 005: 1526 / 3002 loss=2.465, ppl=5.52, wps=5816.6, ups=0.09, wpb=64856, bsz=128, num_updates=13455, lr=9.99004e-05, gnorm=1.857, loss_scale=8, train_wall=11, gb_free=2.8, wall=154769 2021-06-20 13:38:26 | INFO | train_inner | epoch 005: 1527 / 3002 loss=2.355, ppl=5.12, wps=5736.4, ups=0.09, wpb=64860, bsz=128, num_updates=13456, lr=9.99003e-05, gnorm=1.868, loss_scale=8, train_wall=11, gb_free=2.8, wall=154780 2021-06-20 13:38:37 | INFO | train_inner | epoch 005: 1528 / 3002 loss=2.51, ppl=5.7, wps=5740.1, ups=0.09, wpb=64854, bsz=128, num_updates=13457, lr=9.99003e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=154792 2021-06-20 13:38:48 | INFO | train_inner | epoch 005: 1529 / 3002 loss=2.591, ppl=6.03, wps=5821, ups=0.09, wpb=64790, bsz=128, num_updates=13458, lr=9.99003e-05, gnorm=2.247, loss_scale=8, train_wall=11, gb_free=2.8, wall=154803 2021-06-20 13:38:59 | INFO | train_inner | epoch 005: 1530 / 3002 loss=2.433, ppl=5.4, wps=5942.2, ups=0.09, wpb=64843, bsz=128, num_updates=13459, lr=9.99003e-05, gnorm=1.973, loss_scale=8, train_wall=10, gb_free=2.8, wall=154814 2021-06-20 13:39:11 | INFO | train_inner | epoch 005: 1531 / 3002 loss=2.499, ppl=5.65, wps=5801.3, ups=0.09, wpb=64861, bsz=128, num_updates=13460, lr=9.99003e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=154825 2021-06-20 13:39:21 | INFO | train_inner | epoch 005: 1532 / 3002 loss=2.538, ppl=5.81, wps=5962.9, ups=0.09, wpb=64865, bsz=128, num_updates=13461, lr=9.99003e-05, gnorm=1.966, loss_scale=8, train_wall=10, gb_free=2.8, wall=154836 2021-06-20 13:39:32 | INFO | train_inner | epoch 005: 1533 / 3002 loss=2.6, ppl=6.06, wps=5907, ups=0.09, wpb=64877, bsz=128, num_updates=13462, lr=9.99003e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=154847 2021-06-20 13:39:43 | INFO | train_inner | epoch 005: 1534 / 3002 loss=2.426, ppl=5.37, wps=5942.3, ups=0.09, wpb=64916, bsz=128, num_updates=13463, lr=9.99003e-05, gnorm=2.007, loss_scale=8, train_wall=10, gb_free=2.8, wall=154858 2021-06-20 13:39:54 | INFO | train_inner | epoch 005: 1535 / 3002 loss=2.585, ppl=6, wps=5884.9, ups=0.09, wpb=64820, bsz=128, num_updates=13464, lr=9.99003e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=154869 2021-06-20 13:40:05 | INFO | train_inner | epoch 005: 1536 / 3002 loss=2.59, ppl=6.02, wps=5803.1, ups=0.09, wpb=64785, bsz=128, num_updates=13465, lr=9.99003e-05, gnorm=2.111, loss_scale=8, train_wall=11, gb_free=2.8, wall=154880 2021-06-20 13:40:17 | INFO | train_inner | epoch 005: 1537 / 3002 loss=2.574, ppl=5.95, wps=5849.9, ups=0.09, wpb=64810, bsz=128, num_updates=13466, lr=9.99003e-05, gnorm=2.069, loss_scale=8, train_wall=11, gb_free=2.8, wall=154891 2021-06-20 13:40:28 | INFO | train_inner | epoch 005: 1538 / 3002 loss=2.649, ppl=6.27, wps=5813.8, ups=0.09, wpb=64842, bsz=128, num_updates=13467, lr=9.99003e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=154902 2021-06-20 13:40:39 | INFO | train_inner | epoch 005: 1539 / 3002 loss=2.589, ppl=6.01, wps=5905, ups=0.09, wpb=64781, bsz=128, num_updates=13468, lr=9.99002e-05, gnorm=2.028, loss_scale=8, train_wall=10, gb_free=2.8, wall=154913 2021-06-20 13:40:50 | INFO | train_inner | epoch 005: 1540 / 3002 loss=2.637, ppl=6.22, wps=5902.1, ups=0.09, wpb=64826, bsz=128, num_updates=13469, lr=9.99002e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=154924 2021-06-20 13:41:01 | INFO | train_inner | epoch 005: 1541 / 3002 loss=2.507, ppl=5.68, wps=5929.5, ups=0.09, wpb=64871, bsz=128, num_updates=13470, lr=9.99002e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=154935 2021-06-20 13:41:12 | INFO | train_inner | epoch 005: 1542 / 3002 loss=2.643, ppl=6.25, wps=5858.4, ups=0.09, wpb=64911, bsz=128, num_updates=13471, lr=9.99002e-05, gnorm=2.052, loss_scale=8, train_wall=11, gb_free=2.8, wall=154946 2021-06-20 13:41:23 | INFO | train_inner | epoch 005: 1543 / 3002 loss=2.457, ppl=5.49, wps=5795.5, ups=0.09, wpb=64749, bsz=128, num_updates=13472, lr=9.99002e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=154957 2021-06-20 13:41:34 | INFO | train_inner | epoch 005: 1544 / 3002 loss=2.464, ppl=5.52, wps=5975.8, ups=0.09, wpb=64856, bsz=128, num_updates=13473, lr=9.99002e-05, gnorm=2.022, loss_scale=8, train_wall=10, gb_free=2.8, wall=154968 2021-06-20 13:41:45 | INFO | train_inner | epoch 005: 1545 / 3002 loss=2.446, ppl=5.45, wps=5761.9, ups=0.09, wpb=64884, bsz=128, num_updates=13474, lr=9.99002e-05, gnorm=1.924, loss_scale=8, train_wall=11, gb_free=2.8, wall=154979 2021-06-20 13:41:56 | INFO | train_inner | epoch 005: 1546 / 3002 loss=2.497, ppl=5.64, wps=5862.8, ups=0.09, wpb=64785, bsz=128, num_updates=13475, lr=9.99002e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=154990 2021-06-20 13:42:07 | INFO | train_inner | epoch 005: 1547 / 3002 loss=2.491, ppl=5.62, wps=5876.9, ups=0.09, wpb=64820, bsz=128, num_updates=13476, lr=9.99002e-05, gnorm=2.029, loss_scale=8, train_wall=11, gb_free=2.8, wall=155001 2021-06-20 13:42:18 | INFO | train_inner | epoch 005: 1548 / 3002 loss=2.64, ppl=6.23, wps=5889.5, ups=0.09, wpb=64844, bsz=128, num_updates=13477, lr=9.99002e-05, gnorm=2.067, loss_scale=8, train_wall=11, gb_free=2.8, wall=155012 2021-06-20 13:42:29 | INFO | train_inner | epoch 005: 1549 / 3002 loss=2.407, ppl=5.3, wps=5855.5, ups=0.09, wpb=64804, bsz=128, num_updates=13478, lr=9.99002e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=155023 2021-06-20 13:42:40 | INFO | train_inner | epoch 005: 1550 / 3002 loss=2.611, ppl=6.11, wps=5909.1, ups=0.09, wpb=64841, bsz=128, num_updates=13479, lr=9.99002e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=155034 2021-06-20 13:42:51 | INFO | train_inner | epoch 005: 1551 / 3002 loss=2.433, ppl=5.4, wps=5917, ups=0.09, wpb=64794, bsz=128, num_updates=13480, lr=9.99002e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=155045 2021-06-20 13:43:02 | INFO | train_inner | epoch 005: 1552 / 3002 loss=2.432, ppl=5.4, wps=5830.9, ups=0.09, wpb=64826, bsz=128, num_updates=13481, lr=9.99001e-05, gnorm=1.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=155057 2021-06-20 13:43:13 | INFO | train_inner | epoch 005: 1553 / 3002 loss=2.485, ppl=5.6, wps=5853.1, ups=0.09, wpb=64902, bsz=128, num_updates=13482, lr=9.99001e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=155068 2021-06-20 13:43:24 | INFO | train_inner | epoch 005: 1554 / 3002 loss=2.6, ppl=6.06, wps=5812.8, ups=0.09, wpb=64741, bsz=128, num_updates=13483, lr=9.99001e-05, gnorm=1.975, loss_scale=16, train_wall=11, gb_free=2.8, wall=155079 2021-06-20 13:43:35 | INFO | train_inner | epoch 005: 1555 / 3002 loss=2.561, ppl=5.9, wps=5929.9, ups=0.09, wpb=64881, bsz=128, num_updates=13484, lr=9.99001e-05, gnorm=1.967, loss_scale=16, train_wall=10, gb_free=2.8, wall=155090 2021-06-20 13:43:46 | INFO | train_inner | epoch 005: 1556 / 3002 loss=2.384, ppl=5.22, wps=5950.5, ups=0.09, wpb=64916, bsz=128, num_updates=13485, lr=9.99001e-05, gnorm=1.993, loss_scale=16, train_wall=10, gb_free=2.8, wall=155101 2021-06-20 13:43:57 | INFO | train_inner | epoch 005: 1557 / 3002 loss=2.663, ppl=6.33, wps=5795.1, ups=0.09, wpb=64825, bsz=128, num_updates=13486, lr=9.99001e-05, gnorm=1.97, loss_scale=16, train_wall=11, gb_free=2.8, wall=155112 2021-06-20 13:44:08 | INFO | train_inner | epoch 005: 1558 / 3002 loss=2.561, ppl=5.9, wps=5972.7, ups=0.09, wpb=64895, bsz=128, num_updates=13487, lr=9.99001e-05, gnorm=1.972, loss_scale=16, train_wall=10, gb_free=2.8, wall=155123 2021-06-20 13:44:19 | INFO | train_inner | epoch 005: 1559 / 3002 loss=2.384, ppl=5.22, wps=5959.5, ups=0.09, wpb=64854, bsz=128, num_updates=13488, lr=9.99001e-05, gnorm=2.143, loss_scale=16, train_wall=10, gb_free=2.8, wall=155134 2021-06-20 13:44:30 | INFO | train_inner | epoch 005: 1560 / 3002 loss=2.426, ppl=5.37, wps=5824.7, ups=0.09, wpb=64814, bsz=128, num_updates=13489, lr=9.99001e-05, gnorm=2, loss_scale=16, train_wall=11, gb_free=2.8, wall=155145 2021-06-20 13:44:42 | INFO | train_inner | epoch 005: 1561 / 3002 loss=2.666, ppl=6.35, wps=5709.3, ups=0.09, wpb=64774, bsz=128, num_updates=13490, lr=9.99001e-05, gnorm=1.909, loss_scale=16, train_wall=11, gb_free=2.8, wall=155156 2021-06-20 13:44:53 | INFO | train_inner | epoch 005: 1562 / 3002 loss=2.597, ppl=6.05, wps=5746.1, ups=0.09, wpb=64841, bsz=128, num_updates=13491, lr=9.99001e-05, gnorm=1.955, loss_scale=16, train_wall=11, gb_free=2.8, wall=155167 2021-06-20 13:45:04 | INFO | train_inner | epoch 005: 1563 / 3002 loss=2.568, ppl=5.93, wps=5922.5, ups=0.09, wpb=64918, bsz=128, num_updates=13492, lr=9.99001e-05, gnorm=1.949, loss_scale=16, train_wall=11, gb_free=2.8, wall=155178 2021-06-20 13:45:15 | INFO | train_inner | epoch 005: 1564 / 3002 loss=2.519, ppl=5.73, wps=5945.3, ups=0.09, wpb=64599, bsz=128, num_updates=13493, lr=9.99e-05, gnorm=1.999, loss_scale=16, train_wall=10, gb_free=2.8, wall=155189 2021-06-20 13:45:26 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-20 13:45:37 | INFO | train_inner | epoch 005: 1566 / 3002 loss=2.557, ppl=5.88, wps=2926.5, ups=0.05, wpb=64846, bsz=128, num_updates=13494, lr=9.99e-05, gnorm=1.995, loss_scale=8, train_wall=21, gb_free=2.8, wall=155211 2021-06-20 13:45:48 | INFO | train_inner | epoch 005: 1567 / 3002 loss=2.556, ppl=5.88, wps=5822.3, ups=0.09, wpb=64906, bsz=128, num_updates=13495, lr=9.99e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=155222 2021-06-20 13:45:59 | INFO | train_inner | epoch 005: 1568 / 3002 loss=2.503, ppl=5.67, wps=5814.7, ups=0.09, wpb=64847, bsz=128, num_updates=13496, lr=9.99e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=155234 2021-06-20 13:46:10 | INFO | train_inner | epoch 005: 1569 / 3002 loss=2.429, ppl=5.39, wps=5912.8, ups=0.09, wpb=64893, bsz=128, num_updates=13497, lr=9.99e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=155245 2021-06-20 13:46:21 | INFO | train_inner | epoch 005: 1570 / 3002 loss=2.259, ppl=4.79, wps=5980.5, ups=0.09, wpb=64804, bsz=128, num_updates=13498, lr=9.99e-05, gnorm=2.02, loss_scale=8, train_wall=10, gb_free=2.8, wall=155255 2021-06-20 13:46:32 | INFO | train_inner | epoch 005: 1571 / 3002 loss=2.584, ppl=6, wps=5840.6, ups=0.09, wpb=64832, bsz=128, num_updates=13499, lr=9.99e-05, gnorm=2.066, loss_scale=8, train_wall=11, gb_free=2.8, wall=155266 2021-06-20 13:46:43 | INFO | train_inner | epoch 005: 1572 / 3002 loss=2.707, ppl=6.53, wps=5763.3, ups=0.09, wpb=64828, bsz=128, num_updates=13500, lr=9.99e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=155278 2021-06-20 13:46:55 | INFO | train_inner | epoch 005: 1573 / 3002 loss=2.595, ppl=6.04, wps=5766.9, ups=0.09, wpb=64867, bsz=128, num_updates=13501, lr=9.99e-05, gnorm=1.945, loss_scale=8, train_wall=11, gb_free=2.8, wall=155289 2021-06-20 13:47:06 | INFO | train_inner | epoch 005: 1574 / 3002 loss=2.483, ppl=5.59, wps=5730.9, ups=0.09, wpb=64765, bsz=128, num_updates=13502, lr=9.99e-05, gnorm=2.056, loss_scale=8, train_wall=11, gb_free=2.8, wall=155300 2021-06-20 13:47:17 | INFO | train_inner | epoch 005: 1575 / 3002 loss=2.461, ppl=5.51, wps=5993.8, ups=0.09, wpb=64889, bsz=128, num_updates=13503, lr=9.99e-05, gnorm=1.914, loss_scale=8, train_wall=10, gb_free=2.8, wall=155311 2021-06-20 13:47:28 | INFO | train_inner | epoch 005: 1576 / 3002 loss=2.534, ppl=5.79, wps=5719.6, ups=0.09, wpb=64840, bsz=128, num_updates=13504, lr=9.99e-05, gnorm=1.872, loss_scale=8, train_wall=11, gb_free=2.8, wall=155322 2021-06-20 13:47:39 | INFO | train_inner | epoch 005: 1577 / 3002 loss=2.46, ppl=5.5, wps=5831.2, ups=0.09, wpb=64902, bsz=128, num_updates=13505, lr=9.99e-05, gnorm=1.904, loss_scale=8, train_wall=11, gb_free=2.8, wall=155334 2021-06-20 13:47:50 | INFO | train_inner | epoch 005: 1578 / 3002 loss=2.656, ppl=6.3, wps=5845.1, ups=0.09, wpb=64826, bsz=128, num_updates=13506, lr=9.98999e-05, gnorm=2.061, loss_scale=8, train_wall=11, gb_free=2.8, wall=155345 2021-06-20 13:48:02 | INFO | train_inner | epoch 005: 1579 / 3002 loss=2.41, ppl=5.31, wps=5787.5, ups=0.09, wpb=64804, bsz=128, num_updates=13507, lr=9.98999e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=155356 2021-06-20 13:48:13 | INFO | train_inner | epoch 005: 1580 / 3002 loss=2.729, ppl=6.63, wps=5683.5, ups=0.09, wpb=64795, bsz=128, num_updates=13508, lr=9.98999e-05, gnorm=2.015, loss_scale=8, train_wall=11, gb_free=2.8, wall=155367 2021-06-20 13:48:24 | INFO | train_inner | epoch 005: 1581 / 3002 loss=2.461, ppl=5.5, wps=5932.7, ups=0.09, wpb=64854, bsz=128, num_updates=13509, lr=9.98999e-05, gnorm=1.989, loss_scale=8, train_wall=10, gb_free=2.8, wall=155378 2021-06-20 13:48:35 | INFO | train_inner | epoch 005: 1582 / 3002 loss=2.352, ppl=5.1, wps=5906.3, ups=0.09, wpb=64834, bsz=128, num_updates=13510, lr=9.98999e-05, gnorm=2.177, loss_scale=8, train_wall=10, gb_free=2.8, wall=155389 2021-06-20 13:48:46 | INFO | train_inner | epoch 005: 1583 / 3002 loss=2.531, ppl=5.78, wps=5763.8, ups=0.09, wpb=64810, bsz=128, num_updates=13511, lr=9.98999e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=155400 2021-06-20 13:48:57 | INFO | train_inner | epoch 005: 1584 / 3002 loss=2.439, ppl=5.42, wps=5859.9, ups=0.09, wpb=64820, bsz=128, num_updates=13512, lr=9.98999e-05, gnorm=1.885, loss_scale=8, train_wall=11, gb_free=2.8, wall=155411 2021-06-20 13:49:08 | INFO | train_inner | epoch 005: 1585 / 3002 loss=2.395, ppl=5.26, wps=5766.7, ups=0.09, wpb=64765, bsz=128, num_updates=13513, lr=9.98999e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=155423 2021-06-20 13:49:20 | INFO | train_inner | epoch 005: 1586 / 3002 loss=2.616, ppl=6.13, wps=5824, ups=0.09, wpb=64824, bsz=128, num_updates=13514, lr=9.98999e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=155434 2021-06-20 13:49:31 | INFO | train_inner | epoch 005: 1587 / 3002 loss=2.58, ppl=5.98, wps=5821.8, ups=0.09, wpb=64811, bsz=128, num_updates=13515, lr=9.98999e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=155445 2021-06-20 13:49:42 | INFO | train_inner | epoch 005: 1588 / 3002 loss=2.519, ppl=5.73, wps=5853.5, ups=0.09, wpb=64756, bsz=128, num_updates=13516, lr=9.98999e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=155456 2021-06-20 13:49:53 | INFO | train_inner | epoch 005: 1589 / 3002 loss=2.407, ppl=5.3, wps=5780.2, ups=0.09, wpb=64868, bsz=128, num_updates=13517, lr=9.98999e-05, gnorm=1.896, loss_scale=8, train_wall=11, gb_free=2.8, wall=155467 2021-06-20 13:50:04 | INFO | train_inner | epoch 005: 1590 / 3002 loss=2.57, ppl=5.94, wps=5782.3, ups=0.09, wpb=64704, bsz=128, num_updates=13518, lr=9.98998e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=155478 2021-06-20 13:50:15 | INFO | train_inner | epoch 005: 1591 / 3002 loss=2.586, ppl=6, wps=5729.8, ups=0.09, wpb=64781, bsz=128, num_updates=13519, lr=9.98998e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=155490 2021-06-20 13:50:26 | INFO | train_inner | epoch 005: 1592 / 3002 loss=2.503, ppl=5.67, wps=5892.8, ups=0.09, wpb=64847, bsz=128, num_updates=13520, lr=9.98998e-05, gnorm=2.092, loss_scale=8, train_wall=11, gb_free=2.8, wall=155501 2021-06-20 13:50:37 | INFO | train_inner | epoch 005: 1593 / 3002 loss=2.53, ppl=5.78, wps=5922.4, ups=0.09, wpb=64859, bsz=128, num_updates=13521, lr=9.98998e-05, gnorm=2.087, loss_scale=8, train_wall=10, gb_free=2.8, wall=155512 2021-06-20 13:50:48 | INFO | train_inner | epoch 005: 1594 / 3002 loss=2.429, ppl=5.38, wps=5942, ups=0.09, wpb=64889, bsz=128, num_updates=13522, lr=9.98998e-05, gnorm=1.892, loss_scale=8, train_wall=10, gb_free=2.8, wall=155523 2021-06-20 13:50:59 | INFO | train_inner | epoch 005: 1595 / 3002 loss=2.414, ppl=5.33, wps=5888.1, ups=0.09, wpb=64733, bsz=128, num_updates=13523, lr=9.98998e-05, gnorm=2.023, loss_scale=8, train_wall=11, gb_free=2.8, wall=155534 2021-06-20 13:51:10 | INFO | train_inner | epoch 005: 1596 / 3002 loss=2.498, ppl=5.65, wps=5838.3, ups=0.09, wpb=64818, bsz=128, num_updates=13524, lr=9.98998e-05, gnorm=1.927, loss_scale=8, train_wall=11, gb_free=2.8, wall=155545 2021-06-20 13:51:21 | INFO | train_inner | epoch 005: 1597 / 3002 loss=2.407, ppl=5.31, wps=6035.5, ups=0.09, wpb=64899, bsz=128, num_updates=13525, lr=9.98998e-05, gnorm=1.933, loss_scale=8, train_wall=10, gb_free=2.8, wall=155555 2021-06-20 13:51:32 | INFO | train_inner | epoch 005: 1598 / 3002 loss=2.511, ppl=5.7, wps=5879, ups=0.09, wpb=64853, bsz=128, num_updates=13526, lr=9.98998e-05, gnorm=2.221, loss_scale=8, train_wall=11, gb_free=2.8, wall=155567 2021-06-20 13:51:43 | INFO | train_inner | epoch 005: 1599 / 3002 loss=2.426, ppl=5.37, wps=5928, ups=0.09, wpb=64762, bsz=128, num_updates=13527, lr=9.98998e-05, gnorm=1.962, loss_scale=8, train_wall=10, gb_free=2.8, wall=155577 2021-06-20 13:51:54 | INFO | train_inner | epoch 005: 1600 / 3002 loss=2.365, ppl=5.15, wps=5797.3, ups=0.09, wpb=64807, bsz=128, num_updates=13528, lr=9.98998e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=155589 2021-06-20 13:52:05 | INFO | train_inner | epoch 005: 1601 / 3002 loss=2.651, ppl=6.28, wps=5822.8, ups=0.09, wpb=64832, bsz=128, num_updates=13529, lr=9.98998e-05, gnorm=2.049, loss_scale=8, train_wall=11, gb_free=2.8, wall=155600 2021-06-20 13:52:17 | INFO | train_inner | epoch 005: 1602 / 3002 loss=2.672, ppl=6.38, wps=5755.7, ups=0.09, wpb=64853, bsz=128, num_updates=13530, lr=9.98998e-05, gnorm=1.909, loss_scale=8, train_wall=11, gb_free=2.8, wall=155611 2021-06-20 13:52:28 | INFO | train_inner | epoch 005: 1603 / 3002 loss=2.531, ppl=5.78, wps=5748.6, ups=0.09, wpb=64822, bsz=128, num_updates=13531, lr=9.98997e-05, gnorm=1.91, loss_scale=8, train_wall=11, gb_free=2.8, wall=155622 2021-06-20 13:52:39 | INFO | train_inner | epoch 005: 1604 / 3002 loss=2.598, ppl=6.05, wps=5757.1, ups=0.09, wpb=64842, bsz=128, num_updates=13532, lr=9.98997e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=155634 2021-06-20 13:52:50 | INFO | train_inner | epoch 005: 1605 / 3002 loss=2.302, ppl=4.93, wps=5990.6, ups=0.09, wpb=64877, bsz=128, num_updates=13533, lr=9.98997e-05, gnorm=1.852, loss_scale=8, train_wall=10, gb_free=2.8, wall=155644 2021-06-20 13:53:01 | INFO | train_inner | epoch 005: 1606 / 3002 loss=2.528, ppl=5.77, wps=5948.8, ups=0.09, wpb=64820, bsz=128, num_updates=13534, lr=9.98997e-05, gnorm=1.915, loss_scale=8, train_wall=10, gb_free=2.8, wall=155655 2021-06-20 13:53:12 | INFO | train_inner | epoch 005: 1607 / 3002 loss=2.576, ppl=5.96, wps=5853.6, ups=0.09, wpb=64851, bsz=128, num_updates=13535, lr=9.98997e-05, gnorm=2.038, loss_scale=8, train_wall=11, gb_free=2.8, wall=155666 2021-06-20 13:53:23 | INFO | train_inner | epoch 005: 1608 / 3002 loss=2.467, ppl=5.53, wps=5801.4, ups=0.09, wpb=64824, bsz=128, num_updates=13536, lr=9.98997e-05, gnorm=1.888, loss_scale=8, train_wall=11, gb_free=2.8, wall=155678 2021-06-20 13:53:34 | INFO | train_inner | epoch 005: 1609 / 3002 loss=2.419, ppl=5.35, wps=5826.8, ups=0.09, wpb=64837, bsz=128, num_updates=13537, lr=9.98997e-05, gnorm=1.923, loss_scale=8, train_wall=11, gb_free=2.8, wall=155689 2021-06-20 13:53:46 | INFO | train_inner | epoch 005: 1610 / 3002 loss=2.659, ppl=6.31, wps=5684.3, ups=0.09, wpb=64792, bsz=128, num_updates=13538, lr=9.98997e-05, gnorm=2.036, loss_scale=8, train_wall=11, gb_free=2.8, wall=155700 2021-06-20 13:53:57 | INFO | train_inner | epoch 005: 1611 / 3002 loss=2.474, ppl=5.55, wps=5803.3, ups=0.09, wpb=64746, bsz=128, num_updates=13539, lr=9.98997e-05, gnorm=1.997, loss_scale=8, train_wall=11, gb_free=2.8, wall=155711 2021-06-20 13:54:08 | INFO | train_inner | epoch 005: 1612 / 3002 loss=2.319, ppl=4.99, wps=5803.9, ups=0.09, wpb=64794, bsz=128, num_updates=13540, lr=9.98997e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=155722 2021-06-20 13:54:19 | INFO | train_inner | epoch 005: 1613 / 3002 loss=2.558, ppl=5.89, wps=5763, ups=0.09, wpb=64820, bsz=128, num_updates=13541, lr=9.98997e-05, gnorm=2.457, loss_scale=8, train_wall=11, gb_free=2.8, wall=155734 2021-06-20 13:54:30 | INFO | train_inner | epoch 005: 1614 / 3002 loss=2.643, ppl=6.25, wps=5810.8, ups=0.09, wpb=64759, bsz=128, num_updates=13542, lr=9.98997e-05, gnorm=1.989, loss_scale=8, train_wall=11, gb_free=2.8, wall=155745 2021-06-20 13:54:42 | INFO | train_inner | epoch 005: 1615 / 3002 loss=2.53, ppl=5.78, wps=5764.8, ups=0.09, wpb=64812, bsz=128, num_updates=13543, lr=9.98996e-05, gnorm=2.017, loss_scale=8, train_wall=11, gb_free=2.8, wall=155756 2021-06-20 13:54:53 | INFO | train_inner | epoch 005: 1616 / 3002 loss=2.505, ppl=5.67, wps=5996.3, ups=0.09, wpb=64910, bsz=128, num_updates=13544, lr=9.98996e-05, gnorm=1.944, loss_scale=8, train_wall=10, gb_free=2.8, wall=155767 2021-06-20 13:55:04 | INFO | train_inner | epoch 005: 1617 / 3002 loss=2.46, ppl=5.5, wps=5786.7, ups=0.09, wpb=64779, bsz=128, num_updates=13545, lr=9.98996e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=155778 2021-06-20 13:55:15 | INFO | train_inner | epoch 005: 1618 / 3002 loss=2.482, ppl=5.59, wps=5761.1, ups=0.09, wpb=64852, bsz=128, num_updates=13546, lr=9.98996e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=155789 2021-06-20 13:55:26 | INFO | train_inner | epoch 005: 1619 / 3002 loss=2.422, ppl=5.36, wps=5707.9, ups=0.09, wpb=64864, bsz=128, num_updates=13547, lr=9.98996e-05, gnorm=2.042, loss_scale=8, train_wall=11, gb_free=2.8, wall=155801 2021-06-20 13:55:38 | INFO | train_inner | epoch 005: 1620 / 3002 loss=2.501, ppl=5.66, wps=5750.7, ups=0.09, wpb=64775, bsz=128, num_updates=13548, lr=9.98996e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=155812 2021-06-20 13:55:49 | INFO | train_inner | epoch 005: 1621 / 3002 loss=2.585, ppl=6, wps=5855, ups=0.09, wpb=64835, bsz=128, num_updates=13549, lr=9.98996e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=155823 2021-06-20 13:56:00 | INFO | train_inner | epoch 005: 1622 / 3002 loss=2.547, ppl=5.85, wps=5882.4, ups=0.09, wpb=64794, bsz=128, num_updates=13550, lr=9.98996e-05, gnorm=2.031, loss_scale=8, train_wall=11, gb_free=2.8, wall=155834 2021-06-20 13:56:11 | INFO | train_inner | epoch 005: 1623 / 3002 loss=2.488, ppl=5.61, wps=5816.2, ups=0.09, wpb=64818, bsz=128, num_updates=13551, lr=9.98996e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=155845 2021-06-20 13:56:22 | INFO | train_inner | epoch 005: 1624 / 3002 loss=2.54, ppl=5.82, wps=5860, ups=0.09, wpb=64774, bsz=128, num_updates=13552, lr=9.98996e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=155856 2021-06-20 13:56:33 | INFO | train_inner | epoch 005: 1625 / 3002 loss=2.56, ppl=5.9, wps=5750.1, ups=0.09, wpb=64811, bsz=128, num_updates=13553, lr=9.98996e-05, gnorm=1.927, loss_scale=8, train_wall=11, gb_free=2.8, wall=155867 2021-06-20 13:56:44 | INFO | train_inner | epoch 005: 1626 / 3002 loss=2.647, ppl=6.26, wps=5890.5, ups=0.09, wpb=64807, bsz=128, num_updates=13554, lr=9.98996e-05, gnorm=1.996, loss_scale=8, train_wall=11, gb_free=2.8, wall=155878 2021-06-20 13:56:55 | INFO | train_inner | epoch 005: 1627 / 3002 loss=2.591, ppl=6.03, wps=5756.1, ups=0.09, wpb=64791, bsz=128, num_updates=13555, lr=9.98996e-05, gnorm=4.05, loss_scale=8, train_wall=11, gb_free=2.8, wall=155890 2021-06-20 13:57:07 | INFO | train_inner | epoch 005: 1628 / 3002 loss=2.518, ppl=5.73, wps=5773.2, ups=0.09, wpb=64828, bsz=128, num_updates=13556, lr=9.98995e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=155901 2021-06-20 13:57:18 | INFO | train_inner | epoch 005: 1629 / 3002 loss=2.612, ppl=6.11, wps=5865.9, ups=0.09, wpb=64910, bsz=128, num_updates=13557, lr=9.98995e-05, gnorm=2.075, loss_scale=8, train_wall=11, gb_free=2.8, wall=155912 2021-06-20 13:57:29 | INFO | train_inner | epoch 005: 1630 / 3002 loss=2.483, ppl=5.59, wps=5799.9, ups=0.09, wpb=64858, bsz=128, num_updates=13558, lr=9.98995e-05, gnorm=3.089, loss_scale=8, train_wall=11, gb_free=2.8, wall=155923 2021-06-20 13:57:40 | INFO | train_inner | epoch 005: 1631 / 3002 loss=2.397, ppl=5.27, wps=5955.4, ups=0.09, wpb=64826, bsz=128, num_updates=13559, lr=9.98995e-05, gnorm=2.084, loss_scale=8, train_wall=10, gb_free=2.8, wall=155934 2021-06-20 13:57:51 | INFO | train_inner | epoch 005: 1632 / 3002 loss=2.613, ppl=6.12, wps=5870, ups=0.09, wpb=64852, bsz=128, num_updates=13560, lr=9.98995e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=155945 2021-06-20 13:58:02 | INFO | train_inner | epoch 005: 1633 / 3002 loss=2.474, ppl=5.55, wps=5806, ups=0.09, wpb=64777, bsz=128, num_updates=13561, lr=9.98995e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=155956 2021-06-20 13:58:13 | INFO | train_inner | epoch 005: 1634 / 3002 loss=2.58, ppl=5.98, wps=5791.6, ups=0.09, wpb=64794, bsz=128, num_updates=13562, lr=9.98995e-05, gnorm=2.08, loss_scale=8, train_wall=11, gb_free=2.8, wall=155968 2021-06-20 13:58:24 | INFO | train_inner | epoch 005: 1635 / 3002 loss=2.438, ppl=5.42, wps=5719.1, ups=0.09, wpb=64752, bsz=128, num_updates=13563, lr=9.98995e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=155979 2021-06-20 13:58:36 | INFO | train_inner | epoch 005: 1636 / 3002 loss=2.467, ppl=5.53, wps=5878.7, ups=0.09, wpb=64777, bsz=128, num_updates=13564, lr=9.98995e-05, gnorm=1.88, loss_scale=8, train_wall=11, gb_free=2.8, wall=155990 2021-06-20 13:58:47 | INFO | train_inner | epoch 005: 1637 / 3002 loss=2.618, ppl=6.14, wps=5836.4, ups=0.09, wpb=64816, bsz=128, num_updates=13565, lr=9.98995e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=156001 2021-06-20 13:58:58 | INFO | train_inner | epoch 005: 1638 / 3002 loss=2.441, ppl=5.43, wps=5796.9, ups=0.09, wpb=64769, bsz=128, num_updates=13566, lr=9.98995e-05, gnorm=1.888, loss_scale=8, train_wall=11, gb_free=2.8, wall=156012 2021-06-20 13:59:09 | INFO | train_inner | epoch 005: 1639 / 3002 loss=2.423, ppl=5.36, wps=5835.2, ups=0.09, wpb=64790, bsz=128, num_updates=13567, lr=9.98995e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=156023 2021-06-20 13:59:20 | INFO | train_inner | epoch 005: 1640 / 3002 loss=2.658, ppl=6.31, wps=5879.4, ups=0.09, wpb=64817, bsz=128, num_updates=13568, lr=9.98994e-05, gnorm=1.927, loss_scale=8, train_wall=11, gb_free=2.8, wall=156034 2021-06-20 13:59:31 | INFO | train_inner | epoch 005: 1641 / 3002 loss=2.657, ppl=6.31, wps=5886.4, ups=0.09, wpb=64746, bsz=128, num_updates=13569, lr=9.98994e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=156045 2021-06-20 13:59:42 | INFO | train_inner | epoch 005: 1642 / 3002 loss=2.547, ppl=5.84, wps=5835.1, ups=0.09, wpb=64727, bsz=128, num_updates=13570, lr=9.98994e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=156056 2021-06-20 13:59:53 | INFO | train_inner | epoch 005: 1643 / 3002 loss=2.43, ppl=5.39, wps=5766.7, ups=0.09, wpb=64850, bsz=128, num_updates=13571, lr=9.98994e-05, gnorm=5.728, loss_scale=8, train_wall=11, gb_free=2.8, wall=156068 2021-06-20 14:00:04 | INFO | train_inner | epoch 005: 1644 / 3002 loss=2.433, ppl=5.4, wps=5840.9, ups=0.09, wpb=64837, bsz=128, num_updates=13572, lr=9.98994e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=156079 2021-06-20 14:00:15 | INFO | train_inner | epoch 005: 1645 / 3002 loss=2.406, ppl=5.3, wps=5835.4, ups=0.09, wpb=64807, bsz=128, num_updates=13573, lr=9.98994e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=156090 2021-06-20 14:00:27 | INFO | train_inner | epoch 005: 1646 / 3002 loss=2.536, ppl=5.8, wps=5784, ups=0.09, wpb=64877, bsz=128, num_updates=13574, lr=9.98994e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=156101 2021-06-20 14:00:38 | INFO | train_inner | epoch 005: 1647 / 3002 loss=2.566, ppl=5.92, wps=5788, ups=0.09, wpb=64790, bsz=128, num_updates=13575, lr=9.98994e-05, gnorm=1.891, loss_scale=8, train_wall=11, gb_free=2.8, wall=156112 2021-06-20 14:00:49 | INFO | train_inner | epoch 005: 1648 / 3002 loss=2.504, ppl=5.67, wps=5783.6, ups=0.09, wpb=64822, bsz=128, num_updates=13576, lr=9.98994e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=156123 2021-06-20 14:01:00 | INFO | train_inner | epoch 005: 1649 / 3002 loss=2.489, ppl=5.61, wps=5784.5, ups=0.09, wpb=64739, bsz=128, num_updates=13577, lr=9.98994e-05, gnorm=1.909, loss_scale=8, train_wall=11, gb_free=2.8, wall=156135 2021-06-20 14:01:11 | INFO | train_inner | epoch 005: 1650 / 3002 loss=2.513, ppl=5.71, wps=5777.7, ups=0.09, wpb=64783, bsz=128, num_updates=13578, lr=9.98994e-05, gnorm=2.016, loss_scale=8, train_wall=11, gb_free=2.8, wall=156146 2021-06-20 14:01:23 | INFO | train_inner | epoch 005: 1651 / 3002 loss=2.582, ppl=5.99, wps=5822.7, ups=0.09, wpb=64732, bsz=128, num_updates=13579, lr=9.98994e-05, gnorm=1.963, loss_scale=8, train_wall=11, gb_free=2.8, wall=156157 2021-06-20 14:01:34 | INFO | train_inner | epoch 005: 1652 / 3002 loss=2.496, ppl=5.64, wps=5921.2, ups=0.09, wpb=64889, bsz=128, num_updates=13580, lr=9.98994e-05, gnorm=2.258, loss_scale=8, train_wall=10, gb_free=2.8, wall=156168 2021-06-20 14:01:45 | INFO | train_inner | epoch 005: 1653 / 3002 loss=2.457, ppl=5.49, wps=5863.9, ups=0.09, wpb=64903, bsz=128, num_updates=13581, lr=9.98993e-05, gnorm=2.017, loss_scale=8, train_wall=11, gb_free=2.8, wall=156179 2021-06-20 14:01:56 | INFO | train_inner | epoch 005: 1654 / 3002 loss=2.411, ppl=5.32, wps=5927.9, ups=0.09, wpb=64854, bsz=128, num_updates=13582, lr=9.98993e-05, gnorm=2.036, loss_scale=8, train_wall=11, gb_free=2.8, wall=156190 2021-06-20 14:02:07 | INFO | train_inner | epoch 005: 1655 / 3002 loss=2.521, ppl=5.74, wps=5854.2, ups=0.09, wpb=64858, bsz=128, num_updates=13583, lr=9.98993e-05, gnorm=2.494, loss_scale=8, train_wall=11, gb_free=2.8, wall=156201 2021-06-20 14:02:18 | INFO | train_inner | epoch 005: 1656 / 3002 loss=2.581, ppl=5.99, wps=5812.2, ups=0.09, wpb=64751, bsz=128, num_updates=13584, lr=9.98993e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=156212 2021-06-20 14:02:29 | INFO | train_inner | epoch 005: 1657 / 3002 loss=2.351, ppl=5.1, wps=5755.9, ups=0.09, wpb=64749, bsz=128, num_updates=13585, lr=9.98993e-05, gnorm=1.949, loss_scale=8, train_wall=11, gb_free=2.8, wall=156223 2021-06-20 14:02:40 | INFO | train_inner | epoch 005: 1658 / 3002 loss=2.489, ppl=5.61, wps=5837.8, ups=0.09, wpb=64854, bsz=128, num_updates=13586, lr=9.98993e-05, gnorm=1.89, loss_scale=8, train_wall=11, gb_free=2.8, wall=156234 2021-06-20 14:02:51 | INFO | train_inner | epoch 005: 1659 / 3002 loss=2.398, ppl=5.27, wps=5829.6, ups=0.09, wpb=64810, bsz=128, num_updates=13587, lr=9.98993e-05, gnorm=2.075, loss_scale=8, train_wall=11, gb_free=2.8, wall=156246 2021-06-20 14:03:02 | INFO | train_inner | epoch 005: 1660 / 3002 loss=2.423, ppl=5.36, wps=5867.4, ups=0.09, wpb=64862, bsz=128, num_updates=13588, lr=9.98993e-05, gnorm=1.909, loss_scale=8, train_wall=11, gb_free=2.8, wall=156257 2021-06-20 14:03:13 | INFO | train_inner | epoch 005: 1661 / 3002 loss=2.63, ppl=6.19, wps=5920.4, ups=0.09, wpb=64842, bsz=128, num_updates=13589, lr=9.98993e-05, gnorm=1.85, loss_scale=8, train_wall=11, gb_free=2.8, wall=156268 2021-06-20 14:03:24 | INFO | train_inner | epoch 005: 1662 / 3002 loss=2.546, ppl=5.84, wps=5862, ups=0.09, wpb=64808, bsz=128, num_updates=13590, lr=9.98993e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=156279 2021-06-20 14:03:35 | INFO | train_inner | epoch 005: 1663 / 3002 loss=2.428, ppl=5.38, wps=5865.6, ups=0.09, wpb=64810, bsz=128, num_updates=13591, lr=9.98993e-05, gnorm=6.096, loss_scale=8, train_wall=11, gb_free=2.8, wall=156290 2021-06-20 14:03:46 | INFO | train_inner | epoch 005: 1664 / 3002 loss=2.452, ppl=5.47, wps=5834, ups=0.09, wpb=64834, bsz=128, num_updates=13592, lr=9.98993e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=156301 2021-06-20 14:03:57 | INFO | train_inner | epoch 005: 1665 / 3002 loss=2.694, ppl=6.47, wps=5928.8, ups=0.09, wpb=64855, bsz=128, num_updates=13593, lr=9.98992e-05, gnorm=1.982, loss_scale=8, train_wall=10, gb_free=2.8, wall=156312 2021-06-20 14:04:09 | INFO | train_inner | epoch 005: 1666 / 3002 loss=2.431, ppl=5.39, wps=5855.4, ups=0.09, wpb=64884, bsz=128, num_updates=13594, lr=9.98992e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=156323 2021-06-20 14:04:20 | INFO | train_inner | epoch 005: 1667 / 3002 loss=2.531, ppl=5.78, wps=5805.6, ups=0.09, wpb=64774, bsz=128, num_updates=13595, lr=9.98992e-05, gnorm=2.162, loss_scale=8, train_wall=11, gb_free=2.8, wall=156334 2021-06-20 14:04:31 | INFO | train_inner | epoch 005: 1668 / 3002 loss=2.591, ppl=6.03, wps=5765.8, ups=0.09, wpb=64789, bsz=128, num_updates=13596, lr=9.98992e-05, gnorm=2.026, loss_scale=8, train_wall=11, gb_free=2.8, wall=156345 2021-06-20 14:04:42 | INFO | train_inner | epoch 005: 1669 / 3002 loss=2.614, ppl=6.12, wps=5776.4, ups=0.09, wpb=64848, bsz=128, num_updates=13597, lr=9.98992e-05, gnorm=8.566, loss_scale=8, train_wall=11, gb_free=2.8, wall=156356 2021-06-20 14:04:53 | INFO | train_inner | epoch 005: 1670 / 3002 loss=2.463, ppl=5.51, wps=5874.5, ups=0.09, wpb=64823, bsz=128, num_updates=13598, lr=9.98992e-05, gnorm=2.274, loss_scale=8, train_wall=11, gb_free=2.8, wall=156368 2021-06-20 14:05:04 | INFO | train_inner | epoch 005: 1671 / 3002 loss=2.632, ppl=6.2, wps=5838.4, ups=0.09, wpb=64769, bsz=128, num_updates=13599, lr=9.98992e-05, gnorm=1.996, loss_scale=8, train_wall=11, gb_free=2.8, wall=156379 2021-06-20 14:05:15 | INFO | train_inner | epoch 005: 1672 / 3002 loss=2.46, ppl=5.5, wps=5856.6, ups=0.09, wpb=64884, bsz=128, num_updates=13600, lr=9.98992e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=156390 2021-06-20 14:05:27 | INFO | train_inner | epoch 005: 1673 / 3002 loss=2.554, ppl=5.87, wps=5802.1, ups=0.09, wpb=64824, bsz=128, num_updates=13601, lr=9.98992e-05, gnorm=2.611, loss_scale=8, train_wall=11, gb_free=2.8, wall=156401 2021-06-20 14:05:38 | INFO | train_inner | epoch 005: 1674 / 3002 loss=2.628, ppl=6.18, wps=5896.2, ups=0.09, wpb=64863, bsz=128, num_updates=13602, lr=9.98992e-05, gnorm=2.147, loss_scale=8, train_wall=11, gb_free=2.8, wall=156412 2021-06-20 14:05:49 | INFO | train_inner | epoch 005: 1675 / 3002 loss=2.547, ppl=5.84, wps=5819.4, ups=0.09, wpb=64815, bsz=128, num_updates=13603, lr=9.98992e-05, gnorm=2.835, loss_scale=8, train_wall=11, gb_free=2.8, wall=156423 2021-06-20 14:06:00 | INFO | train_inner | epoch 005: 1676 / 3002 loss=2.544, ppl=5.83, wps=5862.5, ups=0.09, wpb=64813, bsz=128, num_updates=13604, lr=9.98992e-05, gnorm=2.16, loss_scale=8, train_wall=11, gb_free=2.8, wall=156434 2021-06-20 14:06:11 | INFO | train_inner | epoch 005: 1677 / 3002 loss=2.512, ppl=5.7, wps=5803.2, ups=0.09, wpb=64807, bsz=128, num_updates=13605, lr=9.98992e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=156445 2021-06-20 14:06:22 | INFO | train_inner | epoch 005: 1678 / 3002 loss=2.544, ppl=5.83, wps=5800.3, ups=0.09, wpb=64795, bsz=128, num_updates=13606, lr=9.98991e-05, gnorm=2.812, loss_scale=8, train_wall=11, gb_free=2.8, wall=156456 2021-06-20 14:06:33 | INFO | train_inner | epoch 005: 1679 / 3002 loss=2.429, ppl=5.38, wps=5793, ups=0.09, wpb=64895, bsz=128, num_updates=13607, lr=9.98991e-05, gnorm=2.046, loss_scale=8, train_wall=11, gb_free=2.8, wall=156468 2021-06-20 14:06:44 | INFO | train_inner | epoch 005: 1680 / 3002 loss=2.587, ppl=6.01, wps=5833.1, ups=0.09, wpb=64824, bsz=128, num_updates=13608, lr=9.98991e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=156479 2021-06-20 14:06:56 | INFO | train_inner | epoch 005: 1681 / 3002 loss=2.586, ppl=6.01, wps=5786.5, ups=0.09, wpb=64751, bsz=128, num_updates=13609, lr=9.98991e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=156490 2021-06-20 14:07:07 | INFO | train_inner | epoch 005: 1682 / 3002 loss=2.459, ppl=5.5, wps=5916.8, ups=0.09, wpb=64813, bsz=128, num_updates=13610, lr=9.98991e-05, gnorm=2.04, loss_scale=8, train_wall=11, gb_free=2.8, wall=156501 2021-06-20 14:07:18 | INFO | train_inner | epoch 005: 1683 / 3002 loss=2.355, ppl=5.11, wps=5764.5, ups=0.09, wpb=64797, bsz=128, num_updates=13611, lr=9.98991e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=156512 2021-06-20 14:07:29 | INFO | train_inner | epoch 005: 1684 / 3002 loss=2.646, ppl=6.26, wps=5772.1, ups=0.09, wpb=64813, bsz=128, num_updates=13612, lr=9.98991e-05, gnorm=2.037, loss_scale=8, train_wall=11, gb_free=2.8, wall=156523 2021-06-20 14:07:40 | INFO | train_inner | epoch 005: 1685 / 3002 loss=2.575, ppl=5.96, wps=5946.6, ups=0.09, wpb=64831, bsz=128, num_updates=13613, lr=9.98991e-05, gnorm=2.174, loss_scale=8, train_wall=10, gb_free=2.8, wall=156534 2021-06-20 14:07:51 | INFO | train_inner | epoch 005: 1686 / 3002 loss=2.566, ppl=5.92, wps=5688.6, ups=0.09, wpb=64843, bsz=128, num_updates=13614, lr=9.98991e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=156546 2021-06-20 14:08:02 | INFO | train_inner | epoch 005: 1687 / 3002 loss=2.572, ppl=5.95, wps=5903.7, ups=0.09, wpb=64835, bsz=128, num_updates=13615, lr=9.98991e-05, gnorm=2.04, loss_scale=8, train_wall=11, gb_free=2.8, wall=156557 2021-06-20 14:08:14 | INFO | train_inner | epoch 005: 1688 / 3002 loss=2.699, ppl=6.49, wps=5746.9, ups=0.09, wpb=64753, bsz=128, num_updates=13616, lr=9.98991e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=156568 2021-06-20 14:08:25 | INFO | train_inner | epoch 005: 1689 / 3002 loss=2.506, ppl=5.68, wps=5854.4, ups=0.09, wpb=64859, bsz=128, num_updates=13617, lr=9.98991e-05, gnorm=1.878, loss_scale=8, train_wall=11, gb_free=2.8, wall=156579 2021-06-20 14:08:36 | INFO | train_inner | epoch 005: 1690 / 3002 loss=2.488, ppl=5.61, wps=5916.6, ups=0.09, wpb=64863, bsz=128, num_updates=13618, lr=9.9899e-05, gnorm=1.889, loss_scale=8, train_wall=10, gb_free=2.8, wall=156590 2021-06-20 14:08:47 | INFO | train_inner | epoch 005: 1691 / 3002 loss=2.37, ppl=5.17, wps=5892.3, ups=0.09, wpb=64824, bsz=128, num_updates=13619, lr=9.9899e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=156601 2021-06-20 14:08:58 | INFO | train_inner | epoch 005: 1692 / 3002 loss=2.518, ppl=5.73, wps=5743.3, ups=0.09, wpb=64842, bsz=128, num_updates=13620, lr=9.9899e-05, gnorm=1.953, loss_scale=8, train_wall=11, gb_free=2.8, wall=156612 2021-06-20 14:09:09 | INFO | train_inner | epoch 005: 1693 / 3002 loss=2.621, ppl=6.15, wps=5736.8, ups=0.09, wpb=64822, bsz=128, num_updates=13621, lr=9.9899e-05, gnorm=1.925, loss_scale=16, train_wall=11, gb_free=2.8, wall=156624 2021-06-20 14:09:20 | INFO | train_inner | epoch 005: 1694 / 3002 loss=2.634, ppl=6.21, wps=5795.5, ups=0.09, wpb=64779, bsz=128, num_updates=13622, lr=9.9899e-05, gnorm=2.139, loss_scale=16, train_wall=11, gb_free=2.8, wall=156635 2021-06-20 14:09:31 | INFO | train_inner | epoch 005: 1695 / 3002 loss=2.572, ppl=5.95, wps=5832.1, ups=0.09, wpb=64810, bsz=128, num_updates=13623, lr=9.9899e-05, gnorm=1.985, loss_scale=16, train_wall=11, gb_free=2.8, wall=156646 2021-06-20 14:09:42 | INFO | train_inner | epoch 005: 1696 / 3002 loss=2.527, ppl=5.76, wps=5909.2, ups=0.09, wpb=64828, bsz=128, num_updates=13624, lr=9.9899e-05, gnorm=2.065, loss_scale=16, train_wall=11, gb_free=2.8, wall=156657 2021-06-20 14:09:54 | INFO | train_inner | epoch 005: 1697 / 3002 loss=2.561, ppl=5.9, wps=5809.9, ups=0.09, wpb=64799, bsz=128, num_updates=13625, lr=9.9899e-05, gnorm=1.922, loss_scale=16, train_wall=11, gb_free=2.8, wall=156668 2021-06-20 14:10:05 | INFO | train_inner | epoch 005: 1698 / 3002 loss=2.467, ppl=5.53, wps=5891.4, ups=0.09, wpb=64754, bsz=128, num_updates=13626, lr=9.9899e-05, gnorm=1.986, loss_scale=16, train_wall=11, gb_free=2.8, wall=156679 2021-06-20 14:10:16 | INFO | train_inner | epoch 005: 1699 / 3002 loss=2.337, ppl=5.05, wps=5900.8, ups=0.09, wpb=64862, bsz=128, num_updates=13627, lr=9.9899e-05, gnorm=1.95, loss_scale=16, train_wall=11, gb_free=2.8, wall=156690 2021-06-20 14:10:27 | INFO | train_inner | epoch 005: 1700 / 3002 loss=2.516, ppl=5.72, wps=5822.2, ups=0.09, wpb=64805, bsz=128, num_updates=13628, lr=9.9899e-05, gnorm=1.981, loss_scale=16, train_wall=11, gb_free=2.8, wall=156701 2021-06-20 14:10:38 | INFO | train_inner | epoch 005: 1701 / 3002 loss=2.373, ppl=5.18, wps=5896.5, ups=0.09, wpb=64868, bsz=128, num_updates=13629, lr=9.9899e-05, gnorm=2.013, loss_scale=16, train_wall=11, gb_free=2.8, wall=156712 2021-06-20 14:10:49 | INFO | train_inner | epoch 005: 1702 / 3002 loss=2.45, ppl=5.47, wps=5817.1, ups=0.09, wpb=64767, bsz=128, num_updates=13630, lr=9.9899e-05, gnorm=1.932, loss_scale=16, train_wall=11, gb_free=2.8, wall=156723 2021-06-20 14:11:00 | INFO | train_inner | epoch 005: 1703 / 3002 loss=2.342, ppl=5.07, wps=5877.1, ups=0.09, wpb=64854, bsz=128, num_updates=13631, lr=9.98989e-05, gnorm=1.943, loss_scale=16, train_wall=11, gb_free=2.8, wall=156734 2021-06-20 14:11:11 | INFO | train_inner | epoch 005: 1704 / 3002 loss=2.507, ppl=5.69, wps=5796.1, ups=0.09, wpb=64878, bsz=128, num_updates=13632, lr=9.98989e-05, gnorm=1.922, loss_scale=16, train_wall=11, gb_free=2.8, wall=156745 2021-06-20 14:11:22 | INFO | train_inner | epoch 005: 1705 / 3002 loss=2.455, ppl=5.48, wps=5919.7, ups=0.09, wpb=64796, bsz=128, num_updates=13633, lr=9.98989e-05, gnorm=1.9, loss_scale=16, train_wall=10, gb_free=2.8, wall=156756 2021-06-20 14:11:33 | INFO | train_inner | epoch 005: 1706 / 3002 loss=2.638, ppl=6.22, wps=5858.9, ups=0.09, wpb=64810, bsz=128, num_updates=13634, lr=9.98989e-05, gnorm=1.996, loss_scale=16, train_wall=11, gb_free=2.8, wall=156767 2021-06-20 14:11:44 | INFO | train_inner | epoch 005: 1707 / 3002 loss=2.618, ppl=6.14, wps=5972.2, ups=0.09, wpb=64891, bsz=128, num_updates=13635, lr=9.98989e-05, gnorm=1.993, loss_scale=16, train_wall=10, gb_free=2.8, wall=156778 2021-06-20 14:11:55 | INFO | train_inner | epoch 005: 1708 / 3002 loss=2.493, ppl=5.63, wps=5823.4, ups=0.09, wpb=64861, bsz=128, num_updates=13636, lr=9.98989e-05, gnorm=1.926, loss_scale=16, train_wall=11, gb_free=2.8, wall=156789 2021-06-20 14:12:06 | INFO | train_inner | epoch 005: 1709 / 3002 loss=2.457, ppl=5.49, wps=5884, ups=0.09, wpb=64851, bsz=128, num_updates=13637, lr=9.98989e-05, gnorm=1.955, loss_scale=16, train_wall=11, gb_free=2.8, wall=156800 2021-06-20 14:12:17 | INFO | train_inner | epoch 005: 1710 / 3002 loss=2.483, ppl=5.59, wps=5752.5, ups=0.09, wpb=64859, bsz=128, num_updates=13638, lr=9.98989e-05, gnorm=1.957, loss_scale=16, train_wall=11, gb_free=2.8, wall=156812 2021-06-20 14:12:29 | INFO | train_inner | epoch 005: 1711 / 3002 loss=2.4, ppl=5.28, wps=5820, ups=0.09, wpb=64810, bsz=128, num_updates=13639, lr=9.98989e-05, gnorm=2.007, loss_scale=16, train_wall=11, gb_free=2.8, wall=156823 2021-06-20 14:12:40 | INFO | train_inner | epoch 005: 1712 / 3002 loss=2.703, ppl=6.51, wps=5765.5, ups=0.09, wpb=64784, bsz=128, num_updates=13640, lr=9.98989e-05, gnorm=2.104, loss_scale=16, train_wall=11, gb_free=2.8, wall=156834 2021-06-20 14:12:51 | INFO | train_inner | epoch 005: 1713 / 3002 loss=2.495, ppl=5.64, wps=5782.4, ups=0.09, wpb=64869, bsz=128, num_updates=13641, lr=9.98989e-05, gnorm=1.889, loss_scale=16, train_wall=11, gb_free=2.8, wall=156845 2021-06-20 14:13:02 | INFO | train_inner | epoch 005: 1714 / 3002 loss=2.479, ppl=5.58, wps=5781.4, ups=0.09, wpb=64849, bsz=128, num_updates=13642, lr=9.98989e-05, gnorm=1.925, loss_scale=16, train_wall=11, gb_free=2.8, wall=156857 2021-06-20 14:13:13 | INFO | train_inner | epoch 005: 1715 / 3002 loss=2.54, ppl=5.81, wps=5871.4, ups=0.09, wpb=64779, bsz=128, num_updates=13643, lr=9.98988e-05, gnorm=1.929, loss_scale=16, train_wall=11, gb_free=2.8, wall=156868 2021-06-20 14:13:24 | INFO | train_inner | epoch 005: 1716 / 3002 loss=2.452, ppl=5.47, wps=5781.2, ups=0.09, wpb=64724, bsz=128, num_updates=13644, lr=9.98988e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=156879 2021-06-20 14:13:35 | INFO | train_inner | epoch 005: 1717 / 3002 loss=2.609, ppl=6.1, wps=5870.6, ups=0.09, wpb=64810, bsz=128, num_updates=13645, lr=9.98988e-05, gnorm=2.008, loss_scale=16, train_wall=11, gb_free=2.8, wall=156890 2021-06-20 14:13:47 | INFO | train_inner | epoch 005: 1718 / 3002 loss=2.569, ppl=5.93, wps=5693.9, ups=0.09, wpb=64852, bsz=128, num_updates=13646, lr=9.98988e-05, gnorm=1.962, loss_scale=16, train_wall=11, gb_free=2.8, wall=156901 2021-06-20 14:13:58 | INFO | train_inner | epoch 005: 1719 / 3002 loss=2.479, ppl=5.58, wps=5935, ups=0.09, wpb=64831, bsz=128, num_updates=13647, lr=9.98988e-05, gnorm=1.952, loss_scale=16, train_wall=10, gb_free=2.8, wall=156912 2021-06-20 14:14:09 | INFO | train_inner | epoch 005: 1720 / 3002 loss=2.45, ppl=5.46, wps=5836.1, ups=0.09, wpb=64886, bsz=128, num_updates=13648, lr=9.98988e-05, gnorm=2.066, loss_scale=16, train_wall=11, gb_free=2.8, wall=156923 2021-06-20 14:14:20 | INFO | train_inner | epoch 005: 1721 / 3002 loss=2.44, ppl=5.43, wps=5773.6, ups=0.09, wpb=64737, bsz=128, num_updates=13649, lr=9.98988e-05, gnorm=2.004, loss_scale=16, train_wall=11, gb_free=2.8, wall=156934 2021-06-20 14:14:31 | INFO | train_inner | epoch 005: 1722 / 3002 loss=2.652, ppl=6.28, wps=6043, ups=0.09, wpb=64896, bsz=128, num_updates=13650, lr=9.98988e-05, gnorm=1.931, loss_scale=16, train_wall=10, gb_free=2.8, wall=156945 2021-06-20 14:14:42 | INFO | train_inner | epoch 005: 1723 / 3002 loss=2.553, ppl=5.87, wps=5844.7, ups=0.09, wpb=64861, bsz=128, num_updates=13651, lr=9.98988e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=156956 2021-06-20 14:14:53 | INFO | train_inner | epoch 005: 1724 / 3002 loss=2.535, ppl=5.8, wps=5728.9, ups=0.09, wpb=64794, bsz=128, num_updates=13652, lr=9.98988e-05, gnorm=2.055, loss_scale=16, train_wall=11, gb_free=2.8, wall=156968 2021-06-20 14:15:04 | INFO | train_inner | epoch 005: 1725 / 3002 loss=2.474, ppl=5.56, wps=5842.6, ups=0.09, wpb=64830, bsz=128, num_updates=13653, lr=9.98988e-05, gnorm=1.937, loss_scale=16, train_wall=11, gb_free=2.8, wall=156979 2021-06-20 14:15:15 | INFO | train_inner | epoch 005: 1726 / 3002 loss=2.448, ppl=5.46, wps=5826.5, ups=0.09, wpb=64912, bsz=128, num_updates=13654, lr=9.98988e-05, gnorm=2.034, loss_scale=16, train_wall=11, gb_free=2.8, wall=156990 2021-06-20 14:15:27 | INFO | train_inner | epoch 005: 1727 / 3002 loss=2.378, ppl=5.2, wps=5872.1, ups=0.09, wpb=64846, bsz=128, num_updates=13655, lr=9.98988e-05, gnorm=1.964, loss_scale=16, train_wall=11, gb_free=2.8, wall=157001 2021-06-20 14:15:37 | INFO | train_inner | epoch 005: 1728 / 3002 loss=2.539, ppl=5.81, wps=5923.4, ups=0.09, wpb=64839, bsz=128, num_updates=13656, lr=9.98987e-05, gnorm=1.921, loss_scale=16, train_wall=10, gb_free=2.8, wall=157012 2021-06-20 14:15:49 | INFO | train_inner | epoch 005: 1729 / 3002 loss=2.562, ppl=5.91, wps=5850.8, ups=0.09, wpb=64778, bsz=128, num_updates=13657, lr=9.98987e-05, gnorm=1.888, loss_scale=16, train_wall=11, gb_free=2.8, wall=157023 2021-06-20 14:16:00 | INFO | train_inner | epoch 005: 1730 / 3002 loss=2.459, ppl=5.5, wps=5881.2, ups=0.09, wpb=64827, bsz=128, num_updates=13658, lr=9.98987e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=157034 2021-06-20 14:16:11 | INFO | train_inner | epoch 005: 1731 / 3002 loss=2.542, ppl=5.83, wps=5845.1, ups=0.09, wpb=64748, bsz=128, num_updates=13659, lr=9.98987e-05, gnorm=2.267, loss_scale=16, train_wall=11, gb_free=2.8, wall=157045 2021-06-20 14:16:22 | INFO | train_inner | epoch 005: 1732 / 3002 loss=2.708, ppl=6.54, wps=5785.7, ups=0.09, wpb=64704, bsz=128, num_updates=13660, lr=9.98987e-05, gnorm=2.029, loss_scale=16, train_wall=11, gb_free=2.8, wall=157056 2021-06-20 14:16:33 | INFO | train_inner | epoch 005: 1733 / 3002 loss=2.407, ppl=5.3, wps=5749.8, ups=0.09, wpb=64772, bsz=128, num_updates=13661, lr=9.98987e-05, gnorm=1.882, loss_scale=16, train_wall=11, gb_free=2.8, wall=157067 2021-06-20 14:16:44 | INFO | train_inner | epoch 005: 1734 / 3002 loss=2.421, ppl=5.36, wps=5826.5, ups=0.09, wpb=64738, bsz=128, num_updates=13662, lr=9.98987e-05, gnorm=1.916, loss_scale=16, train_wall=11, gb_free=2.8, wall=157079 2021-06-20 14:16:55 | INFO | train_inner | epoch 005: 1735 / 3002 loss=2.464, ppl=5.52, wps=5838.8, ups=0.09, wpb=64952, bsz=128, num_updates=13663, lr=9.98987e-05, gnorm=1.887, loss_scale=16, train_wall=11, gb_free=2.8, wall=157090 2021-06-20 14:17:06 | INFO | train_inner | epoch 005: 1736 / 3002 loss=2.462, ppl=5.51, wps=5816, ups=0.09, wpb=64867, bsz=128, num_updates=13664, lr=9.98987e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=157101 2021-06-20 14:17:18 | INFO | train_inner | epoch 005: 1737 / 3002 loss=2.497, ppl=5.65, wps=5801, ups=0.09, wpb=64789, bsz=128, num_updates=13665, lr=9.98987e-05, gnorm=1.962, loss_scale=16, train_wall=11, gb_free=2.8, wall=157112 2021-06-20 14:17:29 | INFO | train_inner | epoch 005: 1738 / 3002 loss=2.517, ppl=5.73, wps=5959.9, ups=0.09, wpb=64868, bsz=128, num_updates=13666, lr=9.98987e-05, gnorm=2.039, loss_scale=16, train_wall=10, gb_free=2.8, wall=157123 2021-06-20 14:17:40 | INFO | train_inner | epoch 005: 1739 / 3002 loss=2.483, ppl=5.59, wps=5907.6, ups=0.09, wpb=64878, bsz=128, num_updates=13667, lr=9.98987e-05, gnorm=2.021, loss_scale=16, train_wall=11, gb_free=2.8, wall=157134 2021-06-20 14:17:51 | INFO | train_inner | epoch 005: 1740 / 3002 loss=2.534, ppl=5.79, wps=5827.3, ups=0.09, wpb=64722, bsz=128, num_updates=13668, lr=9.98986e-05, gnorm=1.995, loss_scale=16, train_wall=11, gb_free=2.8, wall=157145 2021-06-20 14:18:02 | INFO | train_inner | epoch 005: 1741 / 3002 loss=2.628, ppl=6.18, wps=5756.5, ups=0.09, wpb=64812, bsz=128, num_updates=13669, lr=9.98986e-05, gnorm=1.941, loss_scale=16, train_wall=11, gb_free=2.8, wall=157156 2021-06-20 14:18:13 | INFO | train_inner | epoch 005: 1742 / 3002 loss=2.558, ppl=5.89, wps=5884.1, ups=0.09, wpb=64939, bsz=128, num_updates=13670, lr=9.98986e-05, gnorm=1.967, loss_scale=16, train_wall=11, gb_free=2.8, wall=157167 2021-06-20 14:18:24 | INFO | train_inner | epoch 005: 1743 / 3002 loss=2.464, ppl=5.52, wps=5825.4, ups=0.09, wpb=64866, bsz=128, num_updates=13671, lr=9.98986e-05, gnorm=1.897, loss_scale=16, train_wall=11, gb_free=2.8, wall=157178 2021-06-20 14:18:35 | INFO | train_inner | epoch 005: 1744 / 3002 loss=2.525, ppl=5.76, wps=5858.4, ups=0.09, wpb=64808, bsz=128, num_updates=13672, lr=9.98986e-05, gnorm=1.932, loss_scale=16, train_wall=11, gb_free=2.8, wall=157189 2021-06-20 14:18:46 | INFO | train_inner | epoch 005: 1745 / 3002 loss=2.465, ppl=5.52, wps=5901, ups=0.09, wpb=64812, bsz=128, num_updates=13673, lr=9.98986e-05, gnorm=2.005, loss_scale=16, train_wall=11, gb_free=2.8, wall=157200 2021-06-20 14:18:57 | INFO | train_inner | epoch 005: 1746 / 3002 loss=2.419, ppl=5.35, wps=5926.2, ups=0.09, wpb=64799, bsz=128, num_updates=13674, lr=9.98986e-05, gnorm=1.945, loss_scale=16, train_wall=10, gb_free=2.8, wall=157211 2021-06-20 14:19:08 | INFO | train_inner | epoch 005: 1747 / 3002 loss=2.44, ppl=5.43, wps=5810.9, ups=0.09, wpb=64771, bsz=128, num_updates=13675, lr=9.98986e-05, gnorm=2.005, loss_scale=16, train_wall=11, gb_free=2.8, wall=157223 2021-06-20 14:19:19 | INFO | train_inner | epoch 005: 1748 / 3002 loss=2.408, ppl=5.31, wps=5826.8, ups=0.09, wpb=64831, bsz=128, num_updates=13676, lr=9.98986e-05, gnorm=1.981, loss_scale=16, train_wall=11, gb_free=2.8, wall=157234 2021-06-20 14:19:30 | INFO | train_inner | epoch 005: 1749 / 3002 loss=2.491, ppl=5.62, wps=5805.5, ups=0.09, wpb=64859, bsz=128, num_updates=13677, lr=9.98986e-05, gnorm=1.987, loss_scale=16, train_wall=11, gb_free=2.8, wall=157245 2021-06-20 14:19:42 | INFO | train_inner | epoch 005: 1750 / 3002 loss=2.535, ppl=5.8, wps=5848.4, ups=0.09, wpb=64874, bsz=128, num_updates=13678, lr=9.98986e-05, gnorm=2.097, loss_scale=16, train_wall=11, gb_free=2.8, wall=157256 2021-06-20 14:19:53 | INFO | train_inner | epoch 005: 1751 / 3002 loss=2.423, ppl=5.36, wps=5732.9, ups=0.09, wpb=64828, bsz=128, num_updates=13679, lr=9.98986e-05, gnorm=1.963, loss_scale=16, train_wall=11, gb_free=2.8, wall=157267 2021-06-20 14:20:04 | INFO | train_inner | epoch 005: 1752 / 3002 loss=2.469, ppl=5.54, wps=5822.9, ups=0.09, wpb=64825, bsz=128, num_updates=13680, lr=9.98986e-05, gnorm=1.981, loss_scale=16, train_wall=11, gb_free=2.8, wall=157278 2021-06-20 14:20:15 | INFO | train_inner | epoch 005: 1753 / 3002 loss=2.529, ppl=5.77, wps=5788.1, ups=0.09, wpb=64858, bsz=128, num_updates=13681, lr=9.98985e-05, gnorm=1.989, loss_scale=16, train_wall=11, gb_free=2.8, wall=157290 2021-06-20 14:20:26 | INFO | train_inner | epoch 005: 1754 / 3002 loss=2.754, ppl=6.75, wps=5843.9, ups=0.09, wpb=64843, bsz=128, num_updates=13682, lr=9.98985e-05, gnorm=2.073, loss_scale=16, train_wall=11, gb_free=2.8, wall=157301 2021-06-20 14:20:37 | INFO | train_inner | epoch 005: 1755 / 3002 loss=2.456, ppl=5.49, wps=5849.6, ups=0.09, wpb=64728, bsz=128, num_updates=13683, lr=9.98985e-05, gnorm=2.014, loss_scale=16, train_wall=11, gb_free=2.8, wall=157312 2021-06-20 14:20:49 | INFO | train_inner | epoch 005: 1756 / 3002 loss=2.364, ppl=5.15, wps=5830.5, ups=0.09, wpb=64916, bsz=128, num_updates=13684, lr=9.98985e-05, gnorm=1.882, loss_scale=16, train_wall=11, gb_free=2.8, wall=157323 2021-06-20 14:21:00 | INFO | train_inner | epoch 005: 1757 / 3002 loss=2.499, ppl=5.65, wps=5881.1, ups=0.09, wpb=64815, bsz=128, num_updates=13685, lr=9.98985e-05, gnorm=1.98, loss_scale=16, train_wall=11, gb_free=2.8, wall=157334 2021-06-20 14:21:11 | INFO | train_inner | epoch 005: 1758 / 3002 loss=2.485, ppl=5.6, wps=5891.1, ups=0.09, wpb=64755, bsz=128, num_updates=13686, lr=9.98985e-05, gnorm=1.944, loss_scale=16, train_wall=11, gb_free=2.8, wall=157345 2021-06-20 14:21:22 | INFO | train_inner | epoch 005: 1759 / 3002 loss=2.603, ppl=6.08, wps=5835.5, ups=0.09, wpb=64853, bsz=128, num_updates=13687, lr=9.98985e-05, gnorm=2.091, loss_scale=16, train_wall=11, gb_free=2.8, wall=157356 2021-06-20 14:21:33 | INFO | train_inner | epoch 005: 1760 / 3002 loss=2.45, ppl=5.47, wps=5741, ups=0.09, wpb=64756, bsz=128, num_updates=13688, lr=9.98985e-05, gnorm=1.942, loss_scale=16, train_wall=11, gb_free=2.8, wall=157367 2021-06-20 14:21:44 | INFO | train_inner | epoch 005: 1761 / 3002 loss=2.479, ppl=5.58, wps=5821.8, ups=0.09, wpb=64850, bsz=128, num_updates=13689, lr=9.98985e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=157378 2021-06-20 14:21:55 | INFO | train_inner | epoch 005: 1762 / 3002 loss=2.357, ppl=5.12, wps=5792.5, ups=0.09, wpb=64869, bsz=128, num_updates=13690, lr=9.98985e-05, gnorm=1.985, loss_scale=16, train_wall=11, gb_free=2.8, wall=157390 2021-06-20 14:22:06 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-20 14:22:17 | INFO | train_inner | epoch 005: 1764 / 3002 loss=2.537, ppl=5.8, wps=2930.5, ups=0.05, wpb=64845, bsz=128, num_updates=13691, lr=9.98985e-05, gnorm=1.99, loss_scale=8, train_wall=21, gb_free=2.8, wall=157412 2021-06-20 14:22:28 | INFO | train_inner | epoch 005: 1765 / 3002 loss=2.498, ppl=5.65, wps=5899.9, ups=0.09, wpb=64794, bsz=128, num_updates=13692, lr=9.98985e-05, gnorm=2.018, loss_scale=8, train_wall=11, gb_free=2.8, wall=157423 2021-06-20 14:22:40 | INFO | train_inner | epoch 005: 1766 / 3002 loss=2.658, ppl=6.31, wps=5817.3, ups=0.09, wpb=64754, bsz=128, num_updates=13693, lr=9.98984e-05, gnorm=1.972, loss_scale=8, train_wall=11, gb_free=2.8, wall=157434 2021-06-20 14:22:51 | INFO | train_inner | epoch 005: 1767 / 3002 loss=2.473, ppl=5.55, wps=5768.6, ups=0.09, wpb=64727, bsz=128, num_updates=13694, lr=9.98984e-05, gnorm=2.036, loss_scale=8, train_wall=11, gb_free=2.8, wall=157445 2021-06-20 14:23:02 | INFO | train_inner | epoch 005: 1768 / 3002 loss=2.657, ppl=6.31, wps=5818.7, ups=0.09, wpb=64692, bsz=128, num_updates=13695, lr=9.98984e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=157456 2021-06-20 14:23:13 | INFO | train_inner | epoch 005: 1769 / 3002 loss=2.561, ppl=5.9, wps=5757.4, ups=0.09, wpb=64805, bsz=128, num_updates=13696, lr=9.98984e-05, gnorm=2.017, loss_scale=8, train_wall=11, gb_free=2.8, wall=157467 2021-06-20 14:23:24 | INFO | train_inner | epoch 005: 1770 / 3002 loss=2.419, ppl=5.35, wps=5882.2, ups=0.09, wpb=64839, bsz=128, num_updates=13697, lr=9.98984e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=157478 2021-06-20 14:23:35 | INFO | train_inner | epoch 005: 1771 / 3002 loss=2.632, ppl=6.2, wps=5805.5, ups=0.09, wpb=64806, bsz=128, num_updates=13698, lr=9.98984e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=157490 2021-06-20 14:23:46 | INFO | train_inner | epoch 005: 1772 / 3002 loss=2.647, ppl=6.26, wps=5802.5, ups=0.09, wpb=64829, bsz=128, num_updates=13699, lr=9.98984e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=157501 2021-06-20 14:23:57 | INFO | train_inner | epoch 005: 1773 / 3002 loss=2.405, ppl=5.3, wps=5922.1, ups=0.09, wpb=64883, bsz=128, num_updates=13700, lr=9.98984e-05, gnorm=1.879, loss_scale=8, train_wall=11, gb_free=2.8, wall=157512 2021-06-20 14:24:08 | INFO | train_inner | epoch 005: 1774 / 3002 loss=2.48, ppl=5.58, wps=5931.3, ups=0.09, wpb=64826, bsz=128, num_updates=13701, lr=9.98984e-05, gnorm=1.965, loss_scale=8, train_wall=10, gb_free=2.8, wall=157523 2021-06-20 14:24:19 | INFO | train_inner | epoch 005: 1775 / 3002 loss=2.554, ppl=5.87, wps=5986, ups=0.09, wpb=64854, bsz=128, num_updates=13702, lr=9.98984e-05, gnorm=1.899, loss_scale=8, train_wall=10, gb_free=2.8, wall=157534 2021-06-20 14:24:30 | INFO | train_inner | epoch 005: 1776 / 3002 loss=2.535, ppl=5.8, wps=5783.8, ups=0.09, wpb=64826, bsz=128, num_updates=13703, lr=9.98984e-05, gnorm=2.024, loss_scale=8, train_wall=11, gb_free=2.8, wall=157545 2021-06-20 14:24:41 | INFO | train_inner | epoch 005: 1777 / 3002 loss=2.461, ppl=5.5, wps=5872.3, ups=0.09, wpb=64900, bsz=128, num_updates=13704, lr=9.98984e-05, gnorm=1.997, loss_scale=8, train_wall=11, gb_free=2.8, wall=157556 2021-06-20 14:24:53 | INFO | train_inner | epoch 005: 1778 / 3002 loss=2.471, ppl=5.55, wps=5804.9, ups=0.09, wpb=64805, bsz=128, num_updates=13705, lr=9.98984e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=157567 2021-06-20 14:25:04 | INFO | train_inner | epoch 005: 1779 / 3002 loss=2.576, ppl=5.96, wps=5688.1, ups=0.09, wpb=64696, bsz=128, num_updates=13706, lr=9.98983e-05, gnorm=2.281, loss_scale=8, train_wall=11, gb_free=2.8, wall=157578 2021-06-20 14:25:15 | INFO | train_inner | epoch 005: 1780 / 3002 loss=2.457, ppl=5.49, wps=5934.6, ups=0.09, wpb=64772, bsz=128, num_updates=13707, lr=9.98983e-05, gnorm=1.864, loss_scale=8, train_wall=10, gb_free=2.8, wall=157589 2021-06-20 14:25:26 | INFO | train_inner | epoch 005: 1781 / 3002 loss=2.609, ppl=6.1, wps=5865.9, ups=0.09, wpb=64821, bsz=128, num_updates=13708, lr=9.98983e-05, gnorm=1.989, loss_scale=8, train_wall=11, gb_free=2.8, wall=157600 2021-06-20 14:25:37 | INFO | train_inner | epoch 005: 1782 / 3002 loss=2.436, ppl=5.41, wps=5688, ups=0.09, wpb=64793, bsz=128, num_updates=13709, lr=9.98983e-05, gnorm=2.127, loss_scale=8, train_wall=11, gb_free=2.8, wall=157612 2021-06-20 14:25:48 | INFO | train_inner | epoch 005: 1783 / 3002 loss=2.459, ppl=5.5, wps=5839.6, ups=0.09, wpb=64833, bsz=128, num_updates=13710, lr=9.98983e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=157623 2021-06-20 14:25:59 | INFO | train_inner | epoch 005: 1784 / 3002 loss=2.516, ppl=5.72, wps=5868.7, ups=0.09, wpb=64856, bsz=128, num_updates=13711, lr=9.98983e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=157634 2021-06-20 14:26:11 | INFO | train_inner | epoch 005: 1785 / 3002 loss=2.449, ppl=5.46, wps=5831.5, ups=0.09, wpb=64933, bsz=128, num_updates=13712, lr=9.98983e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=157645 2021-06-20 14:26:21 | INFO | train_inner | epoch 005: 1786 / 3002 loss=2.544, ppl=5.83, wps=5978.3, ups=0.09, wpb=64841, bsz=128, num_updates=13713, lr=9.98983e-05, gnorm=1.978, loss_scale=8, train_wall=10, gb_free=2.8, wall=157656 2021-06-20 14:26:33 | INFO | train_inner | epoch 005: 1787 / 3002 loss=2.508, ppl=5.69, wps=5775.7, ups=0.09, wpb=64846, bsz=128, num_updates=13714, lr=9.98983e-05, gnorm=2.004, loss_scale=8, train_wall=11, gb_free=2.8, wall=157667 2021-06-20 14:26:44 | INFO | train_inner | epoch 005: 1788 / 3002 loss=2.491, ppl=5.62, wps=5999.3, ups=0.09, wpb=64818, bsz=128, num_updates=13715, lr=9.98983e-05, gnorm=1.94, loss_scale=8, train_wall=10, gb_free=2.8, wall=157678 2021-06-20 14:26:55 | INFO | train_inner | epoch 005: 1789 / 3002 loss=2.531, ppl=5.78, wps=5865.8, ups=0.09, wpb=64873, bsz=128, num_updates=13716, lr=9.98983e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=157689 2021-06-20 14:27:06 | INFO | train_inner | epoch 005: 1790 / 3002 loss=2.645, ppl=6.25, wps=5911.6, ups=0.09, wpb=64794, bsz=128, num_updates=13717, lr=9.98983e-05, gnorm=1.979, loss_scale=8, train_wall=10, gb_free=2.8, wall=157700 2021-06-20 14:27:17 | INFO | train_inner | epoch 005: 1791 / 3002 loss=2.607, ppl=6.09, wps=5857.9, ups=0.09, wpb=64801, bsz=128, num_updates=13718, lr=9.98982e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=157711 2021-06-20 14:27:28 | INFO | train_inner | epoch 005: 1792 / 3002 loss=2.618, ppl=6.14, wps=5873.9, ups=0.09, wpb=64767, bsz=128, num_updates=13719, lr=9.98982e-05, gnorm=1.989, loss_scale=8, train_wall=11, gb_free=2.8, wall=157722 2021-06-20 14:27:39 | INFO | train_inner | epoch 005: 1793 / 3002 loss=2.464, ppl=5.52, wps=5893.1, ups=0.09, wpb=64830, bsz=128, num_updates=13720, lr=9.98982e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=157733 2021-06-20 14:27:50 | INFO | train_inner | epoch 005: 1794 / 3002 loss=2.512, ppl=5.7, wps=5739.5, ups=0.09, wpb=64829, bsz=128, num_updates=13721, lr=9.98982e-05, gnorm=1.97, loss_scale=8, train_wall=11, gb_free=2.8, wall=157744 2021-06-20 14:28:01 | INFO | train_inner | epoch 005: 1795 / 3002 loss=2.428, ppl=5.38, wps=5754.5, ups=0.09, wpb=64791, bsz=128, num_updates=13722, lr=9.98982e-05, gnorm=2.01, loss_scale=8, train_wall=11, gb_free=2.8, wall=157756 2021-06-20 14:28:12 | INFO | train_inner | epoch 005: 1796 / 3002 loss=2.555, ppl=5.88, wps=5934.6, ups=0.09, wpb=64876, bsz=128, num_updates=13723, lr=9.98982e-05, gnorm=1.943, loss_scale=8, train_wall=10, gb_free=2.8, wall=157766 2021-06-20 14:28:23 | INFO | train_inner | epoch 005: 1797 / 3002 loss=2.454, ppl=5.48, wps=5825.9, ups=0.09, wpb=64787, bsz=128, num_updates=13724, lr=9.98982e-05, gnorm=2.053, loss_scale=8, train_wall=11, gb_free=2.8, wall=157778 2021-06-20 14:28:34 | INFO | train_inner | epoch 005: 1798 / 3002 loss=2.46, ppl=5.5, wps=5875.7, ups=0.09, wpb=64898, bsz=128, num_updates=13725, lr=9.98982e-05, gnorm=1.954, loss_scale=8, train_wall=11, gb_free=2.8, wall=157789 2021-06-20 14:28:45 | INFO | train_inner | epoch 005: 1799 / 3002 loss=2.41, ppl=5.32, wps=5902.3, ups=0.09, wpb=64865, bsz=128, num_updates=13726, lr=9.98982e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=157800 2021-06-20 14:28:56 | INFO | train_inner | epoch 005: 1800 / 3002 loss=2.399, ppl=5.27, wps=5973.5, ups=0.09, wpb=64922, bsz=128, num_updates=13727, lr=9.98982e-05, gnorm=1.964, loss_scale=8, train_wall=10, gb_free=2.8, wall=157810 2021-06-20 14:29:07 | INFO | train_inner | epoch 005: 1801 / 3002 loss=2.463, ppl=5.51, wps=5785.7, ups=0.09, wpb=64768, bsz=128, num_updates=13728, lr=9.98982e-05, gnorm=1.952, loss_scale=8, train_wall=11, gb_free=2.8, wall=157822 2021-06-20 14:29:18 | INFO | train_inner | epoch 005: 1802 / 3002 loss=2.644, ppl=6.25, wps=5867.1, ups=0.09, wpb=64839, bsz=128, num_updates=13729, lr=9.98982e-05, gnorm=2.068, loss_scale=8, train_wall=11, gb_free=2.8, wall=157833 2021-06-20 14:29:29 | INFO | train_inner | epoch 005: 1803 / 3002 loss=2.618, ppl=6.14, wps=5947, ups=0.09, wpb=64851, bsz=128, num_updates=13730, lr=9.98982e-05, gnorm=2.005, loss_scale=8, train_wall=10, gb_free=2.8, wall=157844 2021-06-20 14:29:40 | INFO | train_inner | epoch 005: 1804 / 3002 loss=2.478, ppl=5.57, wps=5825.9, ups=0.09, wpb=64866, bsz=128, num_updates=13731, lr=9.98981e-05, gnorm=1.954, loss_scale=8, train_wall=11, gb_free=2.8, wall=157855 2021-06-20 14:29:52 | INFO | train_inner | epoch 005: 1805 / 3002 loss=2.49, ppl=5.62, wps=5798.2, ups=0.09, wpb=64883, bsz=128, num_updates=13732, lr=9.98981e-05, gnorm=1.912, loss_scale=8, train_wall=11, gb_free=2.8, wall=157866 2021-06-20 14:30:03 | INFO | train_inner | epoch 005: 1806 / 3002 loss=2.375, ppl=5.19, wps=5921.6, ups=0.09, wpb=64861, bsz=128, num_updates=13733, lr=9.98981e-05, gnorm=2.098, loss_scale=8, train_wall=11, gb_free=2.8, wall=157877 2021-06-20 14:30:14 | INFO | train_inner | epoch 005: 1807 / 3002 loss=2.43, ppl=5.39, wps=5731.1, ups=0.09, wpb=64794, bsz=128, num_updates=13734, lr=9.98981e-05, gnorm=2.034, loss_scale=8, train_wall=11, gb_free=2.8, wall=157888 2021-06-20 14:30:25 | INFO | train_inner | epoch 005: 1808 / 3002 loss=2.455, ppl=5.48, wps=5835.9, ups=0.09, wpb=64877, bsz=128, num_updates=13735, lr=9.98981e-05, gnorm=2.423, loss_scale=8, train_wall=11, gb_free=2.8, wall=157899 2021-06-20 14:30:36 | INFO | train_inner | epoch 005: 1809 / 3002 loss=2.474, ppl=5.56, wps=5935, ups=0.09, wpb=64811, bsz=128, num_updates=13736, lr=9.98981e-05, gnorm=1.875, loss_scale=8, train_wall=10, gb_free=2.8, wall=157910 2021-06-20 14:30:47 | INFO | train_inner | epoch 005: 1810 / 3002 loss=2.439, ppl=5.42, wps=5918.9, ups=0.09, wpb=64827, bsz=128, num_updates=13737, lr=9.98981e-05, gnorm=1.889, loss_scale=8, train_wall=10, gb_free=2.8, wall=157921 2021-06-20 14:30:58 | INFO | train_inner | epoch 005: 1811 / 3002 loss=2.468, ppl=5.53, wps=5777.6, ups=0.09, wpb=64807, bsz=128, num_updates=13738, lr=9.98981e-05, gnorm=2.045, loss_scale=8, train_wall=11, gb_free=2.8, wall=157932 2021-06-20 14:31:09 | INFO | train_inner | epoch 005: 1812 / 3002 loss=2.549, ppl=5.85, wps=5809.8, ups=0.09, wpb=64830, bsz=128, num_updates=13739, lr=9.98981e-05, gnorm=2.023, loss_scale=8, train_wall=11, gb_free=2.8, wall=157944 2021-06-20 14:31:20 | INFO | train_inner | epoch 005: 1813 / 3002 loss=2.47, ppl=5.54, wps=5799.7, ups=0.09, wpb=64858, bsz=128, num_updates=13740, lr=9.98981e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=157955 2021-06-20 14:31:32 | INFO | train_inner | epoch 005: 1814 / 3002 loss=2.444, ppl=5.44, wps=5815.3, ups=0.09, wpb=64827, bsz=128, num_updates=13741, lr=9.98981e-05, gnorm=1.865, loss_scale=8, train_wall=11, gb_free=2.8, wall=157966 2021-06-20 14:31:43 | INFO | train_inner | epoch 005: 1815 / 3002 loss=2.482, ppl=5.59, wps=5729.5, ups=0.09, wpb=64842, bsz=128, num_updates=13742, lr=9.98981e-05, gnorm=1.918, loss_scale=8, train_wall=11, gb_free=2.8, wall=157977 2021-06-20 14:31:54 | INFO | train_inner | epoch 005: 1816 / 3002 loss=2.525, ppl=5.76, wps=5828.3, ups=0.09, wpb=64905, bsz=128, num_updates=13743, lr=9.9898e-05, gnorm=2.008, loss_scale=8, train_wall=11, gb_free=2.8, wall=157988 2021-06-20 14:32:05 | INFO | train_inner | epoch 005: 1817 / 3002 loss=2.707, ppl=6.53, wps=5834.4, ups=0.09, wpb=64856, bsz=128, num_updates=13744, lr=9.9898e-05, gnorm=2.08, loss_scale=8, train_wall=11, gb_free=2.8, wall=157999 2021-06-20 14:32:16 | INFO | train_inner | epoch 005: 1818 / 3002 loss=2.514, ppl=5.71, wps=5743, ups=0.09, wpb=64738, bsz=128, num_updates=13745, lr=9.9898e-05, gnorm=1.985, loss_scale=8, train_wall=11, gb_free=2.8, wall=158011 2021-06-20 14:32:28 | INFO | train_inner | epoch 005: 1819 / 3002 loss=2.625, ppl=6.17, wps=5842.5, ups=0.09, wpb=64840, bsz=128, num_updates=13746, lr=9.9898e-05, gnorm=1.918, loss_scale=8, train_wall=11, gb_free=2.8, wall=158022 2021-06-20 14:32:39 | INFO | train_inner | epoch 005: 1820 / 3002 loss=2.503, ppl=5.67, wps=5880.9, ups=0.09, wpb=64822, bsz=128, num_updates=13747, lr=9.9898e-05, gnorm=1.937, loss_scale=8, train_wall=11, gb_free=2.8, wall=158033 2021-06-20 14:32:50 | INFO | train_inner | epoch 005: 1821 / 3002 loss=2.415, ppl=5.33, wps=5802.5, ups=0.09, wpb=64830, bsz=128, num_updates=13748, lr=9.9898e-05, gnorm=1.998, loss_scale=8, train_wall=11, gb_free=2.8, wall=158044 2021-06-20 14:33:01 | INFO | train_inner | epoch 005: 1822 / 3002 loss=2.482, ppl=5.58, wps=5796.3, ups=0.09, wpb=64752, bsz=128, num_updates=13749, lr=9.9898e-05, gnorm=2.153, loss_scale=8, train_wall=11, gb_free=2.8, wall=158055 2021-06-20 14:33:12 | INFO | train_inner | epoch 005: 1823 / 3002 loss=2.58, ppl=5.98, wps=5889.3, ups=0.09, wpb=64845, bsz=128, num_updates=13750, lr=9.9898e-05, gnorm=1.956, loss_scale=8, train_wall=11, gb_free=2.8, wall=158066 2021-06-20 14:33:23 | INFO | train_inner | epoch 005: 1824 / 3002 loss=2.574, ppl=5.95, wps=5867.1, ups=0.09, wpb=64864, bsz=128, num_updates=13751, lr=9.9898e-05, gnorm=1.907, loss_scale=8, train_wall=11, gb_free=2.8, wall=158077 2021-06-20 14:33:34 | INFO | train_inner | epoch 005: 1825 / 3002 loss=2.321, ppl=5, wps=5752.9, ups=0.09, wpb=64815, bsz=128, num_updates=13752, lr=9.9898e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=158089 2021-06-20 14:33:45 | INFO | train_inner | epoch 005: 1826 / 3002 loss=2.568, ppl=5.93, wps=5765.6, ups=0.09, wpb=64844, bsz=128, num_updates=13753, lr=9.9898e-05, gnorm=2.007, loss_scale=8, train_wall=11, gb_free=2.8, wall=158100 2021-06-20 14:33:56 | INFO | train_inner | epoch 005: 1827 / 3002 loss=2.536, ppl=5.8, wps=5894.8, ups=0.09, wpb=64891, bsz=128, num_updates=13754, lr=9.9898e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=158111 2021-06-20 14:34:08 | INFO | train_inner | epoch 005: 1828 / 3002 loss=2.405, ppl=5.3, wps=5848.2, ups=0.09, wpb=64821, bsz=128, num_updates=13755, lr=9.9898e-05, gnorm=2.01, loss_scale=8, train_wall=11, gb_free=2.8, wall=158122 2021-06-20 14:34:19 | INFO | train_inner | epoch 005: 1829 / 3002 loss=2.474, ppl=5.55, wps=5846.6, ups=0.09, wpb=64845, bsz=128, num_updates=13756, lr=9.98979e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=158133 2021-06-20 14:34:30 | INFO | train_inner | epoch 005: 1830 / 3002 loss=2.492, ppl=5.63, wps=5794.2, ups=0.09, wpb=64735, bsz=128, num_updates=13757, lr=9.98979e-05, gnorm=2.078, loss_scale=8, train_wall=11, gb_free=2.8, wall=158144 2021-06-20 14:34:41 | INFO | train_inner | epoch 005: 1831 / 3002 loss=2.433, ppl=5.4, wps=5879.2, ups=0.09, wpb=64913, bsz=128, num_updates=13758, lr=9.98979e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=158155 2021-06-20 14:34:52 | INFO | train_inner | epoch 005: 1832 / 3002 loss=2.598, ppl=6.05, wps=5870.5, ups=0.09, wpb=64791, bsz=128, num_updates=13759, lr=9.98979e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=158166 2021-06-20 14:35:03 | INFO | train_inner | epoch 005: 1833 / 3002 loss=2.323, ppl=5, wps=5769.6, ups=0.09, wpb=64802, bsz=128, num_updates=13760, lr=9.98979e-05, gnorm=1.876, loss_scale=8, train_wall=11, gb_free=2.8, wall=158177 2021-06-20 14:35:14 | INFO | train_inner | epoch 005: 1834 / 3002 loss=2.579, ppl=5.97, wps=5723.1, ups=0.09, wpb=64777, bsz=128, num_updates=13761, lr=9.98979e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=158189 2021-06-20 14:35:26 | INFO | train_inner | epoch 005: 1835 / 3002 loss=2.437, ppl=5.42, wps=5789.5, ups=0.09, wpb=64750, bsz=128, num_updates=13762, lr=9.98979e-05, gnorm=1.984, loss_scale=8, train_wall=11, gb_free=2.8, wall=158200 2021-06-20 14:35:37 | INFO | train_inner | epoch 005: 1836 / 3002 loss=2.392, ppl=5.25, wps=5829.5, ups=0.09, wpb=64908, bsz=128, num_updates=13763, lr=9.98979e-05, gnorm=1.896, loss_scale=8, train_wall=11, gb_free=2.8, wall=158211 2021-06-20 14:35:48 | INFO | train_inner | epoch 005: 1837 / 3002 loss=2.596, ppl=6.05, wps=5877.2, ups=0.09, wpb=64901, bsz=128, num_updates=13764, lr=9.98979e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=158222 2021-06-20 14:35:59 | INFO | train_inner | epoch 005: 1838 / 3002 loss=2.427, ppl=5.38, wps=5882.6, ups=0.09, wpb=64865, bsz=128, num_updates=13765, lr=9.98979e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=158233 2021-06-20 14:36:10 | INFO | train_inner | epoch 005: 1839 / 3002 loss=2.558, ppl=5.89, wps=5924.9, ups=0.09, wpb=64932, bsz=128, num_updates=13766, lr=9.98979e-05, gnorm=1.984, loss_scale=8, train_wall=11, gb_free=2.8, wall=158244 2021-06-20 14:36:21 | INFO | train_inner | epoch 005: 1840 / 3002 loss=2.627, ppl=6.18, wps=5794.4, ups=0.09, wpb=64905, bsz=128, num_updates=13767, lr=9.98979e-05, gnorm=2.033, loss_scale=8, train_wall=11, gb_free=2.8, wall=158255 2021-06-20 14:36:32 | INFO | train_inner | epoch 005: 1841 / 3002 loss=2.359, ppl=5.13, wps=5829.9, ups=0.09, wpb=64772, bsz=128, num_updates=13768, lr=9.98978e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=158266 2021-06-20 14:36:43 | INFO | train_inner | epoch 005: 1842 / 3002 loss=2.436, ppl=5.41, wps=5738.2, ups=0.09, wpb=64729, bsz=128, num_updates=13769, lr=9.98978e-05, gnorm=1.915, loss_scale=8, train_wall=11, gb_free=2.8, wall=158278 2021-06-20 14:36:55 | INFO | train_inner | epoch 005: 1843 / 3002 loss=2.448, ppl=5.46, wps=5737.4, ups=0.09, wpb=64765, bsz=128, num_updates=13770, lr=9.98978e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=158289 2021-06-20 14:37:06 | INFO | train_inner | epoch 005: 1844 / 3002 loss=2.571, ppl=5.94, wps=5847.5, ups=0.09, wpb=64822, bsz=128, num_updates=13771, lr=9.98978e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=158300 2021-06-20 14:37:17 | INFO | train_inner | epoch 005: 1845 / 3002 loss=2.499, ppl=5.65, wps=5793.9, ups=0.09, wpb=64815, bsz=128, num_updates=13772, lr=9.98978e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=158311 2021-06-20 14:37:28 | INFO | train_inner | epoch 005: 1846 / 3002 loss=2.388, ppl=5.24, wps=5868.7, ups=0.09, wpb=64874, bsz=128, num_updates=13773, lr=9.98978e-05, gnorm=1.953, loss_scale=8, train_wall=11, gb_free=2.8, wall=158322 2021-06-20 14:37:39 | INFO | train_inner | epoch 005: 1847 / 3002 loss=2.523, ppl=5.75, wps=5905.7, ups=0.09, wpb=64809, bsz=128, num_updates=13774, lr=9.98978e-05, gnorm=1.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=158333 2021-06-20 14:37:50 | INFO | train_inner | epoch 005: 1848 / 3002 loss=2.371, ppl=5.17, wps=5862.1, ups=0.09, wpb=64828, bsz=128, num_updates=13775, lr=9.98978e-05, gnorm=1.919, loss_scale=8, train_wall=11, gb_free=2.8, wall=158344 2021-06-20 14:38:01 | INFO | train_inner | epoch 005: 1849 / 3002 loss=2.629, ppl=6.19, wps=5916.8, ups=0.09, wpb=64874, bsz=128, num_updates=13776, lr=9.98978e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=158355 2021-06-20 14:38:12 | INFO | train_inner | epoch 005: 1850 / 3002 loss=2.394, ppl=5.26, wps=5843.2, ups=0.09, wpb=64902, bsz=128, num_updates=13777, lr=9.98978e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=158366 2021-06-20 14:38:23 | INFO | train_inner | epoch 005: 1851 / 3002 loss=2.487, ppl=5.61, wps=5794.5, ups=0.09, wpb=64815, bsz=128, num_updates=13778, lr=9.98978e-05, gnorm=2.036, loss_scale=8, train_wall=11, gb_free=2.8, wall=158378 2021-06-20 14:38:35 | INFO | train_inner | epoch 005: 1852 / 3002 loss=2.474, ppl=5.56, wps=5775.9, ups=0.09, wpb=64777, bsz=128, num_updates=13779, lr=9.98978e-05, gnorm=2.088, loss_scale=8, train_wall=11, gb_free=2.8, wall=158389 2021-06-20 14:38:45 | INFO | train_inner | epoch 005: 1853 / 3002 loss=2.535, ppl=5.79, wps=5965.1, ups=0.09, wpb=64827, bsz=128, num_updates=13780, lr=9.98978e-05, gnorm=1.946, loss_scale=8, train_wall=10, gb_free=2.8, wall=158400 2021-06-20 14:38:57 | INFO | train_inner | epoch 005: 1854 / 3002 loss=2.526, ppl=5.76, wps=5815.9, ups=0.09, wpb=64806, bsz=128, num_updates=13781, lr=9.98977e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=158411 2021-06-20 14:39:08 | INFO | train_inner | epoch 005: 1855 / 3002 loss=2.337, ppl=5.05, wps=5839.5, ups=0.09, wpb=64894, bsz=128, num_updates=13782, lr=9.98977e-05, gnorm=1.946, loss_scale=8, train_wall=11, gb_free=2.8, wall=158422 2021-06-20 14:39:19 | INFO | train_inner | epoch 005: 1856 / 3002 loss=2.604, ppl=6.08, wps=5808.2, ups=0.09, wpb=64809, bsz=128, num_updates=13783, lr=9.98977e-05, gnorm=2.138, loss_scale=8, train_wall=11, gb_free=2.8, wall=158433 2021-06-20 14:39:30 | INFO | train_inner | epoch 005: 1857 / 3002 loss=2.505, ppl=5.68, wps=5783.4, ups=0.09, wpb=64847, bsz=128, num_updates=13784, lr=9.98977e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=158444 2021-06-20 14:39:41 | INFO | train_inner | epoch 005: 1858 / 3002 loss=2.631, ppl=6.2, wps=5858.1, ups=0.09, wpb=64848, bsz=128, num_updates=13785, lr=9.98977e-05, gnorm=1.879, loss_scale=8, train_wall=11, gb_free=2.8, wall=158455 2021-06-20 14:39:52 | INFO | train_inner | epoch 005: 1859 / 3002 loss=2.584, ppl=5.99, wps=5862.1, ups=0.09, wpb=64842, bsz=128, num_updates=13786, lr=9.98977e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=158466 2021-06-20 14:40:03 | INFO | train_inner | epoch 005: 1860 / 3002 loss=2.443, ppl=5.44, wps=5788.3, ups=0.09, wpb=64846, bsz=128, num_updates=13787, lr=9.98977e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=158478 2021-06-20 14:40:14 | INFO | train_inner | epoch 005: 1861 / 3002 loss=2.703, ppl=6.51, wps=5894, ups=0.09, wpb=64830, bsz=128, num_updates=13788, lr=9.98977e-05, gnorm=1.941, loss_scale=8, train_wall=11, gb_free=2.8, wall=158489 2021-06-20 14:40:25 | INFO | train_inner | epoch 005: 1862 / 3002 loss=2.522, ppl=5.74, wps=5827.6, ups=0.09, wpb=64869, bsz=128, num_updates=13789, lr=9.98977e-05, gnorm=2.363, loss_scale=8, train_wall=11, gb_free=2.8, wall=158500 2021-06-20 14:40:36 | INFO | train_inner | epoch 005: 1863 / 3002 loss=2.478, ppl=5.57, wps=5888, ups=0.09, wpb=64851, bsz=128, num_updates=13790, lr=9.98977e-05, gnorm=1.825, loss_scale=8, train_wall=11, gb_free=2.8, wall=158511 2021-06-20 14:40:48 | INFO | train_inner | epoch 005: 1864 / 3002 loss=2.394, ppl=5.26, wps=5789.7, ups=0.09, wpb=64801, bsz=128, num_updates=13791, lr=9.98977e-05, gnorm=1.855, loss_scale=8, train_wall=11, gb_free=2.8, wall=158522 2021-06-20 14:40:59 | INFO | train_inner | epoch 005: 1865 / 3002 loss=2.592, ppl=6.03, wps=5859.6, ups=0.09, wpb=64770, bsz=128, num_updates=13792, lr=9.98977e-05, gnorm=1.945, loss_scale=8, train_wall=11, gb_free=2.8, wall=158533 2021-06-20 14:41:10 | INFO | train_inner | epoch 005: 1866 / 3002 loss=2.462, ppl=5.51, wps=5827.2, ups=0.09, wpb=64862, bsz=128, num_updates=13793, lr=9.98976e-05, gnorm=1.937, loss_scale=8, train_wall=11, gb_free=2.8, wall=158544 2021-06-20 14:41:21 | INFO | train_inner | epoch 005: 1867 / 3002 loss=2.511, ppl=5.7, wps=5916.5, ups=0.09, wpb=64821, bsz=128, num_updates=13794, lr=9.98976e-05, gnorm=2.178, loss_scale=8, train_wall=11, gb_free=2.8, wall=158555 2021-06-20 14:41:32 | INFO | train_inner | epoch 005: 1868 / 3002 loss=2.319, ppl=4.99, wps=5860.1, ups=0.09, wpb=64868, bsz=128, num_updates=13795, lr=9.98976e-05, gnorm=1.996, loss_scale=8, train_wall=11, gb_free=2.8, wall=158566 2021-06-20 14:41:43 | INFO | train_inner | epoch 005: 1869 / 3002 loss=2.55, ppl=5.86, wps=5781.7, ups=0.09, wpb=64768, bsz=128, num_updates=13796, lr=9.98976e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=158577 2021-06-20 14:41:54 | INFO | train_inner | epoch 005: 1870 / 3002 loss=2.548, ppl=5.85, wps=5845.1, ups=0.09, wpb=64876, bsz=128, num_updates=13797, lr=9.98976e-05, gnorm=1.912, loss_scale=8, train_wall=11, gb_free=2.8, wall=158589 2021-06-20 14:42:05 | INFO | train_inner | epoch 005: 1871 / 3002 loss=2.7, ppl=6.5, wps=5823.2, ups=0.09, wpb=64658, bsz=128, num_updates=13798, lr=9.98976e-05, gnorm=1.997, loss_scale=8, train_wall=11, gb_free=2.8, wall=158600 2021-06-20 14:42:16 | INFO | train_inner | epoch 005: 1872 / 3002 loss=2.525, ppl=5.76, wps=5933.8, ups=0.09, wpb=64836, bsz=128, num_updates=13799, lr=9.98976e-05, gnorm=1.929, loss_scale=8, train_wall=10, gb_free=2.8, wall=158611 2021-06-20 14:42:27 | INFO | train_inner | epoch 005: 1873 / 3002 loss=2.573, ppl=5.95, wps=5880, ups=0.09, wpb=64796, bsz=128, num_updates=13800, lr=9.98976e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=158622 2021-06-20 14:42:39 | INFO | train_inner | epoch 005: 1874 / 3002 loss=2.403, ppl=5.29, wps=5747.9, ups=0.09, wpb=64825, bsz=128, num_updates=13801, lr=9.98976e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=158633 2021-06-20 14:42:50 | INFO | train_inner | epoch 005: 1875 / 3002 loss=2.575, ppl=5.96, wps=5822.9, ups=0.09, wpb=64784, bsz=128, num_updates=13802, lr=9.98976e-05, gnorm=1.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=158644 2021-06-20 14:43:01 | INFO | train_inner | epoch 005: 1876 / 3002 loss=2.379, ppl=5.2, wps=5829.8, ups=0.09, wpb=64836, bsz=128, num_updates=13803, lr=9.98976e-05, gnorm=2.158, loss_scale=8, train_wall=11, gb_free=2.8, wall=158655 2021-06-20 14:43:12 | INFO | train_inner | epoch 005: 1877 / 3002 loss=2.438, ppl=5.42, wps=5925.2, ups=0.09, wpb=64860, bsz=128, num_updates=13804, lr=9.98976e-05, gnorm=2.041, loss_scale=8, train_wall=10, gb_free=2.8, wall=158666 2021-06-20 14:43:23 | INFO | train_inner | epoch 005: 1878 / 3002 loss=2.407, ppl=5.31, wps=5865.2, ups=0.09, wpb=64870, bsz=128, num_updates=13805, lr=9.98976e-05, gnorm=2.016, loss_scale=8, train_wall=11, gb_free=2.8, wall=158677 2021-06-20 14:43:34 | INFO | train_inner | epoch 005: 1879 / 3002 loss=2.575, ppl=5.96, wps=5762.9, ups=0.09, wpb=64861, bsz=128, num_updates=13806, lr=9.98975e-05, gnorm=2.05, loss_scale=8, train_wall=11, gb_free=2.8, wall=158688 2021-06-20 14:43:45 | INFO | train_inner | epoch 005: 1880 / 3002 loss=2.588, ppl=6.01, wps=5918.4, ups=0.09, wpb=64790, bsz=128, num_updates=13807, lr=9.98975e-05, gnorm=1.92, loss_scale=8, train_wall=10, gb_free=2.8, wall=158699 2021-06-20 14:43:56 | INFO | train_inner | epoch 005: 1881 / 3002 loss=2.523, ppl=5.75, wps=5715.6, ups=0.09, wpb=64806, bsz=128, num_updates=13808, lr=9.98975e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=158711 2021-06-20 14:44:08 | INFO | train_inner | epoch 005: 1882 / 3002 loss=2.432, ppl=5.39, wps=5751.2, ups=0.09, wpb=64819, bsz=128, num_updates=13809, lr=9.98975e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=158722 2021-06-20 14:44:19 | INFO | train_inner | epoch 005: 1883 / 3002 loss=2.436, ppl=5.41, wps=5921.4, ups=0.09, wpb=64814, bsz=128, num_updates=13810, lr=9.98975e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=158733 2021-06-20 14:44:30 | INFO | train_inner | epoch 005: 1884 / 3002 loss=2.477, ppl=5.57, wps=5839.4, ups=0.09, wpb=64904, bsz=128, num_updates=13811, lr=9.98975e-05, gnorm=1.949, loss_scale=8, train_wall=11, gb_free=2.8, wall=158744 2021-06-20 14:44:41 | INFO | train_inner | epoch 005: 1885 / 3002 loss=2.452, ppl=5.47, wps=5817.2, ups=0.09, wpb=64847, bsz=128, num_updates=13812, lr=9.98975e-05, gnorm=1.941, loss_scale=8, train_wall=11, gb_free=2.8, wall=158755 2021-06-20 14:44:52 | INFO | train_inner | epoch 005: 1886 / 3002 loss=2.552, ppl=5.86, wps=5780.7, ups=0.09, wpb=64867, bsz=128, num_updates=13813, lr=9.98975e-05, gnorm=1.976, loss_scale=8, train_wall=11, gb_free=2.8, wall=158766 2021-06-20 14:45:03 | INFO | train_inner | epoch 005: 1887 / 3002 loss=2.316, ppl=4.98, wps=5715.5, ups=0.09, wpb=64781, bsz=128, num_updates=13814, lr=9.98975e-05, gnorm=1.91, loss_scale=8, train_wall=11, gb_free=2.8, wall=158778 2021-06-20 14:45:14 | INFO | train_inner | epoch 005: 1888 / 3002 loss=2.475, ppl=5.56, wps=5918.3, ups=0.09, wpb=64871, bsz=128, num_updates=13815, lr=9.98975e-05, gnorm=1.939, loss_scale=8, train_wall=10, gb_free=2.8, wall=158789 2021-06-20 14:45:25 | INFO | train_inner | epoch 005: 1889 / 3002 loss=2.456, ppl=5.49, wps=5796.1, ups=0.09, wpb=64792, bsz=128, num_updates=13816, lr=9.98975e-05, gnorm=2.626, loss_scale=8, train_wall=11, gb_free=2.8, wall=158800 2021-06-20 14:45:37 | INFO | train_inner | epoch 005: 1890 / 3002 loss=2.48, ppl=5.58, wps=5828, ups=0.09, wpb=64866, bsz=128, num_updates=13817, lr=9.98975e-05, gnorm=2.23, loss_scale=8, train_wall=11, gb_free=2.8, wall=158811 2021-06-20 14:45:48 | INFO | train_inner | epoch 005: 1891 / 3002 loss=2.548, ppl=5.85, wps=5914.5, ups=0.09, wpb=64796, bsz=128, num_updates=13818, lr=9.98974e-05, gnorm=1.941, loss_scale=16, train_wall=10, gb_free=2.8, wall=158822 2021-06-20 14:45:59 | INFO | train_inner | epoch 005: 1892 / 3002 loss=2.567, ppl=5.93, wps=5904.8, ups=0.09, wpb=64757, bsz=128, num_updates=13819, lr=9.98974e-05, gnorm=1.946, loss_scale=16, train_wall=10, gb_free=2.8, wall=158833 2021-06-20 14:46:10 | INFO | train_inner | epoch 005: 1893 / 3002 loss=2.416, ppl=5.34, wps=5864.5, ups=0.09, wpb=64822, bsz=128, num_updates=13820, lr=9.98974e-05, gnorm=2.001, loss_scale=16, train_wall=11, gb_free=2.8, wall=158844 2021-06-20 14:46:21 | INFO | train_inner | epoch 005: 1894 / 3002 loss=2.525, ppl=5.76, wps=5790.2, ups=0.09, wpb=64738, bsz=128, num_updates=13821, lr=9.98974e-05, gnorm=2.044, loss_scale=16, train_wall=11, gb_free=2.8, wall=158855 2021-06-20 14:46:32 | INFO | train_inner | epoch 005: 1895 / 3002 loss=2.591, ppl=6.03, wps=5938.5, ups=0.09, wpb=64845, bsz=128, num_updates=13822, lr=9.98974e-05, gnorm=2.023, loss_scale=16, train_wall=10, gb_free=2.8, wall=158866 2021-06-20 14:46:43 | INFO | train_inner | epoch 005: 1896 / 3002 loss=2.47, ppl=5.54, wps=5891, ups=0.09, wpb=64819, bsz=128, num_updates=13823, lr=9.98974e-05, gnorm=1.922, loss_scale=16, train_wall=11, gb_free=2.8, wall=158877 2021-06-20 14:46:54 | INFO | train_inner | epoch 005: 1897 / 3002 loss=2.37, ppl=5.17, wps=5840.9, ups=0.09, wpb=64777, bsz=128, num_updates=13824, lr=9.98974e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=158888 2021-06-20 14:47:05 | INFO | train_inner | epoch 005: 1898 / 3002 loss=2.594, ppl=6.04, wps=5922.7, ups=0.09, wpb=64847, bsz=128, num_updates=13825, lr=9.98974e-05, gnorm=1.98, loss_scale=16, train_wall=10, gb_free=2.8, wall=158899 2021-06-20 14:47:16 | INFO | train_inner | epoch 005: 1899 / 3002 loss=2.548, ppl=5.85, wps=5829.6, ups=0.09, wpb=64785, bsz=128, num_updates=13826, lr=9.98974e-05, gnorm=1.985, loss_scale=16, train_wall=11, gb_free=2.8, wall=158910 2021-06-20 14:47:27 | INFO | train_inner | epoch 005: 1900 / 3002 loss=2.532, ppl=5.79, wps=5833.3, ups=0.09, wpb=64807, bsz=128, num_updates=13827, lr=9.98974e-05, gnorm=1.972, loss_scale=16, train_wall=11, gb_free=2.8, wall=158921 2021-06-20 14:47:38 | INFO | train_inner | epoch 005: 1901 / 3002 loss=2.592, ppl=6.03, wps=5795.4, ups=0.09, wpb=64828, bsz=128, num_updates=13828, lr=9.98974e-05, gnorm=1.926, loss_scale=16, train_wall=11, gb_free=2.8, wall=158932 2021-06-20 14:47:49 | INFO | train_inner | epoch 005: 1902 / 3002 loss=2.43, ppl=5.39, wps=5936.7, ups=0.09, wpb=64850, bsz=128, num_updates=13829, lr=9.98974e-05, gnorm=1.974, loss_scale=16, train_wall=10, gb_free=2.8, wall=158943 2021-06-20 14:48:00 | INFO | train_inner | epoch 005: 1903 / 3002 loss=2.535, ppl=5.8, wps=5853.8, ups=0.09, wpb=64786, bsz=128, num_updates=13830, lr=9.98974e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=158954 2021-06-20 14:48:11 | INFO | train_inner | epoch 005: 1904 / 3002 loss=2.55, ppl=5.86, wps=5925.4, ups=0.09, wpb=64881, bsz=128, num_updates=13831, lr=9.98973e-05, gnorm=1.912, loss_scale=16, train_wall=10, gb_free=2.8, wall=158965 2021-06-20 14:48:22 | INFO | train_inner | epoch 005: 1905 / 3002 loss=2.66, ppl=6.32, wps=5725.8, ups=0.09, wpb=64843, bsz=128, num_updates=13832, lr=9.98973e-05, gnorm=1.909, loss_scale=16, train_wall=11, gb_free=2.8, wall=158977 2021-06-20 14:48:34 | INFO | train_inner | epoch 005: 1906 / 3002 loss=2.58, ppl=5.98, wps=5725.6, ups=0.09, wpb=64833, bsz=128, num_updates=13833, lr=9.98973e-05, gnorm=2.042, loss_scale=16, train_wall=11, gb_free=2.8, wall=158988 2021-06-20 14:48:45 | INFO | train_inner | epoch 005: 1907 / 3002 loss=2.395, ppl=5.26, wps=5773.4, ups=0.09, wpb=64815, bsz=128, num_updates=13834, lr=9.98973e-05, gnorm=1.933, loss_scale=16, train_wall=11, gb_free=2.8, wall=158999 2021-06-20 14:48:56 | INFO | train_inner | epoch 005: 1908 / 3002 loss=2.577, ppl=5.97, wps=5745.3, ups=0.09, wpb=64746, bsz=128, num_updates=13835, lr=9.98973e-05, gnorm=2.073, loss_scale=16, train_wall=11, gb_free=2.8, wall=159011 2021-06-20 14:49:07 | INFO | train_inner | epoch 005: 1909 / 3002 loss=2.607, ppl=6.09, wps=5893.7, ups=0.09, wpb=64834, bsz=128, num_updates=13836, lr=9.98973e-05, gnorm=2.202, loss_scale=16, train_wall=11, gb_free=2.8, wall=159022 2021-06-20 14:49:18 | INFO | train_inner | epoch 005: 1910 / 3002 loss=2.484, ppl=5.59, wps=5969.5, ups=0.09, wpb=64842, bsz=128, num_updates=13837, lr=9.98973e-05, gnorm=1.955, loss_scale=16, train_wall=10, gb_free=2.8, wall=159032 2021-06-20 14:49:29 | INFO | train_inner | epoch 005: 1911 / 3002 loss=2.621, ppl=6.15, wps=5866.2, ups=0.09, wpb=64898, bsz=128, num_updates=13838, lr=9.98973e-05, gnorm=1.986, loss_scale=16, train_wall=11, gb_free=2.8, wall=159043 2021-06-20 14:49:40 | INFO | train_inner | epoch 005: 1912 / 3002 loss=2.441, ppl=5.43, wps=5796.5, ups=0.09, wpb=64749, bsz=128, num_updates=13839, lr=9.98973e-05, gnorm=2.702, loss_scale=16, train_wall=11, gb_free=2.8, wall=159055 2021-06-20 14:49:52 | INFO | train_inner | epoch 005: 1913 / 3002 loss=2.375, ppl=5.19, wps=5742.6, ups=0.09, wpb=64834, bsz=128, num_updates=13840, lr=9.98973e-05, gnorm=1.976, loss_scale=16, train_wall=11, gb_free=2.8, wall=159066 2021-06-20 14:50:03 | INFO | train_inner | epoch 005: 1914 / 3002 loss=2.501, ppl=5.66, wps=5896.7, ups=0.09, wpb=64852, bsz=128, num_updates=13841, lr=9.98973e-05, gnorm=1.993, loss_scale=16, train_wall=11, gb_free=2.8, wall=159077 2021-06-20 14:50:14 | INFO | train_inner | epoch 005: 1915 / 3002 loss=2.65, ppl=6.28, wps=5847.6, ups=0.09, wpb=64827, bsz=128, num_updates=13842, lr=9.98973e-05, gnorm=1.951, loss_scale=16, train_wall=11, gb_free=2.8, wall=159088 2021-06-20 14:50:25 | INFO | train_inner | epoch 005: 1916 / 3002 loss=2.459, ppl=5.5, wps=5752.7, ups=0.09, wpb=64815, bsz=128, num_updates=13843, lr=9.98972e-05, gnorm=2.133, loss_scale=16, train_wall=11, gb_free=2.8, wall=159099 2021-06-20 14:50:36 | INFO | train_inner | epoch 005: 1917 / 3002 loss=2.373, ppl=5.18, wps=5769.2, ups=0.09, wpb=64806, bsz=128, num_updates=13844, lr=9.98972e-05, gnorm=1.928, loss_scale=16, train_wall=11, gb_free=2.8, wall=159111 2021-06-20 14:50:47 | INFO | train_inner | epoch 005: 1918 / 3002 loss=2.493, ppl=5.63, wps=5750, ups=0.09, wpb=64821, bsz=128, num_updates=13845, lr=9.98972e-05, gnorm=2.065, loss_scale=16, train_wall=11, gb_free=2.8, wall=159122 2021-06-20 14:50:59 | INFO | train_inner | epoch 005: 1919 / 3002 loss=2.498, ppl=5.65, wps=5846.9, ups=0.09, wpb=64926, bsz=128, num_updates=13846, lr=9.98972e-05, gnorm=1.983, loss_scale=16, train_wall=11, gb_free=2.8, wall=159133 2021-06-20 14:51:10 | INFO | train_inner | epoch 005: 1920 / 3002 loss=2.448, ppl=5.45, wps=5763.4, ups=0.09, wpb=64802, bsz=128, num_updates=13847, lr=9.98972e-05, gnorm=1.928, loss_scale=16, train_wall=11, gb_free=2.8, wall=159144 2021-06-20 14:51:21 | INFO | train_inner | epoch 005: 1921 / 3002 loss=2.698, ppl=6.49, wps=5876.9, ups=0.09, wpb=64968, bsz=128, num_updates=13848, lr=9.98972e-05, gnorm=1.984, loss_scale=16, train_wall=11, gb_free=2.8, wall=159155 2021-06-20 14:51:32 | INFO | train_inner | epoch 005: 1922 / 3002 loss=2.465, ppl=5.52, wps=5959.6, ups=0.09, wpb=64828, bsz=128, num_updates=13849, lr=9.98972e-05, gnorm=1.932, loss_scale=16, train_wall=10, gb_free=2.8, wall=159166 2021-06-20 14:51:43 | INFO | train_inner | epoch 005: 1923 / 3002 loss=2.52, ppl=5.73, wps=5786, ups=0.09, wpb=64830, bsz=128, num_updates=13850, lr=9.98972e-05, gnorm=2.103, loss_scale=16, train_wall=11, gb_free=2.8, wall=159177 2021-06-20 14:51:54 | INFO | train_inner | epoch 005: 1924 / 3002 loss=2.685, ppl=6.43, wps=5764.4, ups=0.09, wpb=64798, bsz=128, num_updates=13851, lr=9.98972e-05, gnorm=1.996, loss_scale=16, train_wall=11, gb_free=2.8, wall=159189 2021-06-20 14:52:05 | INFO | train_inner | epoch 005: 1925 / 3002 loss=2.361, ppl=5.14, wps=5931, ups=0.09, wpb=64855, bsz=128, num_updates=13852, lr=9.98972e-05, gnorm=1.982, loss_scale=16, train_wall=10, gb_free=2.8, wall=159199 2021-06-20 14:52:16 | INFO | train_inner | epoch 005: 1926 / 3002 loss=2.602, ppl=6.07, wps=5996.8, ups=0.09, wpb=64861, bsz=128, num_updates=13853, lr=9.98972e-05, gnorm=2.071, loss_scale=16, train_wall=10, gb_free=2.8, wall=159210 2021-06-20 14:52:27 | INFO | train_inner | epoch 005: 1927 / 3002 loss=2.523, ppl=5.75, wps=5768.3, ups=0.09, wpb=64852, bsz=128, num_updates=13854, lr=9.98972e-05, gnorm=2.056, loss_scale=16, train_wall=11, gb_free=2.8, wall=159222 2021-06-20 14:52:38 | INFO | train_inner | epoch 005: 1928 / 3002 loss=2.603, ppl=6.07, wps=5877.1, ups=0.09, wpb=64803, bsz=128, num_updates=13855, lr=9.98972e-05, gnorm=1.951, loss_scale=16, train_wall=11, gb_free=2.8, wall=159233 2021-06-20 14:52:49 | INFO | train_inner | epoch 005: 1929 / 3002 loss=2.543, ppl=5.83, wps=5861.7, ups=0.09, wpb=64825, bsz=128, num_updates=13856, lr=9.98971e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=159244 2021-06-20 14:53:00 | INFO | train_inner | epoch 005: 1930 / 3002 loss=2.506, ppl=5.68, wps=5801.4, ups=0.09, wpb=64838, bsz=128, num_updates=13857, lr=9.98971e-05, gnorm=1.963, loss_scale=16, train_wall=11, gb_free=2.8, wall=159255 2021-06-20 14:53:12 | INFO | train_inner | epoch 005: 1931 / 3002 loss=2.498, ppl=5.65, wps=5772.1, ups=0.09, wpb=64729, bsz=128, num_updates=13858, lr=9.98971e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=159266 2021-06-20 14:53:22 | INFO | train_inner | epoch 005: 1932 / 3002 loss=2.627, ppl=6.18, wps=6036.8, ups=0.09, wpb=64856, bsz=128, num_updates=13859, lr=9.98971e-05, gnorm=1.972, loss_scale=16, train_wall=10, gb_free=2.8, wall=159277 2021-06-20 14:53:34 | INFO | train_inner | epoch 005: 1933 / 3002 loss=2.607, ppl=6.09, wps=5771.2, ups=0.09, wpb=64802, bsz=128, num_updates=13860, lr=9.98971e-05, gnorm=1.927, loss_scale=16, train_wall=11, gb_free=2.8, wall=159288 2021-06-20 14:53:45 | INFO | train_inner | epoch 005: 1934 / 3002 loss=2.556, ppl=5.88, wps=5680.8, ups=0.09, wpb=64774, bsz=128, num_updates=13861, lr=9.98971e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=159299 2021-06-20 14:53:56 | INFO | train_inner | epoch 005: 1935 / 3002 loss=2.44, ppl=5.43, wps=5789.7, ups=0.09, wpb=64810, bsz=128, num_updates=13862, lr=9.98971e-05, gnorm=2.343, loss_scale=16, train_wall=11, gb_free=2.8, wall=159311 2021-06-20 14:54:07 | INFO | train_inner | epoch 005: 1936 / 3002 loss=2.401, ppl=5.28, wps=5882.8, ups=0.09, wpb=64887, bsz=128, num_updates=13863, lr=9.98971e-05, gnorm=1.945, loss_scale=16, train_wall=11, gb_free=2.8, wall=159322 2021-06-20 14:54:19 | INFO | train_inner | epoch 005: 1937 / 3002 loss=2.538, ppl=5.81, wps=5753.8, ups=0.09, wpb=64784, bsz=128, num_updates=13864, lr=9.98971e-05, gnorm=1.981, loss_scale=16, train_wall=11, gb_free=2.8, wall=159333 2021-06-20 14:54:30 | INFO | train_inner | epoch 005: 1938 / 3002 loss=2.45, ppl=5.47, wps=5794, ups=0.09, wpb=64798, bsz=128, num_updates=13865, lr=9.98971e-05, gnorm=2.044, loss_scale=16, train_wall=11, gb_free=2.8, wall=159344 2021-06-20 14:54:41 | INFO | train_inner | epoch 005: 1939 / 3002 loss=2.487, ppl=5.6, wps=5840.7, ups=0.09, wpb=64731, bsz=128, num_updates=13866, lr=9.98971e-05, gnorm=2.013, loss_scale=16, train_wall=11, gb_free=2.8, wall=159355 2021-06-20 14:54:52 | INFO | train_inner | epoch 005: 1940 / 3002 loss=2.49, ppl=5.62, wps=5923.1, ups=0.09, wpb=64864, bsz=128, num_updates=13867, lr=9.98971e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=159366 2021-06-20 14:55:03 | INFO | train_inner | epoch 005: 1941 / 3002 loss=2.379, ppl=5.2, wps=5768.3, ups=0.09, wpb=64798, bsz=128, num_updates=13868, lr=9.9897e-05, gnorm=1.93, loss_scale=16, train_wall=11, gb_free=2.8, wall=159377 2021-06-20 14:55:14 | INFO | train_inner | epoch 005: 1942 / 3002 loss=2.533, ppl=5.79, wps=5934.3, ups=0.09, wpb=64781, bsz=128, num_updates=13869, lr=9.9897e-05, gnorm=2.013, loss_scale=16, train_wall=10, gb_free=2.8, wall=159388 2021-06-20 14:55:25 | INFO | train_inner | epoch 005: 1943 / 3002 loss=2.608, ppl=6.1, wps=5711.5, ups=0.09, wpb=64758, bsz=128, num_updates=13870, lr=9.9897e-05, gnorm=2, loss_scale=16, train_wall=11, gb_free=2.8, wall=159400 2021-06-20 14:55:36 | INFO | train_inner | epoch 005: 1944 / 3002 loss=2.501, ppl=5.66, wps=5817.2, ups=0.09, wpb=64882, bsz=128, num_updates=13871, lr=9.9897e-05, gnorm=2.069, loss_scale=16, train_wall=11, gb_free=2.8, wall=159411 2021-06-20 14:55:47 | INFO | train_inner | epoch 005: 1945 / 3002 loss=2.463, ppl=5.51, wps=5953.6, ups=0.09, wpb=64757, bsz=128, num_updates=13872, lr=9.9897e-05, gnorm=1.944, loss_scale=16, train_wall=10, gb_free=2.8, wall=159422 2021-06-20 14:55:58 | INFO | train_inner | epoch 005: 1946 / 3002 loss=2.451, ppl=5.47, wps=5793.4, ups=0.09, wpb=64839, bsz=128, num_updates=13873, lr=9.9897e-05, gnorm=1.868, loss_scale=16, train_wall=11, gb_free=2.8, wall=159433 2021-06-20 14:56:10 | INFO | train_inner | epoch 005: 1947 / 3002 loss=2.424, ppl=5.37, wps=5747.9, ups=0.09, wpb=64854, bsz=128, num_updates=13874, lr=9.9897e-05, gnorm=1.992, loss_scale=16, train_wall=11, gb_free=2.8, wall=159444 2021-06-20 14:56:21 | INFO | train_inner | epoch 005: 1948 / 3002 loss=2.522, ppl=5.74, wps=5928.8, ups=0.09, wpb=64816, bsz=128, num_updates=13875, lr=9.9897e-05, gnorm=1.955, loss_scale=16, train_wall=10, gb_free=2.8, wall=159455 2021-06-20 14:56:32 | INFO | train_inner | epoch 005: 1949 / 3002 loss=2.444, ppl=5.44, wps=5859.3, ups=0.09, wpb=64859, bsz=128, num_updates=13876, lr=9.9897e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=159466 2021-06-20 14:56:43 | INFO | train_inner | epoch 005: 1950 / 3002 loss=2.452, ppl=5.47, wps=5790.8, ups=0.09, wpb=64875, bsz=128, num_updates=13877, lr=9.9897e-05, gnorm=1.941, loss_scale=16, train_wall=11, gb_free=2.8, wall=159477 2021-06-20 14:56:54 | INFO | train_inner | epoch 005: 1951 / 3002 loss=2.494, ppl=5.63, wps=5888.3, ups=0.09, wpb=64812, bsz=128, num_updates=13878, lr=9.9897e-05, gnorm=1.997, loss_scale=16, train_wall=11, gb_free=2.8, wall=159488 2021-06-20 14:57:05 | INFO | train_inner | epoch 005: 1952 / 3002 loss=2.398, ppl=5.27, wps=5766, ups=0.09, wpb=64881, bsz=128, num_updates=13879, lr=9.9897e-05, gnorm=1.933, loss_scale=16, train_wall=11, gb_free=2.8, wall=159500 2021-06-20 14:57:16 | INFO | train_inner | epoch 005: 1953 / 3002 loss=2.419, ppl=5.35, wps=5939.2, ups=0.09, wpb=64780, bsz=128, num_updates=13880, lr=9.9897e-05, gnorm=1.886, loss_scale=16, train_wall=10, gb_free=2.8, wall=159510 2021-06-20 14:57:27 | INFO | train_inner | epoch 005: 1954 / 3002 loss=2.717, ppl=6.58, wps=5886.5, ups=0.09, wpb=64789, bsz=128, num_updates=13881, lr=9.98969e-05, gnorm=2.024, loss_scale=16, train_wall=11, gb_free=2.8, wall=159521 2021-06-20 14:57:38 | INFO | train_inner | epoch 005: 1955 / 3002 loss=2.473, ppl=5.55, wps=5887.9, ups=0.09, wpb=64884, bsz=128, num_updates=13882, lr=9.98969e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=159532 2021-06-20 14:57:49 | INFO | train_inner | epoch 005: 1956 / 3002 loss=2.601, ppl=6.07, wps=5886.1, ups=0.09, wpb=64897, bsz=128, num_updates=13883, lr=9.98969e-05, gnorm=2.085, loss_scale=16, train_wall=11, gb_free=2.8, wall=159544 2021-06-20 14:58:00 | INFO | train_inner | epoch 005: 1957 / 3002 loss=2.612, ppl=6.11, wps=5912.4, ups=0.09, wpb=64759, bsz=128, num_updates=13884, lr=9.98969e-05, gnorm=2.158, loss_scale=16, train_wall=11, gb_free=2.8, wall=159554 2021-06-20 14:58:11 | INFO | train_inner | epoch 005: 1958 / 3002 loss=2.531, ppl=5.78, wps=5873, ups=0.09, wpb=64826, bsz=128, num_updates=13885, lr=9.98969e-05, gnorm=2.022, loss_scale=16, train_wall=11, gb_free=2.8, wall=159566 2021-06-20 14:58:22 | INFO | train_inner | epoch 005: 1959 / 3002 loss=2.638, ppl=6.22, wps=5852.4, ups=0.09, wpb=64882, bsz=128, num_updates=13886, lr=9.98969e-05, gnorm=2.049, loss_scale=16, train_wall=11, gb_free=2.8, wall=159577 2021-06-20 14:58:33 | INFO | train_inner | epoch 005: 1960 / 3002 loss=2.588, ppl=6.01, wps=5842.7, ups=0.09, wpb=64833, bsz=128, num_updates=13887, lr=9.98969e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=159588 2021-06-20 14:58:44 | INFO | train_inner | epoch 005: 1961 / 3002 loss=2.596, ppl=6.05, wps=5821.9, ups=0.09, wpb=64813, bsz=128, num_updates=13888, lr=9.98969e-05, gnorm=1.997, loss_scale=16, train_wall=11, gb_free=2.8, wall=159599 2021-06-20 14:58:55 | INFO | train_inner | epoch 005: 1962 / 3002 loss=2.491, ppl=5.62, wps=5991, ups=0.09, wpb=64854, bsz=128, num_updates=13889, lr=9.98969e-05, gnorm=1.911, loss_scale=16, train_wall=10, gb_free=2.8, wall=159610 2021-06-20 14:59:06 | INFO | train_inner | epoch 005: 1963 / 3002 loss=2.513, ppl=5.71, wps=5842.3, ups=0.09, wpb=64865, bsz=128, num_updates=13890, lr=9.98969e-05, gnorm=1.982, loss_scale=16, train_wall=11, gb_free=2.8, wall=159621 2021-06-20 14:59:18 | INFO | train_inner | epoch 005: 1964 / 3002 loss=2.433, ppl=5.4, wps=5787.4, ups=0.09, wpb=64901, bsz=128, num_updates=13891, lr=9.98969e-05, gnorm=1.862, loss_scale=16, train_wall=11, gb_free=2.8, wall=159632 2021-06-20 14:59:29 | INFO | train_inner | epoch 005: 1965 / 3002 loss=2.394, ppl=5.26, wps=5954.6, ups=0.09, wpb=64879, bsz=128, num_updates=13892, lr=9.98969e-05, gnorm=2.05, loss_scale=16, train_wall=10, gb_free=2.8, wall=159643 2021-06-20 14:59:40 | INFO | train_inner | epoch 005: 1966 / 3002 loss=2.492, ppl=5.63, wps=5806.1, ups=0.09, wpb=64858, bsz=128, num_updates=13893, lr=9.98968e-05, gnorm=2.011, loss_scale=16, train_wall=11, gb_free=2.8, wall=159654 2021-06-20 14:59:51 | INFO | train_inner | epoch 005: 1967 / 3002 loss=2.426, ppl=5.38, wps=5846.2, ups=0.09, wpb=64771, bsz=128, num_updates=13894, lr=9.98968e-05, gnorm=1.985, loss_scale=16, train_wall=11, gb_free=2.8, wall=159665 2021-06-20 15:00:02 | INFO | train_inner | epoch 005: 1968 / 3002 loss=2.439, ppl=5.42, wps=5833.9, ups=0.09, wpb=64825, bsz=128, num_updates=13895, lr=9.98968e-05, gnorm=2.006, loss_scale=16, train_wall=11, gb_free=2.8, wall=159676 2021-06-20 15:00:13 | INFO | train_inner | epoch 005: 1969 / 3002 loss=2.47, ppl=5.54, wps=5763.9, ups=0.09, wpb=64788, bsz=128, num_updates=13896, lr=9.98968e-05, gnorm=1.976, loss_scale=16, train_wall=11, gb_free=2.8, wall=159687 2021-06-20 15:00:24 | INFO | train_inner | epoch 005: 1970 / 3002 loss=2.548, ppl=5.85, wps=5761.1, ups=0.09, wpb=64868, bsz=128, num_updates=13897, lr=9.98968e-05, gnorm=1.907, loss_scale=16, train_wall=11, gb_free=2.8, wall=159699 2021-06-20 15:00:36 | INFO | train_inner | epoch 005: 1971 / 3002 loss=2.556, ppl=5.88, wps=5782.7, ups=0.09, wpb=64813, bsz=128, num_updates=13898, lr=9.98968e-05, gnorm=1.989, loss_scale=16, train_wall=11, gb_free=2.8, wall=159710 2021-06-20 15:00:47 | INFO | train_inner | epoch 005: 1972 / 3002 loss=2.485, ppl=5.6, wps=5769.1, ups=0.09, wpb=64797, bsz=128, num_updates=13899, lr=9.98968e-05, gnorm=1.866, loss_scale=16, train_wall=11, gb_free=2.8, wall=159721 2021-06-20 15:00:58 | INFO | train_inner | epoch 005: 1973 / 3002 loss=2.309, ppl=4.95, wps=5873.4, ups=0.09, wpb=64924, bsz=128, num_updates=13900, lr=9.98968e-05, gnorm=1.896, loss_scale=16, train_wall=11, gb_free=2.8, wall=159732 2021-06-20 15:01:09 | INFO | train_inner | epoch 005: 1974 / 3002 loss=2.512, ppl=5.71, wps=5784.2, ups=0.09, wpb=64847, bsz=128, num_updates=13901, lr=9.98968e-05, gnorm=2.006, loss_scale=16, train_wall=11, gb_free=2.8, wall=159743 2021-06-20 15:01:20 | INFO | train_inner | epoch 005: 1975 / 3002 loss=2.494, ppl=5.63, wps=5790.4, ups=0.09, wpb=64823, bsz=128, num_updates=13902, lr=9.98968e-05, gnorm=2.026, loss_scale=16, train_wall=11, gb_free=2.8, wall=159755 2021-06-20 15:01:32 | INFO | train_inner | epoch 005: 1976 / 3002 loss=2.465, ppl=5.52, wps=5771, ups=0.09, wpb=64886, bsz=128, num_updates=13903, lr=9.98968e-05, gnorm=1.913, loss_scale=16, train_wall=11, gb_free=2.8, wall=159766 2021-06-20 15:01:43 | INFO | train_inner | epoch 005: 1977 / 3002 loss=2.508, ppl=5.69, wps=5766.9, ups=0.09, wpb=64758, bsz=128, num_updates=13904, lr=9.98968e-05, gnorm=1.937, loss_scale=16, train_wall=11, gb_free=2.8, wall=159777 2021-06-20 15:01:54 | INFO | train_inner | epoch 005: 1978 / 3002 loss=2.499, ppl=5.65, wps=5894.9, ups=0.09, wpb=64936, bsz=128, num_updates=13905, lr=9.98968e-05, gnorm=1.977, loss_scale=16, train_wall=11, gb_free=2.8, wall=159788 2021-06-20 15:02:05 | INFO | train_inner | epoch 005: 1979 / 3002 loss=2.365, ppl=5.15, wps=5854, ups=0.09, wpb=64860, bsz=128, num_updates=13906, lr=9.98967e-05, gnorm=1.887, loss_scale=16, train_wall=11, gb_free=2.8, wall=159799 2021-06-20 15:02:16 | INFO | train_inner | epoch 005: 1980 / 3002 loss=2.503, ppl=5.67, wps=5732.6, ups=0.09, wpb=64782, bsz=128, num_updates=13907, lr=9.98967e-05, gnorm=2.001, loss_scale=16, train_wall=11, gb_free=2.8, wall=159810 2021-06-20 15:02:27 | INFO | train_inner | epoch 005: 1981 / 3002 loss=2.489, ppl=5.61, wps=5870.2, ups=0.09, wpb=64842, bsz=128, num_updates=13908, lr=9.98967e-05, gnorm=2.038, loss_scale=16, train_wall=11, gb_free=2.8, wall=159822 2021-06-20 15:02:38 | INFO | train_inner | epoch 005: 1982 / 3002 loss=2.493, ppl=5.63, wps=5947.1, ups=0.09, wpb=64834, bsz=128, num_updates=13909, lr=9.98967e-05, gnorm=2.011, loss_scale=16, train_wall=10, gb_free=2.8, wall=159832 2021-06-20 15:02:49 | INFO | train_inner | epoch 005: 1983 / 3002 loss=2.525, ppl=5.76, wps=5884.4, ups=0.09, wpb=64859, bsz=128, num_updates=13910, lr=9.98967e-05, gnorm=2.038, loss_scale=16, train_wall=11, gb_free=2.8, wall=159843 2021-06-20 15:03:00 | INFO | train_inner | epoch 005: 1984 / 3002 loss=2.549, ppl=5.85, wps=5907, ups=0.09, wpb=64825, bsz=128, num_updates=13911, lr=9.98967e-05, gnorm=1.934, loss_scale=16, train_wall=11, gb_free=2.8, wall=159854 2021-06-20 15:03:11 | INFO | train_inner | epoch 005: 1985 / 3002 loss=2.471, ppl=5.54, wps=5791.5, ups=0.09, wpb=64795, bsz=128, num_updates=13912, lr=9.98967e-05, gnorm=2.008, loss_scale=16, train_wall=11, gb_free=2.8, wall=159866 2021-06-20 15:03:22 | INFO | train_inner | epoch 005: 1986 / 3002 loss=2.473, ppl=5.55, wps=5908.5, ups=0.09, wpb=64839, bsz=128, num_updates=13913, lr=9.98967e-05, gnorm=1.901, loss_scale=16, train_wall=11, gb_free=2.8, wall=159877 2021-06-20 15:03:33 | INFO | train_inner | epoch 005: 1987 / 3002 loss=2.566, ppl=5.92, wps=5840.9, ups=0.09, wpb=64784, bsz=128, num_updates=13914, lr=9.98967e-05, gnorm=1.924, loss_scale=16, train_wall=11, gb_free=2.8, wall=159888 2021-06-20 15:03:45 | INFO | train_inner | epoch 005: 1988 / 3002 loss=2.563, ppl=5.91, wps=5753.2, ups=0.09, wpb=64792, bsz=128, num_updates=13915, lr=9.98967e-05, gnorm=1.929, loss_scale=16, train_wall=11, gb_free=2.8, wall=159899 2021-06-20 15:03:56 | INFO | train_inner | epoch 005: 1989 / 3002 loss=2.635, ppl=6.21, wps=5869.4, ups=0.09, wpb=64814, bsz=128, num_updates=13916, lr=9.98967e-05, gnorm=2.015, loss_scale=16, train_wall=11, gb_free=2.8, wall=159910 2021-06-20 15:04:07 | INFO | train_inner | epoch 005: 1990 / 3002 loss=2.374, ppl=5.18, wps=5904.4, ups=0.09, wpb=64919, bsz=128, num_updates=13917, lr=9.98967e-05, gnorm=1.907, loss_scale=16, train_wall=11, gb_free=2.8, wall=159921 2021-06-20 15:04:18 | INFO | train_inner | epoch 005: 1991 / 3002 loss=2.688, ppl=6.44, wps=5925, ups=0.09, wpb=64818, bsz=128, num_updates=13918, lr=9.98966e-05, gnorm=2.003, loss_scale=16, train_wall=10, gb_free=2.8, wall=159932 2021-06-20 15:04:29 | INFO | train_inner | epoch 005: 1992 / 3002 loss=2.407, ppl=5.3, wps=5837.2, ups=0.09, wpb=64846, bsz=128, num_updates=13919, lr=9.98966e-05, gnorm=1.927, loss_scale=16, train_wall=11, gb_free=2.8, wall=159943 2021-06-20 15:04:40 | INFO | train_inner | epoch 005: 1993 / 3002 loss=2.573, ppl=5.95, wps=5763, ups=0.09, wpb=64861, bsz=128, num_updates=13920, lr=9.98966e-05, gnorm=1.995, loss_scale=16, train_wall=11, gb_free=2.8, wall=159954 2021-06-20 15:04:51 | INFO | train_inner | epoch 005: 1994 / 3002 loss=2.52, ppl=5.74, wps=5879.7, ups=0.09, wpb=64799, bsz=128, num_updates=13921, lr=9.98966e-05, gnorm=1.941, loss_scale=16, train_wall=11, gb_free=2.8, wall=159965 2021-06-20 15:05:02 | INFO | train_inner | epoch 005: 1995 / 3002 loss=2.497, ppl=5.64, wps=5916.5, ups=0.09, wpb=64811, bsz=128, num_updates=13922, lr=9.98966e-05, gnorm=2.022, loss_scale=16, train_wall=11, gb_free=2.8, wall=159976 2021-06-20 15:05:13 | INFO | train_inner | epoch 005: 1996 / 3002 loss=2.46, ppl=5.5, wps=5807.4, ups=0.09, wpb=64825, bsz=128, num_updates=13923, lr=9.98966e-05, gnorm=2.044, loss_scale=16, train_wall=11, gb_free=2.8, wall=159987 2021-06-20 15:05:24 | INFO | train_inner | epoch 005: 1997 / 3002 loss=2.602, ppl=6.07, wps=5904.5, ups=0.09, wpb=64828, bsz=128, num_updates=13924, lr=9.98966e-05, gnorm=1.986, loss_scale=16, train_wall=11, gb_free=2.8, wall=159998 2021-06-20 15:05:35 | INFO | train_inner | epoch 005: 1998 / 3002 loss=2.41, ppl=5.31, wps=5799.5, ups=0.09, wpb=64848, bsz=128, num_updates=13925, lr=9.98966e-05, gnorm=1.935, loss_scale=16, train_wall=11, gb_free=2.8, wall=160010 2021-06-20 15:05:46 | INFO | train_inner | epoch 005: 1999 / 3002 loss=2.455, ppl=5.48, wps=5819.8, ups=0.09, wpb=64806, bsz=128, num_updates=13926, lr=9.98966e-05, gnorm=1.87, loss_scale=16, train_wall=11, gb_free=2.8, wall=160021 2021-06-20 15:05:58 | INFO | train_inner | epoch 005: 2000 / 3002 loss=2.602, ppl=6.07, wps=5712.5, ups=0.09, wpb=64751, bsz=128, num_updates=13927, lr=9.98966e-05, gnorm=2.074, loss_scale=16, train_wall=11, gb_free=2.8, wall=160032 2021-06-20 15:06:09 | INFO | train_inner | epoch 005: 2001 / 3002 loss=2.35, ppl=5.1, wps=5865.2, ups=0.09, wpb=64861, bsz=128, num_updates=13928, lr=9.98966e-05, gnorm=1.878, loss_scale=16, train_wall=11, gb_free=2.8, wall=160043 2021-06-20 15:06:20 | INFO | train_inner | epoch 005: 2002 / 3002 loss=2.49, ppl=5.62, wps=5886.2, ups=0.09, wpb=64797, bsz=128, num_updates=13929, lr=9.98966e-05, gnorm=1.88, loss_scale=16, train_wall=11, gb_free=2.8, wall=160054 2021-06-20 15:06:31 | INFO | train_inner | epoch 005: 2003 / 3002 loss=2.472, ppl=5.55, wps=5869.2, ups=0.09, wpb=64841, bsz=128, num_updates=13930, lr=9.98966e-05, gnorm=1.906, loss_scale=16, train_wall=11, gb_free=2.8, wall=160065 2021-06-20 15:06:42 | INFO | train_inner | epoch 005: 2004 / 3002 loss=2.563, ppl=5.91, wps=6042.7, ups=0.09, wpb=64874, bsz=128, num_updates=13931, lr=9.98965e-05, gnorm=1.872, loss_scale=16, train_wall=10, gb_free=2.8, wall=160076 2021-06-20 15:06:53 | INFO | train_inner | epoch 005: 2005 / 3002 loss=2.673, ppl=6.38, wps=5855.3, ups=0.09, wpb=64731, bsz=128, num_updates=13932, lr=9.98965e-05, gnorm=2.065, loss_scale=16, train_wall=11, gb_free=2.8, wall=160087 2021-06-20 15:07:04 | INFO | train_inner | epoch 005: 2006 / 3002 loss=2.564, ppl=5.91, wps=5895, ups=0.09, wpb=64859, bsz=128, num_updates=13933, lr=9.98965e-05, gnorm=1.823, loss_scale=16, train_wall=11, gb_free=2.8, wall=160098 2021-06-20 15:07:15 | INFO | train_inner | epoch 005: 2007 / 3002 loss=2.471, ppl=5.54, wps=5892.3, ups=0.09, wpb=64792, bsz=128, num_updates=13934, lr=9.98965e-05, gnorm=1.921, loss_scale=16, train_wall=11, gb_free=2.8, wall=160109 2021-06-20 15:07:26 | INFO | train_inner | epoch 005: 2008 / 3002 loss=2.508, ppl=5.69, wps=5931.1, ups=0.09, wpb=64841, bsz=128, num_updates=13935, lr=9.98965e-05, gnorm=1.928, loss_scale=16, train_wall=10, gb_free=2.8, wall=160120 2021-06-20 15:07:37 | INFO | train_inner | epoch 005: 2009 / 3002 loss=2.679, ppl=6.4, wps=5816.3, ups=0.09, wpb=64795, bsz=128, num_updates=13936, lr=9.98965e-05, gnorm=1.989, loss_scale=16, train_wall=11, gb_free=2.8, wall=160131 2021-06-20 15:07:48 | INFO | train_inner | epoch 005: 2010 / 3002 loss=2.475, ppl=5.56, wps=5848.1, ups=0.09, wpb=64891, bsz=128, num_updates=13937, lr=9.98965e-05, gnorm=1.917, loss_scale=16, train_wall=11, gb_free=2.8, wall=160142 2021-06-20 15:07:59 | INFO | train_inner | epoch 005: 2011 / 3002 loss=2.56, ppl=5.9, wps=5900.4, ups=0.09, wpb=64830, bsz=128, num_updates=13938, lr=9.98965e-05, gnorm=1.999, loss_scale=16, train_wall=11, gb_free=2.8, wall=160153 2021-06-20 15:08:10 | INFO | train_inner | epoch 005: 2012 / 3002 loss=2.441, ppl=5.43, wps=5827.5, ups=0.09, wpb=64877, bsz=128, num_updates=13939, lr=9.98965e-05, gnorm=1.946, loss_scale=16, train_wall=11, gb_free=2.8, wall=160164 2021-06-20 15:08:21 | INFO | train_inner | epoch 005: 2013 / 3002 loss=2.644, ppl=6.25, wps=5907.7, ups=0.09, wpb=64803, bsz=128, num_updates=13940, lr=9.98965e-05, gnorm=1.984, loss_scale=16, train_wall=11, gb_free=2.8, wall=160175 2021-06-20 15:08:32 | INFO | train_inner | epoch 005: 2014 / 3002 loss=2.526, ppl=5.76, wps=5846.5, ups=0.09, wpb=64878, bsz=128, num_updates=13941, lr=9.98965e-05, gnorm=1.94, loss_scale=16, train_wall=11, gb_free=2.8, wall=160186 2021-06-20 15:08:43 | INFO | train_inner | epoch 005: 2015 / 3002 loss=2.417, ppl=5.34, wps=5900, ups=0.09, wpb=64787, bsz=128, num_updates=13942, lr=9.98965e-05, gnorm=1.962, loss_scale=16, train_wall=11, gb_free=2.8, wall=160197 2021-06-20 15:08:54 | INFO | train_inner | epoch 005: 2016 / 3002 loss=2.571, ppl=5.94, wps=5859, ups=0.09, wpb=64817, bsz=128, num_updates=13943, lr=9.98964e-05, gnorm=2.144, loss_scale=16, train_wall=11, gb_free=2.8, wall=160208 2021-06-20 15:09:05 | INFO | train_inner | epoch 005: 2017 / 3002 loss=2.629, ppl=6.19, wps=5868.7, ups=0.09, wpb=64856, bsz=128, num_updates=13944, lr=9.98964e-05, gnorm=2.17, loss_scale=16, train_wall=11, gb_free=2.8, wall=160219 2021-06-20 15:09:16 | INFO | train_inner | epoch 005: 2018 / 3002 loss=2.501, ppl=5.66, wps=5807.5, ups=0.09, wpb=64874, bsz=128, num_updates=13945, lr=9.98964e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=160231 2021-06-20 15:09:27 | INFO | train_inner | epoch 005: 2019 / 3002 loss=2.565, ppl=5.92, wps=5799.8, ups=0.09, wpb=64855, bsz=128, num_updates=13946, lr=9.98964e-05, gnorm=1.923, loss_scale=32, train_wall=11, gb_free=2.8, wall=160242 2021-06-20 15:09:38 | INFO | train_inner | epoch 005: 2020 / 3002 loss=2.542, ppl=5.82, wps=5930.6, ups=0.09, wpb=64829, bsz=128, num_updates=13947, lr=9.98964e-05, gnorm=2.036, loss_scale=32, train_wall=10, gb_free=2.8, wall=160253 2021-06-20 15:09:50 | INFO | train_inner | epoch 005: 2021 / 3002 loss=2.595, ppl=6.04, wps=5803, ups=0.09, wpb=64773, bsz=128, num_updates=13948, lr=9.98964e-05, gnorm=1.968, loss_scale=32, train_wall=11, gb_free=2.8, wall=160264 2021-06-20 15:10:00 | INFO | train_inner | epoch 005: 2022 / 3002 loss=2.505, ppl=5.68, wps=5948.9, ups=0.09, wpb=64820, bsz=128, num_updates=13949, lr=9.98964e-05, gnorm=1.926, loss_scale=32, train_wall=10, gb_free=2.8, wall=160275 2021-06-20 15:10:12 | INFO | train_inner | epoch 005: 2023 / 3002 loss=2.574, ppl=5.95, wps=5795.7, ups=0.09, wpb=64836, bsz=128, num_updates=13950, lr=9.98964e-05, gnorm=1.887, loss_scale=32, train_wall=11, gb_free=2.8, wall=160286 2021-06-20 15:10:23 | INFO | train_inner | epoch 005: 2024 / 3002 loss=2.625, ppl=6.17, wps=5810.3, ups=0.09, wpb=64679, bsz=128, num_updates=13951, lr=9.98964e-05, gnorm=1.949, loss_scale=32, train_wall=11, gb_free=2.8, wall=160297 2021-06-20 15:10:34 | INFO | train_inner | epoch 005: 2025 / 3002 loss=2.56, ppl=5.9, wps=5884.3, ups=0.09, wpb=64764, bsz=128, num_updates=13952, lr=9.98964e-05, gnorm=1.899, loss_scale=32, train_wall=11, gb_free=2.8, wall=160308 2021-06-20 15:10:45 | INFO | train_inner | epoch 005: 2026 / 3002 loss=2.533, ppl=5.79, wps=5829.6, ups=0.09, wpb=64818, bsz=128, num_updates=13953, lr=9.98964e-05, gnorm=1.931, loss_scale=32, train_wall=11, gb_free=2.8, wall=160319 2021-06-20 15:10:56 | INFO | train_inner | epoch 005: 2027 / 3002 loss=2.591, ppl=6.02, wps=5781.4, ups=0.09, wpb=64798, bsz=128, num_updates=13954, lr=9.98964e-05, gnorm=2.007, loss_scale=32, train_wall=11, gb_free=2.8, wall=160330 2021-06-20 15:11:07 | INFO | train_inner | epoch 005: 2028 / 3002 loss=2.502, ppl=5.66, wps=5797.1, ups=0.09, wpb=64745, bsz=128, num_updates=13955, lr=9.98964e-05, gnorm=1.913, loss_scale=32, train_wall=11, gb_free=2.8, wall=160342 2021-06-20 15:11:18 | INFO | train_inner | epoch 005: 2029 / 3002 loss=2.714, ppl=6.56, wps=5843.2, ups=0.09, wpb=64823, bsz=128, num_updates=13956, lr=9.98963e-05, gnorm=2.058, loss_scale=32, train_wall=11, gb_free=2.8, wall=160353 2021-06-20 15:11:29 | INFO | train_inner | epoch 005: 2030 / 3002 loss=2.518, ppl=5.73, wps=5868.1, ups=0.09, wpb=64817, bsz=128, num_updates=13957, lr=9.98963e-05, gnorm=1.945, loss_scale=32, train_wall=11, gb_free=2.8, wall=160364 2021-06-20 15:11:41 | INFO | train_inner | epoch 005: 2031 / 3002 loss=2.534, ppl=5.79, wps=5749.3, ups=0.09, wpb=64829, bsz=128, num_updates=13958, lr=9.98963e-05, gnorm=1.876, loss_scale=32, train_wall=11, gb_free=2.8, wall=160375 2021-06-20 15:11:52 | INFO | train_inner | epoch 005: 2032 / 3002 loss=2.52, ppl=5.74, wps=5838.6, ups=0.09, wpb=64798, bsz=128, num_updates=13959, lr=9.98963e-05, gnorm=2.002, loss_scale=32, train_wall=11, gb_free=2.8, wall=160386 2021-06-20 15:12:03 | INFO | train_inner | epoch 005: 2033 / 3002 loss=2.434, ppl=5.4, wps=5899, ups=0.09, wpb=64815, bsz=128, num_updates=13960, lr=9.98963e-05, gnorm=1.976, loss_scale=32, train_wall=11, gb_free=2.8, wall=160397 2021-06-20 15:12:14 | INFO | train_inner | epoch 005: 2034 / 3002 loss=2.457, ppl=5.49, wps=5796.5, ups=0.09, wpb=64826, bsz=128, num_updates=13961, lr=9.98963e-05, gnorm=1.982, loss_scale=32, train_wall=11, gb_free=2.8, wall=160408 2021-06-20 15:12:25 | INFO | train_inner | epoch 005: 2035 / 3002 loss=2.392, ppl=5.25, wps=5787.8, ups=0.09, wpb=64805, bsz=128, num_updates=13962, lr=9.98963e-05, gnorm=2.023, loss_scale=32, train_wall=11, gb_free=2.8, wall=160419 2021-06-20 15:12:36 | INFO | train_inner | epoch 005: 2036 / 3002 loss=2.426, ppl=5.37, wps=5736, ups=0.09, wpb=64750, bsz=128, num_updates=13963, lr=9.98963e-05, gnorm=1.909, loss_scale=32, train_wall=11, gb_free=2.8, wall=160431 2021-06-20 15:12:48 | INFO | train_inner | epoch 005: 2037 / 3002 loss=2.477, ppl=5.57, wps=5813.3, ups=0.09, wpb=64888, bsz=128, num_updates=13964, lr=9.98963e-05, gnorm=2.017, loss_scale=32, train_wall=11, gb_free=2.8, wall=160442 2021-06-20 15:12:59 | INFO | train_inner | epoch 005: 2038 / 3002 loss=2.36, ppl=5.13, wps=5940.1, ups=0.09, wpb=64816, bsz=128, num_updates=13965, lr=9.98963e-05, gnorm=1.951, loss_scale=32, train_wall=10, gb_free=2.8, wall=160453 2021-06-20 15:13:10 | INFO | train_inner | epoch 005: 2039 / 3002 loss=2.541, ppl=5.82, wps=5787.9, ups=0.09, wpb=64859, bsz=128, num_updates=13966, lr=9.98963e-05, gnorm=1.915, loss_scale=32, train_wall=11, gb_free=2.8, wall=160464 2021-06-20 15:13:21 | INFO | train_inner | epoch 005: 2040 / 3002 loss=2.415, ppl=5.33, wps=5871.5, ups=0.09, wpb=64904, bsz=128, num_updates=13967, lr=9.98963e-05, gnorm=1.976, loss_scale=32, train_wall=11, gb_free=2.8, wall=160475 2021-06-20 15:13:32 | INFO | train_inner | epoch 005: 2041 / 3002 loss=2.381, ppl=5.21, wps=5789.1, ups=0.09, wpb=64762, bsz=128, num_updates=13968, lr=9.98962e-05, gnorm=1.925, loss_scale=32, train_wall=11, gb_free=2.8, wall=160486 2021-06-20 15:13:43 | INFO | train_inner | epoch 005: 2042 / 3002 loss=2.556, ppl=5.88, wps=5790.9, ups=0.09, wpb=64869, bsz=128, num_updates=13969, lr=9.98962e-05, gnorm=1.987, loss_scale=32, train_wall=11, gb_free=2.8, wall=160497 2021-06-20 15:13:54 | INFO | train_inner | epoch 005: 2043 / 3002 loss=2.655, ppl=6.3, wps=5986.1, ups=0.09, wpb=64878, bsz=128, num_updates=13970, lr=9.98962e-05, gnorm=1.992, loss_scale=32, train_wall=10, gb_free=2.8, wall=160508 2021-06-20 15:14:05 | INFO | train_inner | epoch 005: 2044 / 3002 loss=2.393, ppl=5.25, wps=5811.8, ups=0.09, wpb=64923, bsz=128, num_updates=13971, lr=9.98962e-05, gnorm=2.006, loss_scale=32, train_wall=11, gb_free=2.8, wall=160520 2021-06-20 15:14:16 | INFO | train_inner | epoch 005: 2045 / 3002 loss=2.612, ppl=6.11, wps=5908, ups=0.09, wpb=64765, bsz=128, num_updates=13972, lr=9.98962e-05, gnorm=2.057, loss_scale=32, train_wall=11, gb_free=2.8, wall=160530 2021-06-20 15:14:27 | INFO | train_inner | epoch 005: 2046 / 3002 loss=2.588, ppl=6.01, wps=5910.9, ups=0.09, wpb=64844, bsz=128, num_updates=13973, lr=9.98962e-05, gnorm=1.977, loss_scale=32, train_wall=11, gb_free=2.8, wall=160541 2021-06-20 15:14:38 | INFO | train_inner | epoch 005: 2047 / 3002 loss=2.519, ppl=5.73, wps=5813.5, ups=0.09, wpb=64804, bsz=128, num_updates=13974, lr=9.98962e-05, gnorm=1.949, loss_scale=32, train_wall=11, gb_free=2.8, wall=160553 2021-06-20 15:14:49 | INFO | train_inner | epoch 005: 2048 / 3002 loss=2.427, ppl=5.38, wps=5767.6, ups=0.09, wpb=64854, bsz=128, num_updates=13975, lr=9.98962e-05, gnorm=2.071, loss_scale=32, train_wall=11, gb_free=2.8, wall=160564 2021-06-20 15:15:01 | INFO | train_inner | epoch 005: 2049 / 3002 loss=2.512, ppl=5.7, wps=5806.4, ups=0.09, wpb=64849, bsz=128, num_updates=13976, lr=9.98962e-05, gnorm=2.032, loss_scale=32, train_wall=11, gb_free=2.8, wall=160575 2021-06-20 15:15:12 | INFO | train_inner | epoch 005: 2050 / 3002 loss=2.498, ppl=5.65, wps=5801, ups=0.09, wpb=64735, bsz=128, num_updates=13977, lr=9.98962e-05, gnorm=1.926, loss_scale=32, train_wall=11, gb_free=2.8, wall=160586 2021-06-20 15:15:23 | INFO | train_inner | epoch 005: 2051 / 3002 loss=2.465, ppl=5.52, wps=5873.3, ups=0.09, wpb=64844, bsz=128, num_updates=13978, lr=9.98962e-05, gnorm=1.905, loss_scale=32, train_wall=11, gb_free=2.8, wall=160597 2021-06-20 15:15:34 | INFO | train_inner | epoch 005: 2052 / 3002 loss=2.311, ppl=4.96, wps=5757.5, ups=0.09, wpb=64831, bsz=128, num_updates=13979, lr=9.98962e-05, gnorm=2.058, loss_scale=32, train_wall=11, gb_free=2.8, wall=160608 2021-06-20 15:15:45 | INFO | train_inner | epoch 005: 2053 / 3002 loss=2.441, ppl=5.43, wps=5886.7, ups=0.09, wpb=64808, bsz=128, num_updates=13980, lr=9.98962e-05, gnorm=1.976, loss_scale=32, train_wall=11, gb_free=2.8, wall=160619 2021-06-20 15:15:56 | INFO | train_inner | epoch 005: 2054 / 3002 loss=2.469, ppl=5.54, wps=5766, ups=0.09, wpb=64761, bsz=128, num_updates=13981, lr=9.98961e-05, gnorm=2.003, loss_scale=32, train_wall=11, gb_free=2.8, wall=160631 2021-06-20 15:16:08 | INFO | train_inner | epoch 005: 2055 / 3002 loss=2.586, ppl=6, wps=5759.1, ups=0.09, wpb=64886, bsz=128, num_updates=13982, lr=9.98961e-05, gnorm=2.059, loss_scale=32, train_wall=11, gb_free=2.8, wall=160642 2021-06-20 15:16:19 | INFO | train_inner | epoch 005: 2056 / 3002 loss=2.522, ppl=5.75, wps=5911.8, ups=0.09, wpb=64769, bsz=128, num_updates=13983, lr=9.98961e-05, gnorm=1.936, loss_scale=32, train_wall=10, gb_free=2.8, wall=160653 2021-06-20 15:16:30 | INFO | train_inner | epoch 005: 2057 / 3002 loss=2.599, ppl=6.06, wps=5810.9, ups=0.09, wpb=64819, bsz=128, num_updates=13984, lr=9.98961e-05, gnorm=2.114, loss_scale=32, train_wall=11, gb_free=2.8, wall=160664 2021-06-20 15:16:41 | INFO | train_inner | epoch 005: 2058 / 3002 loss=2.485, ppl=5.6, wps=5754.3, ups=0.09, wpb=64798, bsz=128, num_updates=13985, lr=9.98961e-05, gnorm=1.935, loss_scale=32, train_wall=11, gb_free=2.8, wall=160675 2021-06-20 15:16:52 | INFO | train_inner | epoch 005: 2059 / 3002 loss=2.566, ppl=5.92, wps=5773.9, ups=0.09, wpb=64797, bsz=128, num_updates=13986, lr=9.98961e-05, gnorm=1.914, loss_scale=32, train_wall=11, gb_free=2.8, wall=160687 2021-06-20 15:17:03 | INFO | train_inner | epoch 005: 2060 / 3002 loss=2.586, ppl=6.01, wps=5941.9, ups=0.09, wpb=64824, bsz=128, num_updates=13987, lr=9.98961e-05, gnorm=2.053, loss_scale=32, train_wall=10, gb_free=2.8, wall=160697 2021-06-20 15:17:14 | INFO | train_inner | epoch 005: 2061 / 3002 loss=2.584, ppl=6, wps=5943, ups=0.09, wpb=64790, bsz=128, num_updates=13988, lr=9.98961e-05, gnorm=1.977, loss_scale=32, train_wall=10, gb_free=2.8, wall=160708 2021-06-20 15:17:25 | INFO | train_inner | epoch 005: 2062 / 3002 loss=2.407, ppl=5.3, wps=5840.9, ups=0.09, wpb=64817, bsz=128, num_updates=13989, lr=9.98961e-05, gnorm=1.983, loss_scale=32, train_wall=11, gb_free=2.8, wall=160719 2021-06-20 15:17:36 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 2021-06-20 15:17:47 | INFO | train_inner | epoch 005: 2064 / 3002 loss=2.509, ppl=5.69, wps=2968.3, ups=0.05, wpb=64873, bsz=128, num_updates=13990, lr=9.98961e-05, gnorm=2.002, loss_scale=16, train_wall=21, gb_free=2.8, wall=160741 2021-06-20 15:17:58 | INFO | train_inner | epoch 005: 2065 / 3002 loss=2.642, ppl=6.24, wps=5791.1, ups=0.09, wpb=64831, bsz=128, num_updates=13991, lr=9.98961e-05, gnorm=1.991, loss_scale=16, train_wall=11, gb_free=2.8, wall=160753 2021-06-20 15:18:09 | INFO | train_inner | epoch 005: 2066 / 3002 loss=2.496, ppl=5.64, wps=5860, ups=0.09, wpb=64878, bsz=128, num_updates=13992, lr=9.98961e-05, gnorm=2.072, loss_scale=16, train_wall=11, gb_free=2.8, wall=160764 2021-06-20 15:18:21 | INFO | train_inner | epoch 005: 2067 / 3002 loss=2.547, ppl=5.84, wps=5722.2, ups=0.09, wpb=64860, bsz=128, num_updates=13993, lr=9.9896e-05, gnorm=1.975, loss_scale=16, train_wall=11, gb_free=2.8, wall=160775 2021-06-20 15:18:32 | INFO | train_inner | epoch 005: 2068 / 3002 loss=2.439, ppl=5.42, wps=5816.7, ups=0.09, wpb=64821, bsz=128, num_updates=13994, lr=9.9896e-05, gnorm=1.945, loss_scale=16, train_wall=11, gb_free=2.8, wall=160786 2021-06-20 15:18:43 | INFO | train_inner | epoch 005: 2069 / 3002 loss=2.467, ppl=5.53, wps=5879.5, ups=0.09, wpb=64820, bsz=128, num_updates=13995, lr=9.9896e-05, gnorm=2.004, loss_scale=16, train_wall=11, gb_free=2.8, wall=160797 2021-06-20 15:18:54 | INFO | train_inner | epoch 005: 2070 / 3002 loss=2.391, ppl=5.24, wps=5804.1, ups=0.09, wpb=64833, bsz=128, num_updates=13996, lr=9.9896e-05, gnorm=1.884, loss_scale=16, train_wall=11, gb_free=2.8, wall=160808 2021-06-20 15:19:05 | INFO | train_inner | epoch 005: 2071 / 3002 loss=2.733, ppl=6.65, wps=5792.6, ups=0.09, wpb=64802, bsz=128, num_updates=13997, lr=9.9896e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=160819 2021-06-20 15:19:16 | INFO | train_inner | epoch 005: 2072 / 3002 loss=2.471, ppl=5.54, wps=5870.3, ups=0.09, wpb=64830, bsz=128, num_updates=13998, lr=9.9896e-05, gnorm=2.012, loss_scale=16, train_wall=11, gb_free=2.8, wall=160830 2021-06-20 15:19:27 | INFO | train_inner | epoch 005: 2073 / 3002 loss=2.405, ppl=5.3, wps=5793.5, ups=0.09, wpb=64877, bsz=128, num_updates=13999, lr=9.9896e-05, gnorm=1.927, loss_scale=16, train_wall=11, gb_free=2.8, wall=160842 2021-06-20 15:19:38 | INFO | train_inner | epoch 005: 2074 / 3002 loss=2.557, ppl=5.89, wps=5969, ups=0.09, wpb=64890, bsz=128, num_updates=14000, lr=9.9896e-05, gnorm=1.909, loss_scale=16, train_wall=10, gb_free=2.8, wall=160853 2021-06-20 15:19:49 | INFO | train_inner | epoch 005: 2075 / 3002 loss=2.401, ppl=5.28, wps=5847.9, ups=0.09, wpb=64902, bsz=128, num_updates=14001, lr=9.9896e-05, gnorm=1.986, loss_scale=16, train_wall=11, gb_free=2.8, wall=160864 2021-06-20 15:20:01 | INFO | train_inner | epoch 005: 2076 / 3002 loss=2.58, ppl=5.98, wps=5777.8, ups=0.09, wpb=64811, bsz=128, num_updates=14002, lr=9.9896e-05, gnorm=1.987, loss_scale=16, train_wall=11, gb_free=2.8, wall=160875 2021-06-20 15:20:12 | INFO | train_inner | epoch 005: 2077 / 3002 loss=2.567, ppl=5.92, wps=5897.3, ups=0.09, wpb=64778, bsz=128, num_updates=14003, lr=9.9896e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=160886 2021-06-20 15:20:23 | INFO | train_inner | epoch 005: 2078 / 3002 loss=2.508, ppl=5.69, wps=5778.5, ups=0.09, wpb=64884, bsz=128, num_updates=14004, lr=9.9896e-05, gnorm=1.958, loss_scale=16, train_wall=11, gb_free=2.8, wall=160897 2021-06-20 15:20:34 | INFO | train_inner | epoch 005: 2079 / 3002 loss=2.334, ppl=5.04, wps=5798.2, ups=0.09, wpb=64879, bsz=128, num_updates=14005, lr=9.9896e-05, gnorm=2.029, loss_scale=16, train_wall=11, gb_free=2.8, wall=160908 2021-06-20 15:20:45 | INFO | train_inner | epoch 005: 2080 / 3002 loss=2.481, ppl=5.58, wps=5800, ups=0.09, wpb=64864, bsz=128, num_updates=14006, lr=9.98959e-05, gnorm=1.976, loss_scale=16, train_wall=11, gb_free=2.8, wall=160919 2021-06-20 15:20:56 | INFO | train_inner | epoch 005: 2081 / 3002 loss=2.485, ppl=5.6, wps=5730.3, ups=0.09, wpb=64822, bsz=128, num_updates=14007, lr=9.98959e-05, gnorm=1.908, loss_scale=16, train_wall=11, gb_free=2.8, wall=160931 2021-06-20 15:21:07 | INFO | train_inner | epoch 005: 2082 / 3002 loss=2.573, ppl=5.95, wps=5903.7, ups=0.09, wpb=64812, bsz=128, num_updates=14008, lr=9.98959e-05, gnorm=2.103, loss_scale=16, train_wall=11, gb_free=2.8, wall=160942 2021-06-20 15:21:18 | INFO | train_inner | epoch 005: 2083 / 3002 loss=2.383, ppl=5.21, wps=5927, ups=0.09, wpb=64941, bsz=128, num_updates=14009, lr=9.98959e-05, gnorm=1.883, loss_scale=16, train_wall=11, gb_free=2.8, wall=160953 2021-06-20 15:21:30 | INFO | train_inner | epoch 005: 2084 / 3002 loss=2.342, ppl=5.07, wps=5707.8, ups=0.09, wpb=64889, bsz=128, num_updates=14010, lr=9.98959e-05, gnorm=1.865, loss_scale=16, train_wall=11, gb_free=2.8, wall=160964 2021-06-20 15:21:41 | INFO | train_inner | epoch 005: 2085 / 3002 loss=2.398, ppl=5.27, wps=5988.4, ups=0.09, wpb=64896, bsz=128, num_updates=14011, lr=9.98959e-05, gnorm=1.85, loss_scale=16, train_wall=10, gb_free=2.8, wall=160975 2021-06-20 15:21:52 | INFO | train_inner | epoch 005: 2086 / 3002 loss=2.494, ppl=5.63, wps=5866.7, ups=0.09, wpb=64799, bsz=128, num_updates=14012, lr=9.98959e-05, gnorm=1.894, loss_scale=16, train_wall=11, gb_free=2.8, wall=160986 2021-06-20 15:22:03 | INFO | train_inner | epoch 005: 2087 / 3002 loss=2.492, ppl=5.63, wps=5724.8, ups=0.09, wpb=64941, bsz=128, num_updates=14013, lr=9.98959e-05, gnorm=1.988, loss_scale=16, train_wall=11, gb_free=2.8, wall=160997 2021-06-20 15:22:14 | INFO | train_inner | epoch 005: 2088 / 3002 loss=2.401, ppl=5.28, wps=5892.5, ups=0.09, wpb=64881, bsz=128, num_updates=14014, lr=9.98959e-05, gnorm=2.031, loss_scale=16, train_wall=11, gb_free=2.8, wall=161008 2021-06-20 15:22:25 | INFO | train_inner | epoch 005: 2089 / 3002 loss=2.469, ppl=5.54, wps=5887.3, ups=0.09, wpb=64786, bsz=128, num_updates=14015, lr=9.98959e-05, gnorm=1.928, loss_scale=16, train_wall=11, gb_free=2.8, wall=161019 2021-06-20 15:22:36 | INFO | train_inner | epoch 005: 2090 / 3002 loss=2.53, ppl=5.78, wps=5804.7, ups=0.09, wpb=64849, bsz=128, num_updates=14016, lr=9.98959e-05, gnorm=2.055, loss_scale=16, train_wall=11, gb_free=2.8, wall=161031 2021-06-20 15:22:47 | INFO | train_inner | epoch 005: 2091 / 3002 loss=2.487, ppl=5.61, wps=5833.8, ups=0.09, wpb=64907, bsz=128, num_updates=14017, lr=9.98959e-05, gnorm=1.891, loss_scale=16, train_wall=11, gb_free=2.8, wall=161042 2021-06-20 15:22:58 | INFO | train_inner | epoch 005: 2092 / 3002 loss=2.504, ppl=5.67, wps=5928.3, ups=0.09, wpb=64896, bsz=128, num_updates=14018, lr=9.98958e-05, gnorm=1.85, loss_scale=16, train_wall=10, gb_free=2.8, wall=161053 2021-06-20 15:23:10 | INFO | train_inner | epoch 005: 2093 / 3002 loss=2.538, ppl=5.81, wps=5754.3, ups=0.09, wpb=64830, bsz=128, num_updates=14019, lr=9.98958e-05, gnorm=1.922, loss_scale=16, train_wall=11, gb_free=2.8, wall=161064 2021-06-20 15:23:20 | INFO | train_inner | epoch 005: 2094 / 3002 loss=2.469, ppl=5.54, wps=5906.3, ups=0.09, wpb=64765, bsz=128, num_updates=14020, lr=9.98958e-05, gnorm=1.957, loss_scale=16, train_wall=11, gb_free=2.8, wall=161075 2021-06-20 15:23:31 | INFO | train_inner | epoch 005: 2095 / 3002 loss=2.434, ppl=5.4, wps=5905.5, ups=0.09, wpb=64885, bsz=128, num_updates=14021, lr=9.98958e-05, gnorm=2.036, loss_scale=16, train_wall=11, gb_free=2.8, wall=161086 2021-06-20 15:23:43 | INFO | train_inner | epoch 005: 2096 / 3002 loss=2.479, ppl=5.57, wps=5843.1, ups=0.09, wpb=64786, bsz=128, num_updates=14022, lr=9.98958e-05, gnorm=2.049, loss_scale=16, train_wall=11, gb_free=2.8, wall=161097 2021-06-20 15:23:53 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-20 15:24:05 | INFO | train_inner | epoch 005: 2098 / 3002 loss=2.463, ppl=5.52, wps=2940.7, ups=0.05, wpb=64880, bsz=128, num_updates=14023, lr=9.98958e-05, gnorm=2.104, loss_scale=8, train_wall=21, gb_free=2.8, wall=161119 2021-06-20 15:24:16 | INFO | train_inner | epoch 005: 2099 / 3002 loss=2.513, ppl=5.71, wps=5794.1, ups=0.09, wpb=64744, bsz=128, num_updates=14024, lr=9.98958e-05, gnorm=1.907, loss_scale=8, train_wall=11, gb_free=2.8, wall=161130 2021-06-20 15:24:27 | INFO | train_inner | epoch 005: 2100 / 3002 loss=2.47, ppl=5.54, wps=5978.9, ups=0.09, wpb=64805, bsz=128, num_updates=14025, lr=9.98958e-05, gnorm=1.875, loss_scale=8, train_wall=10, gb_free=2.8, wall=161141 2021-06-20 15:24:37 | INFO | train_inner | epoch 005: 2101 / 3002 loss=2.55, ppl=5.86, wps=5993.5, ups=0.09, wpb=64883, bsz=128, num_updates=14026, lr=9.98958e-05, gnorm=2.035, loss_scale=8, train_wall=10, gb_free=2.8, wall=161152 2021-06-20 15:24:49 | INFO | train_inner | epoch 005: 2102 / 3002 loss=2.526, ppl=5.76, wps=5847.5, ups=0.09, wpb=64814, bsz=128, num_updates=14027, lr=9.98958e-05, gnorm=2.091, loss_scale=8, train_wall=11, gb_free=2.8, wall=161163 2021-06-20 15:25:00 | INFO | train_inner | epoch 005: 2103 / 3002 loss=2.433, ppl=5.4, wps=5861.7, ups=0.09, wpb=64880, bsz=128, num_updates=14028, lr=9.98958e-05, gnorm=1.856, loss_scale=8, train_wall=11, gb_free=2.8, wall=161174 2021-06-20 15:25:11 | INFO | train_inner | epoch 005: 2104 / 3002 loss=2.467, ppl=5.53, wps=5816.1, ups=0.09, wpb=64840, bsz=128, num_updates=14029, lr=9.98958e-05, gnorm=1.881, loss_scale=8, train_wall=11, gb_free=2.8, wall=161185 2021-06-20 15:25:22 | INFO | train_inner | epoch 005: 2105 / 3002 loss=2.328, ppl=5.02, wps=5858.7, ups=0.09, wpb=64805, bsz=128, num_updates=14030, lr=9.98958e-05, gnorm=1.853, loss_scale=8, train_wall=11, gb_free=2.8, wall=161196 2021-06-20 15:25:33 | INFO | train_inner | epoch 005: 2106 / 3002 loss=2.476, ppl=5.56, wps=5807, ups=0.09, wpb=64877, bsz=128, num_updates=14031, lr=9.98957e-05, gnorm=1.941, loss_scale=8, train_wall=11, gb_free=2.8, wall=161207 2021-06-20 15:25:44 | INFO | train_inner | epoch 005: 2107 / 3002 loss=2.487, ppl=5.61, wps=5807.6, ups=0.09, wpb=64857, bsz=128, num_updates=14032, lr=9.98957e-05, gnorm=1.918, loss_scale=8, train_wall=11, gb_free=2.8, wall=161218 2021-06-20 15:25:55 | INFO | train_inner | epoch 005: 2108 / 3002 loss=2.48, ppl=5.58, wps=6029.8, ups=0.09, wpb=64893, bsz=128, num_updates=14033, lr=9.98957e-05, gnorm=1.974, loss_scale=8, train_wall=10, gb_free=2.8, wall=161229 2021-06-20 15:26:06 | INFO | train_inner | epoch 005: 2109 / 3002 loss=2.492, ppl=5.62, wps=5882.5, ups=0.09, wpb=64792, bsz=128, num_updates=14034, lr=9.98957e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=161240 2021-06-20 15:26:17 | INFO | train_inner | epoch 005: 2110 / 3002 loss=2.462, ppl=5.51, wps=5902.7, ups=0.09, wpb=64870, bsz=128, num_updates=14035, lr=9.98957e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=161251 2021-06-20 15:26:28 | INFO | train_inner | epoch 005: 2111 / 3002 loss=2.69, ppl=6.45, wps=5842, ups=0.09, wpb=64823, bsz=128, num_updates=14036, lr=9.98957e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=161262 2021-06-20 15:26:39 | INFO | train_inner | epoch 005: 2112 / 3002 loss=2.534, ppl=5.79, wps=5860.9, ups=0.09, wpb=64846, bsz=128, num_updates=14037, lr=9.98957e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=161273 2021-06-20 15:26:50 | INFO | train_inner | epoch 005: 2113 / 3002 loss=2.517, ppl=5.72, wps=5757, ups=0.09, wpb=64879, bsz=128, num_updates=14038, lr=9.98957e-05, gnorm=1.892, loss_scale=8, train_wall=11, gb_free=2.8, wall=161285 2021-06-20 15:27:01 | INFO | train_inner | epoch 005: 2114 / 3002 loss=2.606, ppl=6.09, wps=5869.7, ups=0.09, wpb=64841, bsz=128, num_updates=14039, lr=9.98957e-05, gnorm=2.303, loss_scale=8, train_wall=11, gb_free=2.8, wall=161296 2021-06-20 15:27:12 | INFO | train_inner | epoch 005: 2115 / 3002 loss=2.473, ppl=5.55, wps=5982.9, ups=0.09, wpb=64881, bsz=128, num_updates=14040, lr=9.98957e-05, gnorm=1.984, loss_scale=8, train_wall=10, gb_free=2.8, wall=161307 2021-06-20 15:27:23 | INFO | train_inner | epoch 005: 2116 / 3002 loss=2.624, ppl=6.17, wps=5822.3, ups=0.09, wpb=64779, bsz=128, num_updates=14041, lr=9.98957e-05, gnorm=2.136, loss_scale=8, train_wall=11, gb_free=2.8, wall=161318 2021-06-20 15:27:35 | INFO | train_inner | epoch 005: 2117 / 3002 loss=2.418, ppl=5.34, wps=5757.4, ups=0.09, wpb=64829, bsz=128, num_updates=14042, lr=9.98957e-05, gnorm=1.877, loss_scale=8, train_wall=11, gb_free=2.8, wall=161329 2021-06-20 15:27:46 | INFO | train_inner | epoch 005: 2118 / 3002 loss=2.468, ppl=5.53, wps=5788.4, ups=0.09, wpb=64887, bsz=128, num_updates=14043, lr=9.98956e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=161340 2021-06-20 15:27:57 | INFO | train_inner | epoch 005: 2119 / 3002 loss=2.663, ppl=6.33, wps=5892.8, ups=0.09, wpb=64797, bsz=128, num_updates=14044, lr=9.98956e-05, gnorm=1.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=161351 2021-06-20 15:28:08 | INFO | train_inner | epoch 005: 2120 / 3002 loss=2.556, ppl=5.88, wps=5907.3, ups=0.09, wpb=64829, bsz=128, num_updates=14045, lr=9.98956e-05, gnorm=2.017, loss_scale=8, train_wall=11, gb_free=2.8, wall=161362 2021-06-20 15:28:19 | INFO | train_inner | epoch 005: 2121 / 3002 loss=2.603, ppl=6.08, wps=5856, ups=0.09, wpb=64851, bsz=128, num_updates=14046, lr=9.98956e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=161373 2021-06-20 15:28:30 | INFO | train_inner | epoch 005: 2122 / 3002 loss=2.515, ppl=5.72, wps=5913, ups=0.09, wpb=64848, bsz=128, num_updates=14047, lr=9.98956e-05, gnorm=2.031, loss_scale=8, train_wall=11, gb_free=2.8, wall=161384 2021-06-20 15:28:41 | INFO | train_inner | epoch 005: 2123 / 3002 loss=2.426, ppl=5.37, wps=5866.9, ups=0.09, wpb=64784, bsz=128, num_updates=14048, lr=9.98956e-05, gnorm=1.97, loss_scale=8, train_wall=11, gb_free=2.8, wall=161395 2021-06-20 15:28:52 | INFO | train_inner | epoch 005: 2124 / 3002 loss=2.394, ppl=5.26, wps=5818.7, ups=0.09, wpb=64879, bsz=128, num_updates=14049, lr=9.98956e-05, gnorm=1.912, loss_scale=8, train_wall=11, gb_free=2.8, wall=161406 2021-06-20 15:29:03 | INFO | train_inner | epoch 005: 2125 / 3002 loss=2.618, ppl=6.14, wps=5792.5, ups=0.09, wpb=64815, bsz=128, num_updates=14050, lr=9.98956e-05, gnorm=2.062, loss_scale=8, train_wall=11, gb_free=2.8, wall=161418 2021-06-20 15:29:14 | INFO | train_inner | epoch 005: 2126 / 3002 loss=2.482, ppl=5.59, wps=5891.8, ups=0.09, wpb=64820, bsz=128, num_updates=14051, lr=9.98956e-05, gnorm=2.295, loss_scale=8, train_wall=11, gb_free=2.8, wall=161429 2021-06-20 15:29:25 | INFO | train_inner | epoch 005: 2127 / 3002 loss=2.431, ppl=5.39, wps=5889.6, ups=0.09, wpb=64805, bsz=128, num_updates=14052, lr=9.98956e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=161440 2021-06-20 15:29:36 | INFO | train_inner | epoch 005: 2128 / 3002 loss=2.454, ppl=5.48, wps=5782.1, ups=0.09, wpb=64822, bsz=128, num_updates=14053, lr=9.98956e-05, gnorm=1.87, loss_scale=8, train_wall=11, gb_free=2.8, wall=161451 2021-06-20 15:29:47 | INFO | train_inner | epoch 005: 2129 / 3002 loss=2.473, ppl=5.55, wps=5942.2, ups=0.09, wpb=64860, bsz=128, num_updates=14054, lr=9.98956e-05, gnorm=1.941, loss_scale=8, train_wall=10, gb_free=2.8, wall=161462 2021-06-20 15:29:59 | INFO | train_inner | epoch 005: 2130 / 3002 loss=2.318, ppl=4.98, wps=5804.6, ups=0.09, wpb=64863, bsz=128, num_updates=14055, lr=9.98956e-05, gnorm=1.952, loss_scale=8, train_wall=11, gb_free=2.8, wall=161473 2021-06-20 15:30:10 | INFO | train_inner | epoch 005: 2131 / 3002 loss=2.529, ppl=5.77, wps=5870.7, ups=0.09, wpb=64793, bsz=128, num_updates=14056, lr=9.98955e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=161484 2021-06-20 15:30:21 | INFO | train_inner | epoch 005: 2132 / 3002 loss=2.607, ppl=6.09, wps=5834.4, ups=0.09, wpb=64830, bsz=128, num_updates=14057, lr=9.98955e-05, gnorm=2.017, loss_scale=8, train_wall=11, gb_free=2.8, wall=161495 2021-06-20 15:30:32 | INFO | train_inner | epoch 005: 2133 / 3002 loss=2.462, ppl=5.51, wps=5888.4, ups=0.09, wpb=64842, bsz=128, num_updates=14058, lr=9.98955e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=161506 2021-06-20 15:30:43 | INFO | train_inner | epoch 005: 2134 / 3002 loss=2.461, ppl=5.51, wps=5847.4, ups=0.09, wpb=64848, bsz=128, num_updates=14059, lr=9.98955e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=161517 2021-06-20 15:30:54 | INFO | train_inner | epoch 005: 2135 / 3002 loss=2.457, ppl=5.49, wps=5824.8, ups=0.09, wpb=64818, bsz=128, num_updates=14060, lr=9.98955e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=161528 2021-06-20 15:31:05 | INFO | train_inner | epoch 005: 2136 / 3002 loss=2.493, ppl=5.63, wps=5734.4, ups=0.09, wpb=64795, bsz=128, num_updates=14061, lr=9.98955e-05, gnorm=2.087, loss_scale=8, train_wall=11, gb_free=2.8, wall=161540 2021-06-20 15:31:16 | INFO | train_inner | epoch 005: 2137 / 3002 loss=2.447, ppl=5.45, wps=5952, ups=0.09, wpb=64852, bsz=128, num_updates=14062, lr=9.98955e-05, gnorm=1.926, loss_scale=8, train_wall=10, gb_free=2.8, wall=161550 2021-06-20 15:31:27 | INFO | train_inner | epoch 005: 2138 / 3002 loss=2.44, ppl=5.43, wps=5792.3, ups=0.09, wpb=64860, bsz=128, num_updates=14063, lr=9.98955e-05, gnorm=1.927, loss_scale=8, train_wall=11, gb_free=2.8, wall=161562 2021-06-20 15:31:38 | INFO | train_inner | epoch 005: 2139 / 3002 loss=2.477, ppl=5.57, wps=5816, ups=0.09, wpb=64846, bsz=128, num_updates=14064, lr=9.98955e-05, gnorm=2.038, loss_scale=8, train_wall=11, gb_free=2.8, wall=161573 2021-06-20 15:31:50 | INFO | train_inner | epoch 005: 2140 / 3002 loss=2.503, ppl=5.67, wps=5840.8, ups=0.09, wpb=64847, bsz=128, num_updates=14065, lr=9.98955e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=161584 2021-06-20 15:32:01 | INFO | train_inner | epoch 005: 2141 / 3002 loss=2.432, ppl=5.4, wps=5882.9, ups=0.09, wpb=64852, bsz=128, num_updates=14066, lr=9.98955e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=161595 2021-06-20 15:32:12 | INFO | train_inner | epoch 005: 2142 / 3002 loss=2.539, ppl=5.81, wps=5790, ups=0.09, wpb=64789, bsz=128, num_updates=14067, lr=9.98955e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=161606 2021-06-20 15:32:23 | INFO | train_inner | epoch 005: 2143 / 3002 loss=2.596, ppl=6.05, wps=5775, ups=0.09, wpb=64858, bsz=128, num_updates=14068, lr=9.98954e-05, gnorm=1.972, loss_scale=8, train_wall=11, gb_free=2.8, wall=161617 2021-06-20 15:32:34 | INFO | train_inner | epoch 005: 2144 / 3002 loss=2.546, ppl=5.84, wps=5906.8, ups=0.09, wpb=64748, bsz=128, num_updates=14069, lr=9.98954e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=161628 2021-06-20 15:32:45 | INFO | train_inner | epoch 005: 2145 / 3002 loss=2.599, ppl=6.06, wps=5873.1, ups=0.09, wpb=64855, bsz=128, num_updates=14070, lr=9.98954e-05, gnorm=1.998, loss_scale=8, train_wall=11, gb_free=2.8, wall=161639 2021-06-20 15:32:56 | INFO | train_inner | epoch 005: 2146 / 3002 loss=2.45, ppl=5.46, wps=5879.2, ups=0.09, wpb=64755, bsz=128, num_updates=14071, lr=9.98954e-05, gnorm=2.056, loss_scale=8, train_wall=11, gb_free=2.8, wall=161650 2021-06-20 15:33:07 | INFO | train_inner | epoch 005: 2147 / 3002 loss=2.5, ppl=5.66, wps=5905.6, ups=0.09, wpb=64853, bsz=128, num_updates=14072, lr=9.98954e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=161661 2021-06-20 15:33:18 | INFO | train_inner | epoch 005: 2148 / 3002 loss=2.56, ppl=5.9, wps=5915, ups=0.09, wpb=64805, bsz=128, num_updates=14073, lr=9.98954e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=161672 2021-06-20 15:33:29 | INFO | train_inner | epoch 005: 2149 / 3002 loss=2.522, ppl=5.74, wps=5711.3, ups=0.09, wpb=64840, bsz=128, num_updates=14074, lr=9.98954e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=161684 2021-06-20 15:33:40 | INFO | train_inner | epoch 005: 2150 / 3002 loss=2.556, ppl=5.88, wps=5802.7, ups=0.09, wpb=64790, bsz=128, num_updates=14075, lr=9.98954e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=161695 2021-06-20 15:33:51 | INFO | train_inner | epoch 005: 2151 / 3002 loss=2.489, ppl=5.61, wps=5921.7, ups=0.09, wpb=64813, bsz=128, num_updates=14076, lr=9.98954e-05, gnorm=1.997, loss_scale=8, train_wall=10, gb_free=2.8, wall=161706 2021-06-20 15:34:03 | INFO | train_inner | epoch 005: 2152 / 3002 loss=2.483, ppl=5.59, wps=5790.5, ups=0.09, wpb=64854, bsz=128, num_updates=14077, lr=9.98954e-05, gnorm=2.106, loss_scale=8, train_wall=11, gb_free=2.8, wall=161717 2021-06-20 15:34:14 | INFO | train_inner | epoch 005: 2153 / 3002 loss=2.421, ppl=5.36, wps=5754.3, ups=0.09, wpb=64940, bsz=128, num_updates=14078, lr=9.98954e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=161728 2021-06-20 15:34:25 | INFO | train_inner | epoch 005: 2154 / 3002 loss=2.527, ppl=5.76, wps=5894, ups=0.09, wpb=64873, bsz=128, num_updates=14079, lr=9.98954e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=161739 2021-06-20 15:34:36 | INFO | train_inner | epoch 005: 2155 / 3002 loss=2.53, ppl=5.77, wps=5771.9, ups=0.09, wpb=64857, bsz=128, num_updates=14080, lr=9.98954e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=161751 2021-06-20 15:34:47 | INFO | train_inner | epoch 005: 2156 / 3002 loss=2.735, ppl=6.66, wps=5861.4, ups=0.09, wpb=64711, bsz=128, num_updates=14081, lr=9.98953e-05, gnorm=1.995, loss_scale=8, train_wall=11, gb_free=2.8, wall=161762 2021-06-20 15:34:58 | INFO | train_inner | epoch 005: 2157 / 3002 loss=2.567, ppl=5.92, wps=5875.1, ups=0.09, wpb=64836, bsz=128, num_updates=14082, lr=9.98953e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=161773 2021-06-20 15:35:09 | INFO | train_inner | epoch 005: 2158 / 3002 loss=2.491, ppl=5.62, wps=5944.1, ups=0.09, wpb=64791, bsz=128, num_updates=14083, lr=9.98953e-05, gnorm=1.965, loss_scale=8, train_wall=10, gb_free=2.8, wall=161783 2021-06-20 15:35:20 | INFO | train_inner | epoch 005: 2159 / 3002 loss=2.618, ppl=6.14, wps=5863.2, ups=0.09, wpb=64819, bsz=128, num_updates=14084, lr=9.98953e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=161795 2021-06-20 15:35:31 | INFO | train_inner | epoch 005: 2160 / 3002 loss=2.686, ppl=6.43, wps=5778.5, ups=0.09, wpb=64854, bsz=128, num_updates=14085, lr=9.98953e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=161806 2021-06-20 15:35:42 | INFO | train_inner | epoch 005: 2161 / 3002 loss=2.388, ppl=5.24, wps=5919.3, ups=0.09, wpb=64910, bsz=128, num_updates=14086, lr=9.98953e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=161817 2021-06-20 15:35:53 | INFO | train_inner | epoch 005: 2162 / 3002 loss=2.703, ppl=6.51, wps=5858.3, ups=0.09, wpb=64810, bsz=128, num_updates=14087, lr=9.98953e-05, gnorm=1.963, loss_scale=8, train_wall=11, gb_free=2.8, wall=161828 2021-06-20 15:36:05 | INFO | train_inner | epoch 005: 2163 / 3002 loss=2.569, ppl=5.93, wps=5847.8, ups=0.09, wpb=64854, bsz=128, num_updates=14088, lr=9.98953e-05, gnorm=1.919, loss_scale=8, train_wall=11, gb_free=2.8, wall=161839 2021-06-20 15:36:16 | INFO | train_inner | epoch 005: 2164 / 3002 loss=2.599, ppl=6.06, wps=5777.4, ups=0.09, wpb=64829, bsz=128, num_updates=14089, lr=9.98953e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=161850 2021-06-20 15:36:27 | INFO | train_inner | epoch 005: 2165 / 3002 loss=2.529, ppl=5.77, wps=5876.6, ups=0.09, wpb=64751, bsz=128, num_updates=14090, lr=9.98953e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=161861 2021-06-20 15:36:38 | INFO | train_inner | epoch 005: 2166 / 3002 loss=2.662, ppl=6.33, wps=5890.2, ups=0.09, wpb=64754, bsz=128, num_updates=14091, lr=9.98953e-05, gnorm=2.1, loss_scale=8, train_wall=11, gb_free=2.8, wall=161872 2021-06-20 15:36:49 | INFO | train_inner | epoch 005: 2167 / 3002 loss=2.446, ppl=5.45, wps=5783.2, ups=0.09, wpb=64803, bsz=128, num_updates=14092, lr=9.98953e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=161883 2021-06-20 15:37:00 | INFO | train_inner | epoch 005: 2168 / 3002 loss=2.559, ppl=5.89, wps=5656, ups=0.09, wpb=64810, bsz=128, num_updates=14093, lr=9.98952e-05, gnorm=2.211, loss_scale=8, train_wall=11, gb_free=2.8, wall=161895 2021-06-20 15:37:12 | INFO | train_inner | epoch 005: 2169 / 3002 loss=2.5, ppl=5.66, wps=5810.2, ups=0.09, wpb=64792, bsz=128, num_updates=14094, lr=9.98952e-05, gnorm=1.972, loss_scale=8, train_wall=11, gb_free=2.8, wall=161906 2021-06-20 15:37:23 | INFO | train_inner | epoch 005: 2170 / 3002 loss=2.587, ppl=6.01, wps=5813.2, ups=0.09, wpb=64816, bsz=128, num_updates=14095, lr=9.98952e-05, gnorm=1.887, loss_scale=8, train_wall=11, gb_free=2.8, wall=161917 2021-06-20 15:37:34 | INFO | train_inner | epoch 005: 2171 / 3002 loss=2.51, ppl=5.7, wps=5883.5, ups=0.09, wpb=64804, bsz=128, num_updates=14096, lr=9.98952e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=161928 2021-06-20 15:37:45 | INFO | train_inner | epoch 005: 2172 / 3002 loss=2.416, ppl=5.34, wps=5817, ups=0.09, wpb=64842, bsz=128, num_updates=14097, lr=9.98952e-05, gnorm=1.908, loss_scale=8, train_wall=11, gb_free=2.8, wall=161939 2021-06-20 15:37:56 | INFO | train_inner | epoch 005: 2173 / 3002 loss=2.55, ppl=5.86, wps=5889.6, ups=0.09, wpb=64810, bsz=128, num_updates=14098, lr=9.98952e-05, gnorm=1.997, loss_scale=8, train_wall=11, gb_free=2.8, wall=161950 2021-06-20 15:38:07 | INFO | train_inner | epoch 005: 2174 / 3002 loss=2.387, ppl=5.23, wps=5820.6, ups=0.09, wpb=64870, bsz=128, num_updates=14099, lr=9.98952e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=161961 2021-06-20 15:38:18 | INFO | train_inner | epoch 005: 2175 / 3002 loss=2.377, ppl=5.19, wps=5768.8, ups=0.09, wpb=64906, bsz=128, num_updates=14100, lr=9.98952e-05, gnorm=2.076, loss_scale=8, train_wall=11, gb_free=2.8, wall=161973 2021-06-20 15:38:29 | INFO | train_inner | epoch 005: 2176 / 3002 loss=2.487, ppl=5.6, wps=5906.3, ups=0.09, wpb=64890, bsz=128, num_updates=14101, lr=9.98952e-05, gnorm=2.009, loss_scale=8, train_wall=10, gb_free=2.8, wall=161984 2021-06-20 15:38:40 | INFO | train_inner | epoch 005: 2177 / 3002 loss=2.624, ppl=6.16, wps=5787.3, ups=0.09, wpb=64811, bsz=128, num_updates=14102, lr=9.98952e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=161995 2021-06-20 15:38:51 | INFO | train_inner | epoch 005: 2178 / 3002 loss=2.518, ppl=5.73, wps=5928.7, ups=0.09, wpb=64863, bsz=128, num_updates=14103, lr=9.98952e-05, gnorm=2.119, loss_scale=8, train_wall=10, gb_free=2.8, wall=162006 2021-06-20 15:39:03 | INFO | train_inner | epoch 005: 2179 / 3002 loss=2.488, ppl=5.61, wps=5856.4, ups=0.09, wpb=64854, bsz=128, num_updates=14104, lr=9.98952e-05, gnorm=2.004, loss_scale=8, train_wall=11, gb_free=2.8, wall=162017 2021-06-20 15:39:14 | INFO | train_inner | epoch 005: 2180 / 3002 loss=2.427, ppl=5.38, wps=5838.1, ups=0.09, wpb=64862, bsz=128, num_updates=14105, lr=9.98952e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=162028 2021-06-20 15:39:25 | INFO | train_inner | epoch 005: 2181 / 3002 loss=2.335, ppl=5.05, wps=5949.7, ups=0.09, wpb=64884, bsz=128, num_updates=14106, lr=9.98951e-05, gnorm=2.004, loss_scale=8, train_wall=10, gb_free=2.8, wall=162039 2021-06-20 15:39:36 | INFO | train_inner | epoch 005: 2182 / 3002 loss=2.529, ppl=5.77, wps=5791.4, ups=0.09, wpb=64869, bsz=128, num_updates=14107, lr=9.98951e-05, gnorm=2.18, loss_scale=8, train_wall=11, gb_free=2.8, wall=162050 2021-06-20 15:39:47 | INFO | train_inner | epoch 005: 2183 / 3002 loss=2.555, ppl=5.88, wps=5782.9, ups=0.09, wpb=64846, bsz=128, num_updates=14108, lr=9.98951e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=162061 2021-06-20 15:39:58 | INFO | train_inner | epoch 005: 2184 / 3002 loss=2.571, ppl=5.94, wps=5927.2, ups=0.09, wpb=64799, bsz=128, num_updates=14109, lr=9.98951e-05, gnorm=2.03, loss_scale=8, train_wall=10, gb_free=2.8, wall=162072 2021-06-20 15:40:09 | INFO | train_inner | epoch 005: 2185 / 3002 loss=2.519, ppl=5.73, wps=5834.7, ups=0.09, wpb=64866, bsz=128, num_updates=14110, lr=9.98951e-05, gnorm=2.012, loss_scale=8, train_wall=11, gb_free=2.8, wall=162083 2021-06-20 15:40:20 | INFO | train_inner | epoch 005: 2186 / 3002 loss=2.561, ppl=5.9, wps=5868.4, ups=0.09, wpb=64807, bsz=128, num_updates=14111, lr=9.98951e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=162094 2021-06-20 15:40:31 | INFO | train_inner | epoch 005: 2187 / 3002 loss=2.648, ppl=6.27, wps=5886.4, ups=0.09, wpb=64821, bsz=128, num_updates=14112, lr=9.98951e-05, gnorm=1.963, loss_scale=8, train_wall=11, gb_free=2.8, wall=162105 2021-06-20 15:40:42 | INFO | train_inner | epoch 005: 2188 / 3002 loss=2.584, ppl=6, wps=5828.9, ups=0.09, wpb=64848, bsz=128, num_updates=14113, lr=9.98951e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=162117 2021-06-20 15:40:53 | INFO | train_inner | epoch 005: 2189 / 3002 loss=2.423, ppl=5.36, wps=5840.5, ups=0.09, wpb=64852, bsz=128, num_updates=14114, lr=9.98951e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=162128 2021-06-20 15:41:04 | INFO | train_inner | epoch 005: 2190 / 3002 loss=2.453, ppl=5.47, wps=5867.9, ups=0.09, wpb=64871, bsz=128, num_updates=14115, lr=9.98951e-05, gnorm=1.921, loss_scale=8, train_wall=11, gb_free=2.8, wall=162139 2021-06-20 15:41:15 | INFO | train_inner | epoch 005: 2191 / 3002 loss=2.594, ppl=6.04, wps=5881.5, ups=0.09, wpb=64934, bsz=128, num_updates=14116, lr=9.98951e-05, gnorm=1.945, loss_scale=8, train_wall=11, gb_free=2.8, wall=162150 2021-06-20 15:41:27 | INFO | train_inner | epoch 005: 2192 / 3002 loss=2.59, ppl=6.02, wps=5816.1, ups=0.09, wpb=64821, bsz=128, num_updates=14117, lr=9.98951e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=162161 2021-06-20 15:41:38 | INFO | train_inner | epoch 005: 2193 / 3002 loss=2.367, ppl=5.16, wps=5877, ups=0.09, wpb=64818, bsz=128, num_updates=14118, lr=9.9895e-05, gnorm=1.832, loss_scale=8, train_wall=11, gb_free=2.8, wall=162172 2021-06-20 15:41:49 | INFO | train_inner | epoch 005: 2194 / 3002 loss=2.42, ppl=5.35, wps=5814.9, ups=0.09, wpb=64795, bsz=128, num_updates=14119, lr=9.9895e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=162183 2021-06-20 15:42:00 | INFO | train_inner | epoch 005: 2195 / 3002 loss=2.34, ppl=5.06, wps=5766.5, ups=0.09, wpb=64820, bsz=128, num_updates=14120, lr=9.9895e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=162194 2021-06-20 15:42:11 | INFO | train_inner | epoch 005: 2196 / 3002 loss=2.355, ppl=5.12, wps=5920.8, ups=0.09, wpb=64805, bsz=128, num_updates=14121, lr=9.9895e-05, gnorm=1.929, loss_scale=8, train_wall=10, gb_free=2.8, wall=162205 2021-06-20 15:42:22 | INFO | train_inner | epoch 005: 2197 / 3002 loss=2.354, ppl=5.11, wps=5872.2, ups=0.09, wpb=64859, bsz=128, num_updates=14122, lr=9.9895e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=162216 2021-06-20 15:42:33 | INFO | train_inner | epoch 005: 2198 / 3002 loss=2.529, ppl=5.77, wps=5811.4, ups=0.09, wpb=64756, bsz=128, num_updates=14123, lr=9.9895e-05, gnorm=1.972, loss_scale=8, train_wall=11, gb_free=2.8, wall=162227 2021-06-20 15:42:44 | INFO | train_inner | epoch 005: 2199 / 3002 loss=2.641, ppl=6.24, wps=5866.2, ups=0.09, wpb=64853, bsz=128, num_updates=14124, lr=9.9895e-05, gnorm=1.995, loss_scale=8, train_wall=11, gb_free=2.8, wall=162238 2021-06-20 15:42:55 | INFO | train_inner | epoch 005: 2200 / 3002 loss=2.634, ppl=6.21, wps=5794.4, ups=0.09, wpb=64839, bsz=128, num_updates=14125, lr=9.9895e-05, gnorm=2.083, loss_scale=8, train_wall=11, gb_free=2.8, wall=162250 2021-06-20 15:43:06 | INFO | train_inner | epoch 005: 2201 / 3002 loss=2.478, ppl=5.57, wps=5912.4, ups=0.09, wpb=64811, bsz=128, num_updates=14126, lr=9.9895e-05, gnorm=1.926, loss_scale=8, train_wall=10, gb_free=2.8, wall=162261 2021-06-20 15:43:17 | INFO | train_inner | epoch 005: 2202 / 3002 loss=2.562, ppl=5.91, wps=5802, ups=0.09, wpb=64776, bsz=128, num_updates=14127, lr=9.9895e-05, gnorm=1.984, loss_scale=8, train_wall=11, gb_free=2.8, wall=162272 2021-06-20 15:43:29 | INFO | train_inner | epoch 005: 2203 / 3002 loss=2.492, ppl=5.63, wps=5769.9, ups=0.09, wpb=64772, bsz=128, num_updates=14128, lr=9.9895e-05, gnorm=2.022, loss_scale=8, train_wall=11, gb_free=2.8, wall=162283 2021-06-20 15:43:40 | INFO | train_inner | epoch 005: 2204 / 3002 loss=2.689, ppl=6.45, wps=5864.2, ups=0.09, wpb=64733, bsz=128, num_updates=14129, lr=9.9895e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=162294 2021-06-20 15:43:51 | INFO | train_inner | epoch 005: 2205 / 3002 loss=2.487, ppl=5.61, wps=5850.5, ups=0.09, wpb=64848, bsz=128, num_updates=14130, lr=9.9895e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=162305 2021-06-20 15:44:02 | INFO | train_inner | epoch 005: 2206 / 3002 loss=2.52, ppl=5.73, wps=5803.3, ups=0.09, wpb=64776, bsz=128, num_updates=14131, lr=9.98949e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=162316 2021-06-20 15:44:13 | INFO | train_inner | epoch 005: 2207 / 3002 loss=2.645, ppl=6.26, wps=5811, ups=0.09, wpb=64740, bsz=128, num_updates=14132, lr=9.98949e-05, gnorm=2.103, loss_scale=8, train_wall=11, gb_free=2.8, wall=162327 2021-06-20 15:44:24 | INFO | train_inner | epoch 005: 2208 / 3002 loss=2.517, ppl=5.72, wps=6067, ups=0.09, wpb=64839, bsz=128, num_updates=14133, lr=9.98949e-05, gnorm=2.022, loss_scale=8, train_wall=10, gb_free=2.8, wall=162338 2021-06-20 15:44:35 | INFO | train_inner | epoch 005: 2209 / 3002 loss=2.437, ppl=5.42, wps=5789.7, ups=0.09, wpb=64844, bsz=128, num_updates=14134, lr=9.98949e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=162349 2021-06-20 15:44:46 | INFO | train_inner | epoch 005: 2210 / 3002 loss=2.592, ppl=6.03, wps=5770.8, ups=0.09, wpb=64810, bsz=128, num_updates=14135, lr=9.98949e-05, gnorm=2.087, loss_scale=8, train_wall=11, gb_free=2.8, wall=162361 2021-06-20 15:44:57 | INFO | train_inner | epoch 005: 2211 / 3002 loss=2.552, ppl=5.86, wps=5815.6, ups=0.09, wpb=64717, bsz=128, num_updates=14136, lr=9.98949e-05, gnorm=2.077, loss_scale=8, train_wall=11, gb_free=2.8, wall=162372 2021-06-20 15:45:09 | INFO | train_inner | epoch 005: 2212 / 3002 loss=2.586, ppl=6.01, wps=5687.8, ups=0.09, wpb=64829, bsz=128, num_updates=14137, lr=9.98949e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=162383 2021-06-20 15:45:20 | INFO | train_inner | epoch 005: 2213 / 3002 loss=2.589, ppl=6.02, wps=5961.8, ups=0.09, wpb=64793, bsz=128, num_updates=14138, lr=9.98949e-05, gnorm=1.9, loss_scale=8, train_wall=10, gb_free=2.8, wall=162394 2021-06-20 15:45:30 | INFO | train_inner | epoch 005: 2214 / 3002 loss=2.431, ppl=5.39, wps=5962.6, ups=0.09, wpb=64832, bsz=128, num_updates=14139, lr=9.98949e-05, gnorm=1.877, loss_scale=8, train_wall=10, gb_free=2.8, wall=162405 2021-06-20 15:45:42 | INFO | train_inner | epoch 005: 2215 / 3002 loss=2.56, ppl=5.9, wps=5866.5, ups=0.09, wpb=64852, bsz=128, num_updates=14140, lr=9.98949e-05, gnorm=2.022, loss_scale=8, train_wall=11, gb_free=2.8, wall=162416 2021-06-20 15:45:53 | INFO | train_inner | epoch 005: 2216 / 3002 loss=2.525, ppl=5.75, wps=5872.4, ups=0.09, wpb=64751, bsz=128, num_updates=14141, lr=9.98949e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=162427 2021-06-20 15:46:04 | INFO | train_inner | epoch 005: 2217 / 3002 loss=2.521, ppl=5.74, wps=5750, ups=0.09, wpb=64883, bsz=128, num_updates=14142, lr=9.98949e-05, gnorm=1.863, loss_scale=8, train_wall=11, gb_free=2.8, wall=162438 2021-06-20 15:46:15 | INFO | train_inner | epoch 005: 2218 / 3002 loss=2.459, ppl=5.5, wps=5796.8, ups=0.09, wpb=64904, bsz=128, num_updates=14143, lr=9.98948e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=162449 2021-06-20 15:46:26 | INFO | train_inner | epoch 005: 2219 / 3002 loss=2.545, ppl=5.84, wps=5749.4, ups=0.09, wpb=64694, bsz=128, num_updates=14144, lr=9.98948e-05, gnorm=1.912, loss_scale=8, train_wall=11, gb_free=2.8, wall=162461 2021-06-20 15:46:37 | INFO | train_inner | epoch 005: 2220 / 3002 loss=2.627, ppl=6.18, wps=5890.6, ups=0.09, wpb=64823, bsz=128, num_updates=14145, lr=9.98948e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=162472 2021-06-20 15:46:48 | INFO | train_inner | epoch 005: 2221 / 3002 loss=2.542, ppl=5.82, wps=5871.7, ups=0.09, wpb=64896, bsz=128, num_updates=14146, lr=9.98948e-05, gnorm=2.108, loss_scale=8, train_wall=11, gb_free=2.8, wall=162483 2021-06-20 15:47:00 | INFO | train_inner | epoch 005: 2222 / 3002 loss=2.597, ppl=6.05, wps=5751.6, ups=0.09, wpb=64780, bsz=128, num_updates=14147, lr=9.98948e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=162494 2021-06-20 15:47:11 | INFO | train_inner | epoch 005: 2223 / 3002 loss=2.362, ppl=5.14, wps=5873.5, ups=0.09, wpb=64872, bsz=128, num_updates=14148, lr=9.98948e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=162505 2021-06-20 15:47:22 | INFO | train_inner | epoch 005: 2224 / 3002 loss=2.505, ppl=5.67, wps=5768.1, ups=0.09, wpb=64806, bsz=128, num_updates=14149, lr=9.98948e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=162516 2021-06-20 15:47:33 | INFO | train_inner | epoch 005: 2225 / 3002 loss=2.509, ppl=5.69, wps=5897, ups=0.09, wpb=64839, bsz=128, num_updates=14150, lr=9.98948e-05, gnorm=1.864, loss_scale=16, train_wall=11, gb_free=2.8, wall=162527 2021-06-20 15:47:44 | INFO | train_inner | epoch 005: 2226 / 3002 loss=2.575, ppl=5.96, wps=5880.8, ups=0.09, wpb=64877, bsz=128, num_updates=14151, lr=9.98948e-05, gnorm=2.051, loss_scale=16, train_wall=11, gb_free=2.8, wall=162538 2021-06-20 15:47:55 | INFO | train_inner | epoch 005: 2227 / 3002 loss=2.486, ppl=5.6, wps=5794.8, ups=0.09, wpb=64845, bsz=128, num_updates=14152, lr=9.98948e-05, gnorm=1.968, loss_scale=16, train_wall=11, gb_free=2.8, wall=162549 2021-06-20 15:48:06 | INFO | train_inner | epoch 005: 2228 / 3002 loss=2.683, ppl=6.42, wps=5838.7, ups=0.09, wpb=64757, bsz=128, num_updates=14153, lr=9.98948e-05, gnorm=1.905, loss_scale=16, train_wall=11, gb_free=2.8, wall=162561 2021-06-20 15:48:17 | INFO | train_inner | epoch 005: 2229 / 3002 loss=2.634, ppl=6.21, wps=5854.9, ups=0.09, wpb=64773, bsz=128, num_updates=14154, lr=9.98948e-05, gnorm=1.984, loss_scale=16, train_wall=11, gb_free=2.8, wall=162572 2021-06-20 15:48:28 | INFO | train_inner | epoch 005: 2230 / 3002 loss=2.371, ppl=5.17, wps=5828.8, ups=0.09, wpb=64783, bsz=128, num_updates=14155, lr=9.98948e-05, gnorm=1.941, loss_scale=16, train_wall=11, gb_free=2.8, wall=162583 2021-06-20 15:48:39 | INFO | train_inner | epoch 005: 2231 / 3002 loss=2.465, ppl=5.52, wps=5863.6, ups=0.09, wpb=64818, bsz=128, num_updates=14156, lr=9.98947e-05, gnorm=1.903, loss_scale=16, train_wall=11, gb_free=2.8, wall=162594 2021-06-20 15:48:50 | INFO | train_inner | epoch 005: 2232 / 3002 loss=2.456, ppl=5.49, wps=5872.2, ups=0.09, wpb=64826, bsz=128, num_updates=14157, lr=9.98947e-05, gnorm=1.996, loss_scale=16, train_wall=11, gb_free=2.8, wall=162605 2021-06-20 15:49:01 | INFO | train_inner | epoch 005: 2233 / 3002 loss=2.388, ppl=5.24, wps=5926.1, ups=0.09, wpb=64907, bsz=128, num_updates=14158, lr=9.98947e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=162616 2021-06-20 15:49:13 | INFO | train_inner | epoch 005: 2234 / 3002 loss=2.508, ppl=5.69, wps=5734, ups=0.09, wpb=64757, bsz=128, num_updates=14159, lr=9.98947e-05, gnorm=1.917, loss_scale=16, train_wall=11, gb_free=2.8, wall=162627 2021-06-20 15:49:24 | INFO | train_inner | epoch 005: 2235 / 3002 loss=2.597, ppl=6.05, wps=5779.1, ups=0.09, wpb=64844, bsz=128, num_updates=14160, lr=9.98947e-05, gnorm=1.964, loss_scale=16, train_wall=11, gb_free=2.8, wall=162638 2021-06-20 15:49:35 | INFO | train_inner | epoch 005: 2236 / 3002 loss=2.444, ppl=5.44, wps=5745.9, ups=0.09, wpb=64895, bsz=128, num_updates=14161, lr=9.98947e-05, gnorm=1.953, loss_scale=16, train_wall=11, gb_free=2.8, wall=162650 2021-06-20 15:49:46 | INFO | train_inner | epoch 005: 2237 / 3002 loss=2.533, ppl=5.79, wps=5977.6, ups=0.09, wpb=64889, bsz=128, num_updates=14162, lr=9.98947e-05, gnorm=1.961, loss_scale=16, train_wall=10, gb_free=2.8, wall=162660 2021-06-20 15:49:57 | INFO | train_inner | epoch 005: 2238 / 3002 loss=2.465, ppl=5.52, wps=5833.1, ups=0.09, wpb=64758, bsz=128, num_updates=14163, lr=9.98947e-05, gnorm=2.073, loss_scale=16, train_wall=11, gb_free=2.8, wall=162672 2021-06-20 15:50:08 | INFO | train_inner | epoch 005: 2239 / 3002 loss=2.449, ppl=5.46, wps=5870.1, ups=0.09, wpb=64776, bsz=128, num_updates=14164, lr=9.98947e-05, gnorm=1.956, loss_scale=16, train_wall=11, gb_free=2.8, wall=162683 2021-06-20 15:50:19 | INFO | train_inner | epoch 005: 2240 / 3002 loss=2.41, ppl=5.32, wps=5932.6, ups=0.09, wpb=64905, bsz=128, num_updates=14165, lr=9.98947e-05, gnorm=2.166, loss_scale=16, train_wall=10, gb_free=2.8, wall=162694 2021-06-20 15:50:30 | INFO | train_inner | epoch 005: 2241 / 3002 loss=2.483, ppl=5.59, wps=5844.1, ups=0.09, wpb=64808, bsz=128, num_updates=14166, lr=9.98947e-05, gnorm=1.859, loss_scale=16, train_wall=11, gb_free=2.8, wall=162705 2021-06-20 15:50:41 | INFO | train_inner | epoch 005: 2242 / 3002 loss=2.567, ppl=5.93, wps=5873.9, ups=0.09, wpb=64842, bsz=128, num_updates=14167, lr=9.98947e-05, gnorm=1.894, loss_scale=16, train_wall=11, gb_free=2.8, wall=162716 2021-06-20 15:50:52 | INFO | train_inner | epoch 005: 2243 / 3002 loss=2.427, ppl=5.38, wps=6047.3, ups=0.09, wpb=64873, bsz=128, num_updates=14168, lr=9.98946e-05, gnorm=1.898, loss_scale=16, train_wall=10, gb_free=2.8, wall=162726 2021-06-20 15:51:03 | INFO | train_inner | epoch 005: 2244 / 3002 loss=2.641, ppl=6.24, wps=5940.2, ups=0.09, wpb=64903, bsz=128, num_updates=14169, lr=9.98946e-05, gnorm=1.876, loss_scale=16, train_wall=10, gb_free=2.8, wall=162737 2021-06-20 15:51:14 | INFO | train_inner | epoch 005: 2245 / 3002 loss=2.496, ppl=5.64, wps=5876, ups=0.09, wpb=64880, bsz=128, num_updates=14170, lr=9.98946e-05, gnorm=1.927, loss_scale=16, train_wall=11, gb_free=2.8, wall=162748 2021-06-20 15:51:25 | INFO | train_inner | epoch 005: 2246 / 3002 loss=2.513, ppl=5.71, wps=5868.2, ups=0.09, wpb=64841, bsz=128, num_updates=14171, lr=9.98946e-05, gnorm=1.928, loss_scale=16, train_wall=11, gb_free=2.8, wall=162759 2021-06-20 15:51:36 | INFO | train_inner | epoch 005: 2247 / 3002 loss=2.551, ppl=5.86, wps=5835.9, ups=0.09, wpb=64794, bsz=128, num_updates=14172, lr=9.98946e-05, gnorm=1.98, loss_scale=16, train_wall=11, gb_free=2.8, wall=162770 2021-06-20 15:51:47 | INFO | train_inner | epoch 005: 2248 / 3002 loss=2.728, ppl=6.62, wps=5939.6, ups=0.09, wpb=64801, bsz=128, num_updates=14173, lr=9.98946e-05, gnorm=2.031, loss_scale=16, train_wall=10, gb_free=2.8, wall=162781 2021-06-20 15:51:58 | INFO | train_inner | epoch 005: 2249 / 3002 loss=2.415, ppl=5.33, wps=5887.5, ups=0.09, wpb=64955, bsz=128, num_updates=14174, lr=9.98946e-05, gnorm=1.949, loss_scale=16, train_wall=11, gb_free=2.8, wall=162792 2021-06-20 15:52:09 | INFO | train_inner | epoch 005: 2250 / 3002 loss=2.505, ppl=5.67, wps=5867.4, ups=0.09, wpb=64774, bsz=128, num_updates=14175, lr=9.98946e-05, gnorm=1.933, loss_scale=16, train_wall=11, gb_free=2.8, wall=162803 2021-06-20 15:52:20 | INFO | train_inner | epoch 005: 2251 / 3002 loss=2.507, ppl=5.68, wps=5862.3, ups=0.09, wpb=64899, bsz=128, num_updates=14176, lr=9.98946e-05, gnorm=1.966, loss_scale=16, train_wall=11, gb_free=2.8, wall=162815 2021-06-20 15:52:31 | INFO | train_inner | epoch 005: 2252 / 3002 loss=2.561, ppl=5.9, wps=5842.2, ups=0.09, wpb=64786, bsz=128, num_updates=14177, lr=9.98946e-05, gnorm=1.914, loss_scale=16, train_wall=11, gb_free=2.8, wall=162826 2021-06-20 15:52:42 | INFO | train_inner | epoch 005: 2253 / 3002 loss=2.412, ppl=5.32, wps=5859.5, ups=0.09, wpb=64948, bsz=128, num_updates=14178, lr=9.98946e-05, gnorm=1.898, loss_scale=16, train_wall=11, gb_free=2.8, wall=162837 2021-06-20 15:52:53 | INFO | train_inner | epoch 005: 2254 / 3002 loss=2.649, ppl=6.27, wps=5847.4, ups=0.09, wpb=64798, bsz=128, num_updates=14179, lr=9.98946e-05, gnorm=1.941, loss_scale=16, train_wall=11, gb_free=2.8, wall=162848 2021-06-20 15:53:05 | INFO | train_inner | epoch 005: 2255 / 3002 loss=2.47, ppl=5.54, wps=5777, ups=0.09, wpb=64798, bsz=128, num_updates=14180, lr=9.98946e-05, gnorm=2.023, loss_scale=16, train_wall=11, gb_free=2.8, wall=162859 2021-06-20 15:53:16 | INFO | train_inner | epoch 005: 2256 / 3002 loss=2.587, ppl=6.01, wps=5846.3, ups=0.09, wpb=64753, bsz=128, num_updates=14181, lr=9.98945e-05, gnorm=1.994, loss_scale=16, train_wall=11, gb_free=2.8, wall=162870 2021-06-20 15:53:27 | INFO | train_inner | epoch 005: 2257 / 3002 loss=2.538, ppl=5.81, wps=5756.4, ups=0.09, wpb=64775, bsz=128, num_updates=14182, lr=9.98945e-05, gnorm=2.001, loss_scale=16, train_wall=11, gb_free=2.8, wall=162881 2021-06-20 15:53:38 | INFO | train_inner | epoch 005: 2258 / 3002 loss=2.592, ppl=6.03, wps=5905.7, ups=0.09, wpb=64847, bsz=128, num_updates=14183, lr=9.98945e-05, gnorm=2.047, loss_scale=16, train_wall=11, gb_free=2.8, wall=162892 2021-06-20 15:53:49 | INFO | train_inner | epoch 005: 2259 / 3002 loss=2.676, ppl=6.39, wps=5820.4, ups=0.09, wpb=64792, bsz=128, num_updates=14184, lr=9.98945e-05, gnorm=1.979, loss_scale=16, train_wall=11, gb_free=2.8, wall=162903 2021-06-20 15:54:00 | INFO | train_inner | epoch 005: 2260 / 3002 loss=2.416, ppl=5.34, wps=5960.5, ups=0.09, wpb=64811, bsz=128, num_updates=14185, lr=9.98945e-05, gnorm=1.932, loss_scale=16, train_wall=10, gb_free=2.8, wall=162914 2021-06-20 15:54:11 | INFO | train_inner | epoch 005: 2261 / 3002 loss=2.462, ppl=5.51, wps=5827.9, ups=0.09, wpb=64820, bsz=128, num_updates=14186, lr=9.98945e-05, gnorm=2.045, loss_scale=16, train_wall=11, gb_free=2.8, wall=162925 2021-06-20 15:54:22 | INFO | train_inner | epoch 005: 2262 / 3002 loss=2.427, ppl=5.38, wps=5901, ups=0.09, wpb=64824, bsz=128, num_updates=14187, lr=9.98945e-05, gnorm=1.838, loss_scale=16, train_wall=11, gb_free=2.8, wall=162936 2021-06-20 15:54:33 | INFO | train_inner | epoch 005: 2263 / 3002 loss=2.501, ppl=5.66, wps=5939.4, ups=0.09, wpb=64773, bsz=128, num_updates=14188, lr=9.98945e-05, gnorm=1.848, loss_scale=16, train_wall=10, gb_free=2.8, wall=162947 2021-06-20 15:54:44 | INFO | train_inner | epoch 005: 2264 / 3002 loss=2.588, ppl=6.01, wps=5846.9, ups=0.09, wpb=64838, bsz=128, num_updates=14189, lr=9.98945e-05, gnorm=1.984, loss_scale=16, train_wall=11, gb_free=2.8, wall=162958 2021-06-20 15:54:55 | INFO | train_inner | epoch 005: 2265 / 3002 loss=2.539, ppl=5.81, wps=5798, ups=0.09, wpb=64833, bsz=128, num_updates=14190, lr=9.98945e-05, gnorm=2.429, loss_scale=16, train_wall=11, gb_free=2.8, wall=162970 2021-06-20 15:55:06 | INFO | train_inner | epoch 005: 2266 / 3002 loss=2.535, ppl=5.8, wps=5843.9, ups=0.09, wpb=64699, bsz=128, num_updates=14191, lr=9.98945e-05, gnorm=2.064, loss_scale=16, train_wall=11, gb_free=2.8, wall=162981 2021-06-20 15:55:18 | INFO | train_inner | epoch 005: 2267 / 3002 loss=2.541, ppl=5.82, wps=5802.9, ups=0.09, wpb=64800, bsz=128, num_updates=14192, lr=9.98945e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=162992 2021-06-20 15:55:28 | INFO | train_inner | epoch 005: 2268 / 3002 loss=2.68, ppl=6.41, wps=5927.8, ups=0.09, wpb=64849, bsz=128, num_updates=14193, lr=9.98944e-05, gnorm=2.203, loss_scale=16, train_wall=10, gb_free=2.8, wall=163003 2021-06-20 15:55:39 | INFO | train_inner | epoch 005: 2269 / 3002 loss=2.54, ppl=5.81, wps=6010.4, ups=0.09, wpb=64841, bsz=128, num_updates=14194, lr=9.98944e-05, gnorm=2.725, loss_scale=16, train_wall=10, gb_free=2.8, wall=163014 2021-06-20 15:55:50 | INFO | train_inner | epoch 005: 2270 / 3002 loss=2.573, ppl=5.95, wps=5910.1, ups=0.09, wpb=64858, bsz=128, num_updates=14195, lr=9.98944e-05, gnorm=1.939, loss_scale=16, train_wall=11, gb_free=2.8, wall=163025 2021-06-20 15:56:01 | INFO | train_inner | epoch 005: 2271 / 3002 loss=2.613, ppl=6.12, wps=5875.8, ups=0.09, wpb=64917, bsz=128, num_updates=14196, lr=9.98944e-05, gnorm=1.977, loss_scale=16, train_wall=11, gb_free=2.8, wall=163036 2021-06-20 15:56:12 | INFO | train_inner | epoch 005: 2272 / 3002 loss=2.558, ppl=5.89, wps=5822.5, ups=0.09, wpb=64883, bsz=128, num_updates=14197, lr=9.98944e-05, gnorm=2.122, loss_scale=16, train_wall=11, gb_free=2.8, wall=163047 2021-06-20 15:56:24 | INFO | train_inner | epoch 005: 2273 / 3002 loss=2.751, ppl=6.73, wps=5675.4, ups=0.09, wpb=64751, bsz=128, num_updates=14198, lr=9.98944e-05, gnorm=1.977, loss_scale=16, train_wall=11, gb_free=2.8, wall=163058 2021-06-20 15:56:35 | INFO | train_inner | epoch 005: 2274 / 3002 loss=2.479, ppl=5.58, wps=5835.1, ups=0.09, wpb=64819, bsz=128, num_updates=14199, lr=9.98944e-05, gnorm=1.944, loss_scale=16, train_wall=11, gb_free=2.8, wall=163069 2021-06-20 15:56:46 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-20 15:56:57 | INFO | train_inner | epoch 005: 2276 / 3002 loss=2.603, ppl=6.07, wps=2889.1, ups=0.04, wpb=64810, bsz=128, num_updates=14200, lr=9.98944e-05, gnorm=1.887, loss_scale=8, train_wall=22, gb_free=2.8, wall=163092 2021-06-20 15:57:08 | INFO | train_inner | epoch 005: 2277 / 3002 loss=2.447, ppl=5.45, wps=5898.8, ups=0.09, wpb=64840, bsz=128, num_updates=14201, lr=9.98944e-05, gnorm=1.905, loss_scale=8, train_wall=11, gb_free=2.8, wall=163103 2021-06-20 15:57:20 | INFO | train_inner | epoch 005: 2278 / 3002 loss=2.439, ppl=5.42, wps=5788.2, ups=0.09, wpb=64878, bsz=128, num_updates=14202, lr=9.98944e-05, gnorm=2.007, loss_scale=8, train_wall=11, gb_free=2.8, wall=163114 2021-06-20 15:57:31 | INFO | train_inner | epoch 005: 2279 / 3002 loss=2.564, ppl=5.91, wps=5836.6, ups=0.09, wpb=64800, bsz=128, num_updates=14203, lr=9.98944e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=163125 2021-06-20 15:57:42 | INFO | train_inner | epoch 005: 2280 / 3002 loss=2.588, ppl=6.01, wps=5880.3, ups=0.09, wpb=64858, bsz=128, num_updates=14204, lr=9.98944e-05, gnorm=2.125, loss_scale=8, train_wall=11, gb_free=2.8, wall=163136 2021-06-20 15:57:53 | INFO | train_inner | epoch 005: 2281 / 3002 loss=2.527, ppl=5.76, wps=5762, ups=0.09, wpb=64818, bsz=128, num_updates=14205, lr=9.98944e-05, gnorm=1.889, loss_scale=8, train_wall=11, gb_free=2.8, wall=163147 2021-06-20 15:58:04 | INFO | train_inner | epoch 005: 2282 / 3002 loss=2.513, ppl=5.71, wps=5824, ups=0.09, wpb=64853, bsz=128, num_updates=14206, lr=9.98943e-05, gnorm=2.083, loss_scale=8, train_wall=11, gb_free=2.8, wall=163158 2021-06-20 15:58:15 | INFO | train_inner | epoch 005: 2283 / 3002 loss=2.536, ppl=5.8, wps=5821.5, ups=0.09, wpb=64810, bsz=128, num_updates=14207, lr=9.98943e-05, gnorm=1.896, loss_scale=8, train_wall=11, gb_free=2.8, wall=163170 2021-06-20 15:58:26 | INFO | train_inner | epoch 005: 2284 / 3002 loss=2.509, ppl=5.69, wps=5937, ups=0.09, wpb=64804, bsz=128, num_updates=14208, lr=9.98943e-05, gnorm=1.873, loss_scale=8, train_wall=10, gb_free=2.8, wall=163180 2021-06-20 15:58:37 | INFO | train_inner | epoch 005: 2285 / 3002 loss=2.589, ppl=6.02, wps=5841.7, ups=0.09, wpb=64799, bsz=128, num_updates=14209, lr=9.98943e-05, gnorm=2.024, loss_scale=8, train_wall=11, gb_free=2.8, wall=163192 2021-06-20 15:58:48 | INFO | train_inner | epoch 005: 2286 / 3002 loss=2.526, ppl=5.76, wps=5827.6, ups=0.09, wpb=64789, bsz=128, num_updates=14210, lr=9.98943e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=163203 2021-06-20 15:58:59 | INFO | train_inner | epoch 005: 2287 / 3002 loss=2.424, ppl=5.37, wps=5874.2, ups=0.09, wpb=64875, bsz=128, num_updates=14211, lr=9.98943e-05, gnorm=1.877, loss_scale=8, train_wall=11, gb_free=2.8, wall=163214 2021-06-20 15:59:11 | INFO | train_inner | epoch 005: 2288 / 3002 loss=2.509, ppl=5.69, wps=5673.5, ups=0.09, wpb=64827, bsz=128, num_updates=14212, lr=9.98943e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=163225 2021-06-20 15:59:22 | INFO | train_inner | epoch 005: 2289 / 3002 loss=2.483, ppl=5.59, wps=5863.9, ups=0.09, wpb=64827, bsz=128, num_updates=14213, lr=9.98943e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=163236 2021-06-20 15:59:33 | INFO | train_inner | epoch 005: 2290 / 3002 loss=2.568, ppl=5.93, wps=5951.1, ups=0.09, wpb=64839, bsz=128, num_updates=14214, lr=9.98943e-05, gnorm=1.993, loss_scale=8, train_wall=10, gb_free=2.8, wall=163247 2021-06-20 15:59:44 | INFO | train_inner | epoch 005: 2291 / 3002 loss=2.596, ppl=6.05, wps=5798.8, ups=0.09, wpb=64822, bsz=128, num_updates=14215, lr=9.98943e-05, gnorm=2.075, loss_scale=8, train_wall=11, gb_free=2.8, wall=163258 2021-06-20 15:59:55 | INFO | train_inner | epoch 005: 2292 / 3002 loss=2.466, ppl=5.53, wps=5769.7, ups=0.09, wpb=64775, bsz=128, num_updates=14216, lr=9.98943e-05, gnorm=2.003, loss_scale=8, train_wall=11, gb_free=2.8, wall=163270 2021-06-20 16:00:06 | INFO | train_inner | epoch 005: 2293 / 3002 loss=2.471, ppl=5.54, wps=5923.2, ups=0.09, wpb=64713, bsz=128, num_updates=14217, lr=9.98943e-05, gnorm=2.129, loss_scale=8, train_wall=10, gb_free=2.8, wall=163280 2021-06-20 16:00:17 | INFO | train_inner | epoch 005: 2294 / 3002 loss=2.471, ppl=5.54, wps=5831.4, ups=0.09, wpb=64911, bsz=128, num_updates=14218, lr=9.98942e-05, gnorm=1.919, loss_scale=8, train_wall=11, gb_free=2.8, wall=163292 2021-06-20 16:00:28 | INFO | train_inner | epoch 005: 2295 / 3002 loss=2.785, ppl=6.89, wps=5891.2, ups=0.09, wpb=64714, bsz=128, num_updates=14219, lr=9.98942e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=163303 2021-06-20 16:00:39 | INFO | train_inner | epoch 005: 2296 / 3002 loss=2.417, ppl=5.34, wps=5904.8, ups=0.09, wpb=64909, bsz=128, num_updates=14220, lr=9.98942e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=163314 2021-06-20 16:00:50 | INFO | train_inner | epoch 005: 2297 / 3002 loss=2.462, ppl=5.51, wps=5867.3, ups=0.09, wpb=64733, bsz=128, num_updates=14221, lr=9.98942e-05, gnorm=1.895, loss_scale=8, train_wall=11, gb_free=2.8, wall=163325 2021-06-20 16:01:01 | INFO | train_inner | epoch 005: 2298 / 3002 loss=2.462, ppl=5.51, wps=5769.3, ups=0.09, wpb=64828, bsz=128, num_updates=14222, lr=9.98942e-05, gnorm=2.078, loss_scale=8, train_wall=11, gb_free=2.8, wall=163336 2021-06-20 16:01:12 | INFO | train_inner | epoch 005: 2299 / 3002 loss=2.389, ppl=5.24, wps=5879.9, ups=0.09, wpb=64801, bsz=128, num_updates=14223, lr=9.98942e-05, gnorm=4.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=163347 2021-06-20 16:01:24 | INFO | train_inner | epoch 005: 2300 / 3002 loss=2.38, ppl=5.2, wps=5767.3, ups=0.09, wpb=64925, bsz=128, num_updates=14224, lr=9.98942e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=163358 2021-06-20 16:01:35 | INFO | train_inner | epoch 005: 2301 / 3002 loss=2.499, ppl=5.65, wps=5972.2, ups=0.09, wpb=64847, bsz=128, num_updates=14225, lr=9.98942e-05, gnorm=1.99, loss_scale=8, train_wall=10, gb_free=2.8, wall=163369 2021-06-20 16:01:46 | INFO | train_inner | epoch 005: 2302 / 3002 loss=2.516, ppl=5.72, wps=5799.8, ups=0.09, wpb=64817, bsz=128, num_updates=14226, lr=9.98942e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=163380 2021-06-20 16:01:57 | INFO | train_inner | epoch 005: 2303 / 3002 loss=2.606, ppl=6.09, wps=5763, ups=0.09, wpb=64774, bsz=128, num_updates=14227, lr=9.98942e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=163391 2021-06-20 16:02:08 | INFO | train_inner | epoch 005: 2304 / 3002 loss=2.457, ppl=5.49, wps=5855.3, ups=0.09, wpb=64764, bsz=128, num_updates=14228, lr=9.98942e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=163402 2021-06-20 16:02:19 | INFO | train_inner | epoch 005: 2305 / 3002 loss=2.44, ppl=5.43, wps=5825.6, ups=0.09, wpb=64836, bsz=128, num_updates=14229, lr=9.98942e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=163414 2021-06-20 16:02:30 | INFO | train_inner | epoch 005: 2306 / 3002 loss=2.52, ppl=5.74, wps=5883.1, ups=0.09, wpb=64898, bsz=128, num_updates=14230, lr=9.98942e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=163425 2021-06-20 16:02:41 | INFO | train_inner | epoch 005: 2307 / 3002 loss=2.555, ppl=5.88, wps=5806.9, ups=0.09, wpb=64814, bsz=128, num_updates=14231, lr=9.98941e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=163436 2021-06-20 16:02:52 | INFO | train_inner | epoch 005: 2308 / 3002 loss=2.544, ppl=5.83, wps=5849.1, ups=0.09, wpb=64760, bsz=128, num_updates=14232, lr=9.98941e-05, gnorm=1.874, loss_scale=8, train_wall=11, gb_free=2.8, wall=163447 2021-06-20 16:03:04 | INFO | train_inner | epoch 005: 2309 / 3002 loss=2.422, ppl=5.36, wps=5777.2, ups=0.09, wpb=64888, bsz=128, num_updates=14233, lr=9.98941e-05, gnorm=1.907, loss_scale=8, train_wall=11, gb_free=2.8, wall=163458 2021-06-20 16:03:15 | INFO | train_inner | epoch 005: 2310 / 3002 loss=2.465, ppl=5.52, wps=5793.1, ups=0.09, wpb=64762, bsz=128, num_updates=14234, lr=9.98941e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=163469 2021-06-20 16:03:26 | INFO | train_inner | epoch 005: 2311 / 3002 loss=2.596, ppl=6.05, wps=5818.3, ups=0.09, wpb=64817, bsz=128, num_updates=14235, lr=9.98941e-05, gnorm=1.994, loss_scale=8, train_wall=11, gb_free=2.8, wall=163480 2021-06-20 16:03:37 | INFO | train_inner | epoch 005: 2312 / 3002 loss=2.562, ppl=5.9, wps=5751.9, ups=0.09, wpb=64796, bsz=128, num_updates=14236, lr=9.98941e-05, gnorm=1.888, loss_scale=8, train_wall=11, gb_free=2.8, wall=163492 2021-06-20 16:03:48 | INFO | train_inner | epoch 005: 2313 / 3002 loss=2.501, ppl=5.66, wps=5825, ups=0.09, wpb=64810, bsz=128, num_updates=14237, lr=9.98941e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=163503 2021-06-20 16:04:00 | INFO | train_inner | epoch 005: 2314 / 3002 loss=2.514, ppl=5.71, wps=5854.4, ups=0.09, wpb=64887, bsz=128, num_updates=14238, lr=9.98941e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=163514 2021-06-20 16:04:11 | INFO | train_inner | epoch 005: 2315 / 3002 loss=2.475, ppl=5.56, wps=5833.7, ups=0.09, wpb=64753, bsz=128, num_updates=14239, lr=9.98941e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=163525 2021-06-20 16:04:22 | INFO | train_inner | epoch 005: 2316 / 3002 loss=2.366, ppl=5.15, wps=5944, ups=0.09, wpb=64839, bsz=128, num_updates=14240, lr=9.98941e-05, gnorm=1.935, loss_scale=8, train_wall=10, gb_free=2.8, wall=163536 2021-06-20 16:04:33 | INFO | train_inner | epoch 005: 2317 / 3002 loss=2.404, ppl=5.29, wps=5810.5, ups=0.09, wpb=64834, bsz=128, num_updates=14241, lr=9.98941e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=163547 2021-06-20 16:04:44 | INFO | train_inner | epoch 005: 2318 / 3002 loss=2.428, ppl=5.38, wps=5794.7, ups=0.09, wpb=64777, bsz=128, num_updates=14242, lr=9.98941e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=163558 2021-06-20 16:04:55 | INFO | train_inner | epoch 005: 2319 / 3002 loss=2.482, ppl=5.59, wps=5800.6, ups=0.09, wpb=64875, bsz=128, num_updates=14243, lr=9.9894e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=163569 2021-06-20 16:05:06 | INFO | train_inner | epoch 005: 2320 / 3002 loss=2.554, ppl=5.87, wps=5862.4, ups=0.09, wpb=64788, bsz=128, num_updates=14244, lr=9.9894e-05, gnorm=2.133, loss_scale=8, train_wall=11, gb_free=2.8, wall=163580 2021-06-20 16:05:17 | INFO | train_inner | epoch 005: 2321 / 3002 loss=2.471, ppl=5.54, wps=5868.5, ups=0.09, wpb=64859, bsz=128, num_updates=14245, lr=9.9894e-05, gnorm=2.011, loss_scale=8, train_wall=11, gb_free=2.8, wall=163591 2021-06-20 16:05:28 | INFO | train_inner | epoch 005: 2322 / 3002 loss=2.414, ppl=5.33, wps=5847.5, ups=0.09, wpb=64829, bsz=128, num_updates=14246, lr=9.9894e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=163603 2021-06-20 16:05:40 | INFO | train_inner | epoch 005: 2323 / 3002 loss=2.642, ppl=6.24, wps=5738.4, ups=0.09, wpb=64848, bsz=128, num_updates=14247, lr=9.9894e-05, gnorm=2.078, loss_scale=8, train_wall=11, gb_free=2.8, wall=163614 2021-06-20 16:05:50 | INFO | train_inner | epoch 005: 2324 / 3002 loss=2.604, ppl=6.08, wps=5937, ups=0.09, wpb=64792, bsz=128, num_updates=14248, lr=9.9894e-05, gnorm=2.2, loss_scale=8, train_wall=10, gb_free=2.8, wall=163625 2021-06-20 16:06:01 | INFO | train_inner | epoch 005: 2325 / 3002 loss=2.558, ppl=5.89, wps=5966.8, ups=0.09, wpb=64856, bsz=128, num_updates=14249, lr=9.9894e-05, gnorm=1.977, loss_scale=8, train_wall=10, gb_free=2.8, wall=163636 2021-06-20 16:06:12 | INFO | train_inner | epoch 005: 2326 / 3002 loss=2.444, ppl=5.44, wps=5844.6, ups=0.09, wpb=64853, bsz=128, num_updates=14250, lr=9.9894e-05, gnorm=1.903, loss_scale=8, train_wall=11, gb_free=2.8, wall=163647 2021-06-20 16:06:24 | INFO | train_inner | epoch 005: 2327 / 3002 loss=2.514, ppl=5.71, wps=5783.1, ups=0.09, wpb=64822, bsz=128, num_updates=14251, lr=9.9894e-05, gnorm=1.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=163658 2021-06-20 16:06:35 | INFO | train_inner | epoch 005: 2328 / 3002 loss=2.555, ppl=5.88, wps=5768.2, ups=0.09, wpb=64883, bsz=128, num_updates=14252, lr=9.9894e-05, gnorm=2.016, loss_scale=8, train_wall=11, gb_free=2.8, wall=163669 2021-06-20 16:06:46 | INFO | train_inner | epoch 005: 2329 / 3002 loss=2.56, ppl=5.9, wps=5658.7, ups=0.09, wpb=64796, bsz=128, num_updates=14253, lr=9.9894e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=163681 2021-06-20 16:06:57 | INFO | train_inner | epoch 005: 2330 / 3002 loss=2.369, ppl=5.17, wps=5914.5, ups=0.09, wpb=64812, bsz=128, num_updates=14254, lr=9.9894e-05, gnorm=1.954, loss_scale=8, train_wall=10, gb_free=2.8, wall=163692 2021-06-20 16:07:08 | INFO | train_inner | epoch 005: 2331 / 3002 loss=2.543, ppl=5.83, wps=5904.1, ups=0.09, wpb=64951, bsz=128, num_updates=14255, lr=9.9894e-05, gnorm=1.995, loss_scale=8, train_wall=10, gb_free=2.8, wall=163703 2021-06-20 16:07:19 | INFO | train_inner | epoch 005: 2332 / 3002 loss=2.632, ppl=6.2, wps=5829.7, ups=0.09, wpb=64861, bsz=128, num_updates=14256, lr=9.98939e-05, gnorm=2.007, loss_scale=8, train_wall=11, gb_free=2.8, wall=163714 2021-06-20 16:07:30 | INFO | train_inner | epoch 005: 2333 / 3002 loss=2.401, ppl=5.28, wps=5915.2, ups=0.09, wpb=64792, bsz=128, num_updates=14257, lr=9.98939e-05, gnorm=1.993, loss_scale=8, train_wall=10, gb_free=2.8, wall=163725 2021-06-20 16:07:42 | INFO | train_inner | epoch 005: 2334 / 3002 loss=2.33, ppl=5.03, wps=5663.3, ups=0.09, wpb=64892, bsz=128, num_updates=14258, lr=9.98939e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=163736 2021-06-20 16:07:53 | INFO | train_inner | epoch 005: 2335 / 3002 loss=2.466, ppl=5.52, wps=5866.9, ups=0.09, wpb=64796, bsz=128, num_updates=14259, lr=9.98939e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=163747 2021-06-20 16:08:04 | INFO | train_inner | epoch 005: 2336 / 3002 loss=2.481, ppl=5.58, wps=5910.7, ups=0.09, wpb=64816, bsz=128, num_updates=14260, lr=9.98939e-05, gnorm=2.063, loss_scale=8, train_wall=11, gb_free=2.8, wall=163758 2021-06-20 16:08:15 | INFO | train_inner | epoch 005: 2337 / 3002 loss=2.471, ppl=5.54, wps=5934.5, ups=0.09, wpb=64830, bsz=128, num_updates=14261, lr=9.98939e-05, gnorm=2.048, loss_scale=8, train_wall=10, gb_free=2.8, wall=163769 2021-06-20 16:08:26 | INFO | train_inner | epoch 005: 2338 / 3002 loss=2.544, ppl=5.83, wps=5797.7, ups=0.09, wpb=64805, bsz=128, num_updates=14262, lr=9.98939e-05, gnorm=1.904, loss_scale=8, train_wall=11, gb_free=2.8, wall=163780 2021-06-20 16:08:37 | INFO | train_inner | epoch 005: 2339 / 3002 loss=2.795, ppl=6.94, wps=5891.2, ups=0.09, wpb=64895, bsz=128, num_updates=14263, lr=9.98939e-05, gnorm=2.163, loss_scale=8, train_wall=11, gb_free=2.8, wall=163791 2021-06-20 16:08:48 | INFO | train_inner | epoch 005: 2340 / 3002 loss=2.349, ppl=5.1, wps=5837.9, ups=0.09, wpb=64774, bsz=128, num_updates=14264, lr=9.98939e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=163802 2021-06-20 16:08:59 | INFO | train_inner | epoch 005: 2341 / 3002 loss=2.403, ppl=5.29, wps=5777.4, ups=0.09, wpb=64901, bsz=128, num_updates=14265, lr=9.98939e-05, gnorm=1.984, loss_scale=8, train_wall=11, gb_free=2.8, wall=163814 2021-06-20 16:09:10 | INFO | train_inner | epoch 005: 2342 / 3002 loss=2.629, ppl=6.19, wps=5846.5, ups=0.09, wpb=64834, bsz=128, num_updates=14266, lr=9.98939e-05, gnorm=2.064, loss_scale=8, train_wall=11, gb_free=2.8, wall=163825 2021-06-20 16:09:22 | INFO | train_inner | epoch 005: 2343 / 3002 loss=2.535, ppl=5.8, wps=5791.3, ups=0.09, wpb=64794, bsz=128, num_updates=14267, lr=9.98939e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=163836 2021-06-20 16:09:33 | INFO | train_inner | epoch 005: 2344 / 3002 loss=2.397, ppl=5.27, wps=5818.3, ups=0.09, wpb=64897, bsz=128, num_updates=14268, lr=9.98938e-05, gnorm=2.044, loss_scale=8, train_wall=11, gb_free=2.8, wall=163847 2021-06-20 16:09:44 | INFO | train_inner | epoch 005: 2345 / 3002 loss=2.575, ppl=5.96, wps=5792.3, ups=0.09, wpb=64867, bsz=128, num_updates=14269, lr=9.98938e-05, gnorm=2.022, loss_scale=8, train_wall=11, gb_free=2.8, wall=163858 2021-06-20 16:09:55 | INFO | train_inner | epoch 005: 2346 / 3002 loss=2.472, ppl=5.55, wps=5862.6, ups=0.09, wpb=64887, bsz=128, num_updates=14270, lr=9.98938e-05, gnorm=1.921, loss_scale=8, train_wall=11, gb_free=2.8, wall=163869 2021-06-20 16:10:06 | INFO | train_inner | epoch 005: 2347 / 3002 loss=2.458, ppl=5.49, wps=5760.8, ups=0.09, wpb=64789, bsz=128, num_updates=14271, lr=9.98938e-05, gnorm=1.855, loss_scale=8, train_wall=11, gb_free=2.8, wall=163881 2021-06-20 16:10:17 | INFO | train_inner | epoch 005: 2348 / 3002 loss=2.447, ppl=5.45, wps=5821.3, ups=0.09, wpb=64782, bsz=128, num_updates=14272, lr=9.98938e-05, gnorm=1.946, loss_scale=8, train_wall=11, gb_free=2.8, wall=163892 2021-06-20 16:10:28 | INFO | train_inner | epoch 005: 2349 / 3002 loss=2.578, ppl=5.97, wps=5824, ups=0.09, wpb=64838, bsz=128, num_updates=14273, lr=9.98938e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=163903 2021-06-20 16:10:40 | INFO | train_inner | epoch 005: 2350 / 3002 loss=2.472, ppl=5.55, wps=5863, ups=0.09, wpb=64795, bsz=128, num_updates=14274, lr=9.98938e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=163914 2021-06-20 16:10:51 | INFO | train_inner | epoch 005: 2351 / 3002 loss=2.535, ppl=5.79, wps=5879.4, ups=0.09, wpb=64750, bsz=128, num_updates=14275, lr=9.98938e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=163925 2021-06-20 16:11:02 | INFO | train_inner | epoch 005: 2352 / 3002 loss=2.48, ppl=5.58, wps=5913.7, ups=0.09, wpb=64833, bsz=128, num_updates=14276, lr=9.98938e-05, gnorm=2.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=163936 2021-06-20 16:11:13 | INFO | train_inner | epoch 005: 2353 / 3002 loss=2.392, ppl=5.25, wps=5878.5, ups=0.09, wpb=64917, bsz=128, num_updates=14277, lr=9.98938e-05, gnorm=1.927, loss_scale=8, train_wall=11, gb_free=2.8, wall=163947 2021-06-20 16:11:24 | INFO | train_inner | epoch 005: 2354 / 3002 loss=2.425, ppl=5.37, wps=5801.2, ups=0.09, wpb=64781, bsz=128, num_updates=14278, lr=9.98938e-05, gnorm=1.885, loss_scale=8, train_wall=11, gb_free=2.8, wall=163958 2021-06-20 16:11:35 | INFO | train_inner | epoch 005: 2355 / 3002 loss=2.516, ppl=5.72, wps=5843.4, ups=0.09, wpb=64733, bsz=128, num_updates=14279, lr=9.98938e-05, gnorm=1.921, loss_scale=8, train_wall=11, gb_free=2.8, wall=163969 2021-06-20 16:11:46 | INFO | train_inner | epoch 005: 2356 / 3002 loss=2.517, ppl=5.72, wps=5854.9, ups=0.09, wpb=64846, bsz=128, num_updates=14280, lr=9.98938e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=163980 2021-06-20 16:11:57 | INFO | train_inner | epoch 005: 2357 / 3002 loss=2.649, ppl=6.27, wps=5847.8, ups=0.09, wpb=64876, bsz=128, num_updates=14281, lr=9.98937e-05, gnorm=2.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=163991 2021-06-20 16:12:08 | INFO | train_inner | epoch 005: 2358 / 3002 loss=2.496, ppl=5.64, wps=5893.3, ups=0.09, wpb=64874, bsz=128, num_updates=14282, lr=9.98937e-05, gnorm=1.864, loss_scale=8, train_wall=11, gb_free=2.8, wall=164002 2021-06-20 16:12:19 | INFO | train_inner | epoch 005: 2359 / 3002 loss=2.49, ppl=5.62, wps=5885.5, ups=0.09, wpb=64860, bsz=128, num_updates=14283, lr=9.98937e-05, gnorm=1.894, loss_scale=8, train_wall=11, gb_free=2.8, wall=164013 2021-06-20 16:12:30 | INFO | train_inner | epoch 005: 2360 / 3002 loss=2.524, ppl=5.75, wps=5888.9, ups=0.09, wpb=64823, bsz=128, num_updates=14284, lr=9.98937e-05, gnorm=1.903, loss_scale=8, train_wall=11, gb_free=2.8, wall=164024 2021-06-20 16:12:41 | INFO | train_inner | epoch 005: 2361 / 3002 loss=2.615, ppl=6.13, wps=5816, ups=0.09, wpb=64876, bsz=128, num_updates=14285, lr=9.98937e-05, gnorm=2.209, loss_scale=8, train_wall=11, gb_free=2.8, wall=164036 2021-06-20 16:12:52 | INFO | train_inner | epoch 005: 2362 / 3002 loss=2.56, ppl=5.9, wps=5800.7, ups=0.09, wpb=64806, bsz=128, num_updates=14286, lr=9.98937e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=164047 2021-06-20 16:13:04 | INFO | train_inner | epoch 005: 2363 / 3002 loss=2.661, ppl=6.33, wps=5785.5, ups=0.09, wpb=64806, bsz=128, num_updates=14287, lr=9.98937e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=164058 2021-06-20 16:13:15 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 2021-06-20 16:13:26 | INFO | train_inner | epoch 005: 2365 / 3002 loss=2.65, ppl=6.27, wps=2944, ups=0.05, wpb=64822, bsz=128, num_updates=14288, lr=9.98937e-05, gnorm=1.929, loss_scale=4, train_wall=21, gb_free=2.8, wall=164080 2021-06-20 16:13:37 | INFO | train_inner | epoch 005: 2366 / 3002 loss=2.546, ppl=5.84, wps=5740.2, ups=0.09, wpb=64783, bsz=128, num_updates=14289, lr=9.98937e-05, gnorm=1.938, loss_scale=4, train_wall=11, gb_free=2.8, wall=164091 2021-06-20 16:13:48 | INFO | train_inner | epoch 005: 2367 / 3002 loss=2.49, ppl=5.62, wps=5904.6, ups=0.09, wpb=64965, bsz=128, num_updates=14290, lr=9.98937e-05, gnorm=1.951, loss_scale=4, train_wall=11, gb_free=2.8, wall=164102 2021-06-20 16:13:59 | INFO | train_inner | epoch 005: 2368 / 3002 loss=2.469, ppl=5.54, wps=5932.4, ups=0.09, wpb=64791, bsz=128, num_updates=14291, lr=9.98937e-05, gnorm=2.023, loss_scale=4, train_wall=10, gb_free=2.8, wall=164113 2021-06-20 16:14:10 | INFO | train_inner | epoch 005: 2369 / 3002 loss=2.437, ppl=5.42, wps=5934.9, ups=0.09, wpb=64861, bsz=128, num_updates=14292, lr=9.98937e-05, gnorm=1.918, loss_scale=4, train_wall=10, gb_free=2.8, wall=164124 2021-06-20 16:14:21 | INFO | train_inner | epoch 005: 2370 / 3002 loss=2.576, ppl=5.96, wps=5832.5, ups=0.09, wpb=64957, bsz=128, num_updates=14293, lr=9.98936e-05, gnorm=2.039, loss_scale=4, train_wall=11, gb_free=2.8, wall=164135 2021-06-20 16:14:32 | INFO | train_inner | epoch 005: 2371 / 3002 loss=2.488, ppl=5.61, wps=5899, ups=0.09, wpb=64791, bsz=128, num_updates=14294, lr=9.98936e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=164146 2021-06-20 16:14:43 | INFO | train_inner | epoch 005: 2372 / 3002 loss=2.621, ppl=6.15, wps=5840.7, ups=0.09, wpb=64846, bsz=128, num_updates=14295, lr=9.98936e-05, gnorm=1.944, loss_scale=4, train_wall=11, gb_free=2.8, wall=164157 2021-06-20 16:14:54 | INFO | train_inner | epoch 005: 2373 / 3002 loss=2.613, ppl=6.12, wps=5839.5, ups=0.09, wpb=64848, bsz=128, num_updates=14296, lr=9.98936e-05, gnorm=1.936, loss_scale=4, train_wall=11, gb_free=2.8, wall=164168 2021-06-20 16:15:05 | INFO | train_inner | epoch 005: 2374 / 3002 loss=2.728, ppl=6.63, wps=5930.1, ups=0.09, wpb=64783, bsz=128, num_updates=14297, lr=9.98936e-05, gnorm=2.005, loss_scale=4, train_wall=11, gb_free=2.8, wall=164179 2021-06-20 16:15:16 | INFO | train_inner | epoch 005: 2375 / 3002 loss=2.415, ppl=5.33, wps=5891.9, ups=0.09, wpb=64874, bsz=128, num_updates=14298, lr=9.98936e-05, gnorm=1.922, loss_scale=4, train_wall=11, gb_free=2.8, wall=164190 2021-06-20 16:15:27 | INFO | train_inner | epoch 005: 2376 / 3002 loss=2.425, ppl=5.37, wps=5830.4, ups=0.09, wpb=64905, bsz=128, num_updates=14299, lr=9.98936e-05, gnorm=1.952, loss_scale=4, train_wall=11, gb_free=2.8, wall=164201 2021-06-20 16:15:38 | INFO | train_inner | epoch 005: 2377 / 3002 loss=2.443, ppl=5.44, wps=5925.8, ups=0.09, wpb=64879, bsz=128, num_updates=14300, lr=9.98936e-05, gnorm=1.972, loss_scale=4, train_wall=11, gb_free=2.8, wall=164212 2021-06-20 16:15:49 | INFO | train_inner | epoch 005: 2378 / 3002 loss=2.602, ppl=6.07, wps=5838.3, ups=0.09, wpb=64850, bsz=128, num_updates=14301, lr=9.98936e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=164223 2021-06-20 16:16:00 | INFO | train_inner | epoch 005: 2379 / 3002 loss=2.546, ppl=5.84, wps=5777.1, ups=0.09, wpb=64832, bsz=128, num_updates=14302, lr=9.98936e-05, gnorm=2.049, loss_scale=4, train_wall=11, gb_free=2.8, wall=164235 2021-06-20 16:16:12 | INFO | train_inner | epoch 005: 2380 / 3002 loss=2.402, ppl=5.28, wps=5752.6, ups=0.09, wpb=64742, bsz=128, num_updates=14303, lr=9.98936e-05, gnorm=1.954, loss_scale=4, train_wall=11, gb_free=2.8, wall=164246 2021-06-20 16:16:23 | INFO | train_inner | epoch 005: 2381 / 3002 loss=2.39, ppl=5.24, wps=5785.1, ups=0.09, wpb=64790, bsz=128, num_updates=14304, lr=9.98936e-05, gnorm=1.877, loss_scale=4, train_wall=11, gb_free=2.8, wall=164257 2021-06-20 16:16:34 | INFO | train_inner | epoch 005: 2382 / 3002 loss=2.507, ppl=5.68, wps=5944, ups=0.09, wpb=64784, bsz=128, num_updates=14305, lr=9.98936e-05, gnorm=1.901, loss_scale=4, train_wall=10, gb_free=2.8, wall=164268 2021-06-20 16:16:45 | INFO | train_inner | epoch 005: 2383 / 3002 loss=2.352, ppl=5.1, wps=5999.5, ups=0.09, wpb=64838, bsz=128, num_updates=14306, lr=9.98935e-05, gnorm=1.96, loss_scale=4, train_wall=10, gb_free=2.8, wall=164279 2021-06-20 16:16:56 | INFO | train_inner | epoch 005: 2384 / 3002 loss=2.27, ppl=4.82, wps=5808.2, ups=0.09, wpb=64851, bsz=128, num_updates=14307, lr=9.98935e-05, gnorm=1.885, loss_scale=4, train_wall=11, gb_free=2.8, wall=164290 2021-06-20 16:17:07 | INFO | train_inner | epoch 005: 2385 / 3002 loss=2.427, ppl=5.38, wps=5826.7, ups=0.09, wpb=64849, bsz=128, num_updates=14308, lr=9.98935e-05, gnorm=2.174, loss_scale=4, train_wall=11, gb_free=2.8, wall=164301 2021-06-20 16:17:18 | INFO | train_inner | epoch 005: 2386 / 3002 loss=2.612, ppl=6.11, wps=5827.9, ups=0.09, wpb=64803, bsz=128, num_updates=14309, lr=9.98935e-05, gnorm=2.011, loss_scale=4, train_wall=11, gb_free=2.8, wall=164312 2021-06-20 16:17:29 | INFO | train_inner | epoch 005: 2387 / 3002 loss=2.636, ppl=6.22, wps=5751.9, ups=0.09, wpb=64886, bsz=128, num_updates=14310, lr=9.98935e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=164324 2021-06-20 16:17:40 | INFO | train_inner | epoch 005: 2388 / 3002 loss=2.502, ppl=5.67, wps=5819.3, ups=0.09, wpb=64813, bsz=128, num_updates=14311, lr=9.98935e-05, gnorm=1.965, loss_scale=4, train_wall=11, gb_free=2.8, wall=164335 2021-06-20 16:17:51 | INFO | train_inner | epoch 005: 2389 / 3002 loss=2.584, ppl=6, wps=5923, ups=0.09, wpb=64800, bsz=128, num_updates=14312, lr=9.98935e-05, gnorm=1.924, loss_scale=4, train_wall=10, gb_free=2.8, wall=164346 2021-06-20 16:18:02 | INFO | train_inner | epoch 005: 2390 / 3002 loss=2.462, ppl=5.51, wps=5822.6, ups=0.09, wpb=64885, bsz=128, num_updates=14313, lr=9.98935e-05, gnorm=1.953, loss_scale=4, train_wall=11, gb_free=2.8, wall=164357 2021-06-20 16:18:14 | INFO | train_inner | epoch 005: 2391 / 3002 loss=2.393, ppl=5.25, wps=5797.2, ups=0.09, wpb=64855, bsz=128, num_updates=14314, lr=9.98935e-05, gnorm=1.84, loss_scale=4, train_wall=11, gb_free=2.8, wall=164368 2021-06-20 16:18:25 | INFO | train_inner | epoch 005: 2392 / 3002 loss=2.509, ppl=5.69, wps=5939, ups=0.09, wpb=64842, bsz=128, num_updates=14315, lr=9.98935e-05, gnorm=1.906, loss_scale=4, train_wall=10, gb_free=2.8, wall=164379 2021-06-20 16:18:36 | INFO | train_inner | epoch 005: 2393 / 3002 loss=2.472, ppl=5.55, wps=5768.6, ups=0.09, wpb=64785, bsz=128, num_updates=14316, lr=9.98935e-05, gnorm=1.957, loss_scale=4, train_wall=11, gb_free=2.8, wall=164390 2021-06-20 16:18:47 | INFO | train_inner | epoch 005: 2394 / 3002 loss=2.367, ppl=5.16, wps=5825.5, ups=0.09, wpb=64832, bsz=128, num_updates=14317, lr=9.98935e-05, gnorm=1.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=164401 2021-06-20 16:18:58 | INFO | train_inner | epoch 005: 2395 / 3002 loss=2.471, ppl=5.55, wps=5818.8, ups=0.09, wpb=64825, bsz=128, num_updates=14318, lr=9.98934e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=164412 2021-06-20 16:19:09 | INFO | train_inner | epoch 005: 2396 / 3002 loss=2.52, ppl=5.74, wps=5893.4, ups=0.09, wpb=64883, bsz=128, num_updates=14319, lr=9.98934e-05, gnorm=1.906, loss_scale=4, train_wall=11, gb_free=2.8, wall=164423 2021-06-20 16:19:20 | INFO | train_inner | epoch 005: 2397 / 3002 loss=2.518, ppl=5.73, wps=5810, ups=0.09, wpb=64799, bsz=128, num_updates=14320, lr=9.98934e-05, gnorm=2.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=164435 2021-06-20 16:19:32 | INFO | train_inner | epoch 005: 2398 / 3002 loss=2.381, ppl=5.21, wps=5744.7, ups=0.09, wpb=64783, bsz=128, num_updates=14321, lr=9.98934e-05, gnorm=1.918, loss_scale=4, train_wall=11, gb_free=2.8, wall=164446 2021-06-20 16:19:43 | INFO | train_inner | epoch 005: 2399 / 3002 loss=2.505, ppl=5.67, wps=5784.8, ups=0.09, wpb=64783, bsz=128, num_updates=14322, lr=9.98934e-05, gnorm=2.009, loss_scale=4, train_wall=11, gb_free=2.8, wall=164457 2021-06-20 16:19:54 | INFO | train_inner | epoch 005: 2400 / 3002 loss=2.612, ppl=6.11, wps=5914.4, ups=0.09, wpb=64936, bsz=128, num_updates=14323, lr=9.98934e-05, gnorm=1.966, loss_scale=4, train_wall=11, gb_free=2.8, wall=164468 2021-06-20 16:20:05 | INFO | train_inner | epoch 005: 2401 / 3002 loss=2.525, ppl=5.75, wps=5916.6, ups=0.09, wpb=64805, bsz=128, num_updates=14324, lr=9.98934e-05, gnorm=3.706, loss_scale=4, train_wall=10, gb_free=2.8, wall=164479 2021-06-20 16:20:16 | INFO | train_inner | epoch 005: 2402 / 3002 loss=2.425, ppl=5.37, wps=5861.8, ups=0.09, wpb=64729, bsz=128, num_updates=14325, lr=9.98934e-05, gnorm=2.001, loss_scale=4, train_wall=11, gb_free=2.8, wall=164490 2021-06-20 16:20:27 | INFO | train_inner | epoch 005: 2403 / 3002 loss=2.538, ppl=5.81, wps=5886.8, ups=0.09, wpb=64791, bsz=128, num_updates=14326, lr=9.98934e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=164501 2021-06-20 16:20:38 | INFO | train_inner | epoch 005: 2404 / 3002 loss=2.525, ppl=5.75, wps=5787.8, ups=0.09, wpb=64829, bsz=128, num_updates=14327, lr=9.98934e-05, gnorm=2.009, loss_scale=4, train_wall=11, gb_free=2.8, wall=164512 2021-06-20 16:20:49 | INFO | train_inner | epoch 005: 2405 / 3002 loss=2.525, ppl=5.76, wps=5793, ups=0.09, wpb=64858, bsz=128, num_updates=14328, lr=9.98934e-05, gnorm=1.986, loss_scale=4, train_wall=11, gb_free=2.8, wall=164523 2021-06-20 16:21:00 | INFO | train_inner | epoch 005: 2406 / 3002 loss=2.417, ppl=5.34, wps=5863.8, ups=0.09, wpb=64796, bsz=128, num_updates=14329, lr=9.98934e-05, gnorm=1.97, loss_scale=4, train_wall=11, gb_free=2.8, wall=164534 2021-06-20 16:21:11 | INFO | train_inner | epoch 005: 2407 / 3002 loss=2.482, ppl=5.59, wps=5869.1, ups=0.09, wpb=64855, bsz=128, num_updates=14330, lr=9.98934e-05, gnorm=1.986, loss_scale=4, train_wall=11, gb_free=2.8, wall=164546 2021-06-20 16:21:22 | INFO | train_inner | epoch 005: 2408 / 3002 loss=2.601, ppl=6.07, wps=5898.1, ups=0.09, wpb=64796, bsz=128, num_updates=14331, lr=9.98933e-05, gnorm=1.895, loss_scale=4, train_wall=11, gb_free=2.8, wall=164557 2021-06-20 16:21:33 | INFO | train_inner | epoch 005: 2409 / 3002 loss=2.352, ppl=5.1, wps=5993.5, ups=0.09, wpb=64818, bsz=128, num_updates=14332, lr=9.98933e-05, gnorm=1.97, loss_scale=4, train_wall=10, gb_free=2.8, wall=164567 2021-06-20 16:21:44 | INFO | train_inner | epoch 005: 2410 / 3002 loss=2.534, ppl=5.79, wps=5895.4, ups=0.09, wpb=64907, bsz=128, num_updates=14333, lr=9.98933e-05, gnorm=1.915, loss_scale=4, train_wall=11, gb_free=2.8, wall=164578 2021-06-20 16:21:55 | INFO | train_inner | epoch 005: 2411 / 3002 loss=2.545, ppl=5.84, wps=5969.5, ups=0.09, wpb=64789, bsz=128, num_updates=14334, lr=9.98933e-05, gnorm=1.971, loss_scale=4, train_wall=10, gb_free=2.8, wall=164589 2021-06-20 16:22:06 | INFO | train_inner | epoch 005: 2412 / 3002 loss=2.56, ppl=5.9, wps=6000.7, ups=0.09, wpb=64816, bsz=128, num_updates=14335, lr=9.98933e-05, gnorm=1.954, loss_scale=4, train_wall=10, gb_free=2.8, wall=164600 2021-06-20 16:22:17 | INFO | train_inner | epoch 005: 2413 / 3002 loss=2.583, ppl=5.99, wps=5949, ups=0.09, wpb=64856, bsz=128, num_updates=14336, lr=9.98933e-05, gnorm=1.9, loss_scale=4, train_wall=10, gb_free=2.8, wall=164611 2021-06-20 16:22:28 | INFO | train_inner | epoch 005: 2414 / 3002 loss=2.684, ppl=6.43, wps=5796.1, ups=0.09, wpb=64798, bsz=128, num_updates=14337, lr=9.98933e-05, gnorm=2.024, loss_scale=4, train_wall=11, gb_free=2.8, wall=164622 2021-06-20 16:22:39 | INFO | train_inner | epoch 005: 2415 / 3002 loss=2.6, ppl=6.06, wps=5888.9, ups=0.09, wpb=64877, bsz=128, num_updates=14338, lr=9.98933e-05, gnorm=1.975, loss_scale=4, train_wall=11, gb_free=2.8, wall=164633 2021-06-20 16:22:50 | INFO | train_inner | epoch 005: 2416 / 3002 loss=2.55, ppl=5.86, wps=5771.2, ups=0.09, wpb=64772, bsz=128, num_updates=14339, lr=9.98933e-05, gnorm=2.083, loss_scale=4, train_wall=11, gb_free=2.8, wall=164644 2021-06-20 16:23:01 | INFO | train_inner | epoch 005: 2417 / 3002 loss=2.576, ppl=5.96, wps=5686.6, ups=0.09, wpb=64768, bsz=128, num_updates=14340, lr=9.98933e-05, gnorm=1.986, loss_scale=4, train_wall=11, gb_free=2.8, wall=164656 2021-06-20 16:23:13 | INFO | train_inner | epoch 005: 2418 / 3002 loss=2.576, ppl=5.96, wps=5815.6, ups=0.09, wpb=64896, bsz=128, num_updates=14341, lr=9.98933e-05, gnorm=1.919, loss_scale=4, train_wall=11, gb_free=2.8, wall=164667 2021-06-20 16:23:24 | INFO | train_inner | epoch 005: 2419 / 3002 loss=2.549, ppl=5.85, wps=5896.4, ups=0.09, wpb=64834, bsz=128, num_updates=14342, lr=9.98933e-05, gnorm=2.002, loss_scale=4, train_wall=11, gb_free=2.8, wall=164678 2021-06-20 16:23:34 | INFO | train_inner | epoch 005: 2420 / 3002 loss=2.334, ppl=5.04, wps=5941.3, ups=0.09, wpb=64824, bsz=128, num_updates=14343, lr=9.98932e-05, gnorm=1.885, loss_scale=4, train_wall=10, gb_free=2.8, wall=164689 2021-06-20 16:23:46 | INFO | train_inner | epoch 005: 2421 / 3002 loss=2.348, ppl=5.09, wps=5833.5, ups=0.09, wpb=64879, bsz=128, num_updates=14344, lr=9.98932e-05, gnorm=1.935, loss_scale=4, train_wall=11, gb_free=2.8, wall=164700 2021-06-20 16:23:57 | INFO | train_inner | epoch 005: 2422 / 3002 loss=2.657, ppl=6.31, wps=5849.6, ups=0.09, wpb=64810, bsz=128, num_updates=14345, lr=9.98932e-05, gnorm=2.122, loss_scale=4, train_wall=11, gb_free=2.8, wall=164711 2021-06-20 16:24:08 | INFO | train_inner | epoch 005: 2423 / 3002 loss=2.571, ppl=5.94, wps=5927.6, ups=0.09, wpb=64880, bsz=128, num_updates=14346, lr=9.98932e-05, gnorm=2.033, loss_scale=4, train_wall=10, gb_free=2.8, wall=164722 2021-06-20 16:24:19 | INFO | train_inner | epoch 005: 2424 / 3002 loss=2.434, ppl=5.4, wps=5802.6, ups=0.09, wpb=64820, bsz=128, num_updates=14347, lr=9.98932e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=164733 2021-06-20 16:24:30 | INFO | train_inner | epoch 005: 2425 / 3002 loss=2.394, ppl=5.25, wps=5835.5, ups=0.09, wpb=64922, bsz=128, num_updates=14348, lr=9.98932e-05, gnorm=1.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=164744 2021-06-20 16:24:41 | INFO | train_inner | epoch 005: 2426 / 3002 loss=2.507, ppl=5.69, wps=5727.7, ups=0.09, wpb=64802, bsz=128, num_updates=14349, lr=9.98932e-05, gnorm=1.958, loss_scale=4, train_wall=11, gb_free=2.8, wall=164756 2021-06-20 16:24:52 | INFO | train_inner | epoch 005: 2427 / 3002 loss=2.5, ppl=5.66, wps=5813.9, ups=0.09, wpb=64812, bsz=128, num_updates=14350, lr=9.98932e-05, gnorm=1.969, loss_scale=4, train_wall=11, gb_free=2.8, wall=164767 2021-06-20 16:25:04 | INFO | train_inner | epoch 005: 2428 / 3002 loss=2.488, ppl=5.61, wps=5714.8, ups=0.09, wpb=64814, bsz=128, num_updates=14351, lr=9.98932e-05, gnorm=2.007, loss_scale=4, train_wall=11, gb_free=2.8, wall=164778 2021-06-20 16:25:15 | INFO | train_inner | epoch 005: 2429 / 3002 loss=2.361, ppl=5.14, wps=5845.2, ups=0.09, wpb=64827, bsz=128, num_updates=14352, lr=9.98932e-05, gnorm=1.944, loss_scale=4, train_wall=11, gb_free=2.8, wall=164789 2021-06-20 16:25:26 | INFO | train_inner | epoch 005: 2430 / 3002 loss=2.445, ppl=5.44, wps=5820.7, ups=0.09, wpb=64747, bsz=128, num_updates=14353, lr=9.98932e-05, gnorm=1.936, loss_scale=4, train_wall=11, gb_free=2.8, wall=164800 2021-06-20 16:25:37 | INFO | train_inner | epoch 005: 2431 / 3002 loss=2.567, ppl=5.93, wps=5910, ups=0.09, wpb=64958, bsz=128, num_updates=14354, lr=9.98932e-05, gnorm=2.027, loss_scale=4, train_wall=11, gb_free=2.8, wall=164811 2021-06-20 16:25:48 | INFO | train_inner | epoch 005: 2432 / 3002 loss=2.5, ppl=5.66, wps=5800.1, ups=0.09, wpb=64833, bsz=128, num_updates=14355, lr=9.98932e-05, gnorm=2.111, loss_scale=4, train_wall=11, gb_free=2.8, wall=164822 2021-06-20 16:25:59 | INFO | train_inner | epoch 005: 2433 / 3002 loss=2.429, ppl=5.39, wps=5851.2, ups=0.09, wpb=64838, bsz=128, num_updates=14356, lr=9.98931e-05, gnorm=1.909, loss_scale=4, train_wall=11, gb_free=2.8, wall=164833 2021-06-20 16:26:10 | INFO | train_inner | epoch 005: 2434 / 3002 loss=2.477, ppl=5.57, wps=5947.4, ups=0.09, wpb=64758, bsz=128, num_updates=14357, lr=9.98931e-05, gnorm=1.938, loss_scale=4, train_wall=10, gb_free=2.8, wall=164844 2021-06-20 16:26:21 | INFO | train_inner | epoch 005: 2435 / 3002 loss=2.577, ppl=5.97, wps=5807.4, ups=0.09, wpb=64712, bsz=128, num_updates=14358, lr=9.98931e-05, gnorm=2.073, loss_scale=4, train_wall=11, gb_free=2.8, wall=164856 2021-06-20 16:26:32 | INFO | train_inner | epoch 005: 2436 / 3002 loss=2.546, ppl=5.84, wps=5889, ups=0.09, wpb=64764, bsz=128, num_updates=14359, lr=9.98931e-05, gnorm=1.952, loss_scale=4, train_wall=11, gb_free=2.8, wall=164867 2021-06-20 16:26:43 | INFO | train_inner | epoch 005: 2437 / 3002 loss=2.398, ppl=5.27, wps=5847.6, ups=0.09, wpb=64833, bsz=128, num_updates=14360, lr=9.98931e-05, gnorm=2.009, loss_scale=4, train_wall=11, gb_free=2.8, wall=164878 2021-06-20 16:26:54 | INFO | train_inner | epoch 005: 2438 / 3002 loss=2.371, ppl=5.17, wps=5996, ups=0.09, wpb=64951, bsz=128, num_updates=14361, lr=9.98931e-05, gnorm=2.05, loss_scale=4, train_wall=10, gb_free=2.8, wall=164888 2021-06-20 16:27:05 | INFO | train_inner | epoch 005: 2439 / 3002 loss=2.484, ppl=5.59, wps=5703.4, ups=0.09, wpb=64784, bsz=128, num_updates=14362, lr=9.98931e-05, gnorm=1.977, loss_scale=4, train_wall=11, gb_free=2.8, wall=164900 2021-06-20 16:27:16 | INFO | train_inner | epoch 005: 2440 / 3002 loss=2.256, ppl=4.78, wps=5915, ups=0.09, wpb=64802, bsz=128, num_updates=14363, lr=9.98931e-05, gnorm=2.01, loss_scale=4, train_wall=11, gb_free=2.8, wall=164911 2021-06-20 16:27:28 | INFO | train_inner | epoch 005: 2441 / 3002 loss=2.546, ppl=5.84, wps=5798, ups=0.09, wpb=64812, bsz=128, num_updates=14364, lr=9.98931e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=164922 2021-06-20 16:27:39 | INFO | train_inner | epoch 005: 2442 / 3002 loss=2.561, ppl=5.9, wps=5833.2, ups=0.09, wpb=64865, bsz=128, num_updates=14365, lr=9.98931e-05, gnorm=1.999, loss_scale=4, train_wall=11, gb_free=2.8, wall=164933 2021-06-20 16:27:50 | INFO | train_inner | epoch 005: 2443 / 3002 loss=2.675, ppl=6.39, wps=5814.1, ups=0.09, wpb=64812, bsz=128, num_updates=14366, lr=9.98931e-05, gnorm=2.026, loss_scale=4, train_wall=11, gb_free=2.8, wall=164944 2021-06-20 16:28:01 | INFO | train_inner | epoch 005: 2444 / 3002 loss=2.562, ppl=5.91, wps=5948.8, ups=0.09, wpb=64757, bsz=128, num_updates=14367, lr=9.98931e-05, gnorm=2.039, loss_scale=4, train_wall=10, gb_free=2.8, wall=164955 2021-06-20 16:28:12 | INFO | train_inner | epoch 005: 2445 / 3002 loss=2.397, ppl=5.27, wps=5777, ups=0.09, wpb=64767, bsz=128, num_updates=14368, lr=9.9893e-05, gnorm=2.083, loss_scale=4, train_wall=11, gb_free=2.8, wall=164966 2021-06-20 16:28:23 | INFO | train_inner | epoch 005: 2446 / 3002 loss=2.58, ppl=5.98, wps=5829.4, ups=0.09, wpb=64909, bsz=128, num_updates=14369, lr=9.9893e-05, gnorm=2.53, loss_scale=4, train_wall=11, gb_free=2.8, wall=164977 2021-06-20 16:28:34 | INFO | train_inner | epoch 005: 2447 / 3002 loss=2.416, ppl=5.34, wps=5865.3, ups=0.09, wpb=64925, bsz=128, num_updates=14370, lr=9.9893e-05, gnorm=1.983, loss_scale=4, train_wall=11, gb_free=2.8, wall=164988 2021-06-20 16:28:45 | INFO | train_inner | epoch 005: 2448 / 3002 loss=2.416, ppl=5.34, wps=5749.8, ups=0.09, wpb=64737, bsz=128, num_updates=14371, lr=9.9893e-05, gnorm=2.007, loss_scale=4, train_wall=11, gb_free=2.8, wall=165000 2021-06-20 16:28:57 | INFO | train_inner | epoch 005: 2449 / 3002 loss=2.447, ppl=5.45, wps=5787.9, ups=0.09, wpb=64797, bsz=128, num_updates=14372, lr=9.9893e-05, gnorm=2.024, loss_scale=4, train_wall=11, gb_free=2.8, wall=165011 2021-06-20 16:29:08 | INFO | train_inner | epoch 005: 2450 / 3002 loss=2.607, ppl=6.09, wps=5763.4, ups=0.09, wpb=64830, bsz=128, num_updates=14373, lr=9.9893e-05, gnorm=34.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=165022 2021-06-20 16:29:19 | INFO | train_inner | epoch 005: 2451 / 3002 loss=2.635, ppl=6.21, wps=5878, ups=0.09, wpb=64834, bsz=128, num_updates=14374, lr=9.9893e-05, gnorm=2.022, loss_scale=4, train_wall=11, gb_free=2.8, wall=165033 2021-06-20 16:29:30 | INFO | train_inner | epoch 005: 2452 / 3002 loss=2.515, ppl=5.72, wps=5811.8, ups=0.09, wpb=64825, bsz=128, num_updates=14375, lr=9.9893e-05, gnorm=2, loss_scale=4, train_wall=11, gb_free=2.8, wall=165044 2021-06-20 16:29:41 | INFO | train_inner | epoch 005: 2453 / 3002 loss=2.645, ppl=6.26, wps=5985.6, ups=0.09, wpb=64843, bsz=128, num_updates=14376, lr=9.9893e-05, gnorm=2.061, loss_scale=4, train_wall=10, gb_free=2.8, wall=165055 2021-06-20 16:29:52 | INFO | train_inner | epoch 005: 2454 / 3002 loss=2.51, ppl=5.7, wps=5840.8, ups=0.09, wpb=64874, bsz=128, num_updates=14377, lr=9.9893e-05, gnorm=2.015, loss_scale=4, train_wall=11, gb_free=2.8, wall=165066 2021-06-20 16:30:03 | INFO | train_inner | epoch 005: 2455 / 3002 loss=2.471, ppl=5.55, wps=5865.6, ups=0.09, wpb=64838, bsz=128, num_updates=14378, lr=9.9893e-05, gnorm=2.057, loss_scale=4, train_wall=11, gb_free=2.8, wall=165077 2021-06-20 16:30:14 | INFO | train_inner | epoch 005: 2456 / 3002 loss=2.55, ppl=5.86, wps=5803.3, ups=0.09, wpb=64769, bsz=128, num_updates=14379, lr=9.9893e-05, gnorm=2.156, loss_scale=4, train_wall=11, gb_free=2.8, wall=165089 2021-06-20 16:30:25 | INFO | train_inner | epoch 005: 2457 / 3002 loss=2.476, ppl=5.56, wps=5831.5, ups=0.09, wpb=64826, bsz=128, num_updates=14380, lr=9.9893e-05, gnorm=2.05, loss_scale=4, train_wall=11, gb_free=2.8, wall=165100 2021-06-20 16:30:36 | INFO | train_inner | epoch 005: 2458 / 3002 loss=2.639, ppl=6.23, wps=5847.7, ups=0.09, wpb=64824, bsz=128, num_updates=14381, lr=9.98929e-05, gnorm=2.09, loss_scale=4, train_wall=11, gb_free=2.8, wall=165111 2021-06-20 16:30:48 | INFO | train_inner | epoch 005: 2459 / 3002 loss=2.508, ppl=5.69, wps=5848.6, ups=0.09, wpb=64923, bsz=128, num_updates=14382, lr=9.98929e-05, gnorm=2.057, loss_scale=4, train_wall=11, gb_free=2.8, wall=165122 2021-06-20 16:30:59 | INFO | train_inner | epoch 005: 2460 / 3002 loss=2.444, ppl=5.44, wps=5854.5, ups=0.09, wpb=64894, bsz=128, num_updates=14383, lr=9.98929e-05, gnorm=2.116, loss_scale=4, train_wall=11, gb_free=2.8, wall=165133 2021-06-20 16:31:10 | INFO | train_inner | epoch 005: 2461 / 3002 loss=2.485, ppl=5.6, wps=5886.5, ups=0.09, wpb=64767, bsz=128, num_updates=14384, lr=9.98929e-05, gnorm=2.106, loss_scale=4, train_wall=11, gb_free=2.8, wall=165144 2021-06-20 16:31:21 | INFO | train_inner | epoch 005: 2462 / 3002 loss=2.504, ppl=5.67, wps=5824.5, ups=0.09, wpb=64636, bsz=128, num_updates=14385, lr=9.98929e-05, gnorm=2.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=165155 2021-06-20 16:31:32 | INFO | train_inner | epoch 005: 2463 / 3002 loss=2.59, ppl=6.02, wps=5932.8, ups=0.09, wpb=64968, bsz=128, num_updates=14386, lr=9.98929e-05, gnorm=2.079, loss_scale=4, train_wall=11, gb_free=2.8, wall=165166 2021-06-20 16:31:43 | INFO | train_inner | epoch 005: 2464 / 3002 loss=2.474, ppl=5.56, wps=5861.9, ups=0.09, wpb=64789, bsz=128, num_updates=14387, lr=9.98929e-05, gnorm=2.23, loss_scale=4, train_wall=11, gb_free=2.8, wall=165177 2021-06-20 16:31:54 | INFO | train_inner | epoch 005: 2465 / 3002 loss=2.578, ppl=5.97, wps=5911.2, ups=0.09, wpb=64778, bsz=128, num_updates=14388, lr=9.98929e-05, gnorm=2.091, loss_scale=4, train_wall=11, gb_free=2.8, wall=165188 2021-06-20 16:32:05 | INFO | train_inner | epoch 005: 2466 / 3002 loss=2.441, ppl=5.43, wps=5783.7, ups=0.09, wpb=64848, bsz=128, num_updates=14389, lr=9.98929e-05, gnorm=2.04, loss_scale=4, train_wall=11, gb_free=2.8, wall=165199 2021-06-20 16:32:16 | INFO | train_inner | epoch 005: 2467 / 3002 loss=2.628, ppl=6.18, wps=5876.1, ups=0.09, wpb=64811, bsz=128, num_updates=14390, lr=9.98929e-05, gnorm=1.99, loss_scale=4, train_wall=11, gb_free=2.8, wall=165210 2021-06-20 16:32:27 | INFO | train_inner | epoch 005: 2468 / 3002 loss=2.589, ppl=6.02, wps=5784.2, ups=0.09, wpb=64892, bsz=128, num_updates=14391, lr=9.98929e-05, gnorm=2.026, loss_scale=4, train_wall=11, gb_free=2.8, wall=165221 2021-06-20 16:32:38 | INFO | train_inner | epoch 005: 2469 / 3002 loss=2.578, ppl=5.97, wps=5834.9, ups=0.09, wpb=64825, bsz=128, num_updates=14392, lr=9.98929e-05, gnorm=2.033, loss_scale=4, train_wall=11, gb_free=2.8, wall=165233 2021-06-20 16:32:49 | INFO | train_inner | epoch 005: 2470 / 3002 loss=2.457, ppl=5.49, wps=5767.5, ups=0.09, wpb=64825, bsz=128, num_updates=14393, lr=9.98928e-05, gnorm=2.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=165244 2021-06-20 16:33:00 | INFO | train_inner | epoch 005: 2471 / 3002 loss=2.658, ppl=6.31, wps=5903, ups=0.09, wpb=64874, bsz=128, num_updates=14394, lr=9.98928e-05, gnorm=1.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=165255 2021-06-20 16:33:11 | INFO | train_inner | epoch 005: 2472 / 3002 loss=2.41, ppl=5.31, wps=5953.3, ups=0.09, wpb=64827, bsz=128, num_updates=14395, lr=9.98928e-05, gnorm=1.964, loss_scale=4, train_wall=10, gb_free=2.8, wall=165266 2021-06-20 16:33:22 | INFO | train_inner | epoch 005: 2473 / 3002 loss=2.532, ppl=5.78, wps=5863.4, ups=0.09, wpb=64870, bsz=128, num_updates=14396, lr=9.98928e-05, gnorm=2.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=165277 2021-06-20 16:33:34 | INFO | train_inner | epoch 005: 2474 / 3002 loss=2.54, ppl=5.82, wps=5846.5, ups=0.09, wpb=64821, bsz=128, num_updates=14397, lr=9.98928e-05, gnorm=2.062, loss_scale=4, train_wall=11, gb_free=2.8, wall=165288 2021-06-20 16:33:45 | INFO | train_inner | epoch 005: 2475 / 3002 loss=2.48, ppl=5.58, wps=5827.4, ups=0.09, wpb=64850, bsz=128, num_updates=14398, lr=9.98928e-05, gnorm=1.916, loss_scale=4, train_wall=11, gb_free=2.8, wall=165299 2021-06-20 16:33:56 | INFO | train_inner | epoch 005: 2476 / 3002 loss=2.563, ppl=5.91, wps=5835.7, ups=0.09, wpb=64879, bsz=128, num_updates=14399, lr=9.98928e-05, gnorm=1.892, loss_scale=4, train_wall=11, gb_free=2.8, wall=165310 2021-06-20 16:34:07 | INFO | train_inner | epoch 005: 2477 / 3002 loss=2.609, ppl=6.1, wps=5967.3, ups=0.09, wpb=64873, bsz=128, num_updates=14400, lr=9.98928e-05, gnorm=1.973, loss_scale=4, train_wall=10, gb_free=2.8, wall=165321 2021-06-20 16:34:18 | INFO | train_inner | epoch 005: 2478 / 3002 loss=2.536, ppl=5.8, wps=5860, ups=0.09, wpb=64771, bsz=128, num_updates=14401, lr=9.98928e-05, gnorm=2.079, loss_scale=4, train_wall=11, gb_free=2.8, wall=165332 2021-06-20 16:34:29 | INFO | train_inner | epoch 005: 2479 / 3002 loss=2.437, ppl=5.42, wps=5891.7, ups=0.09, wpb=64871, bsz=128, num_updates=14402, lr=9.98928e-05, gnorm=2.03, loss_scale=4, train_wall=11, gb_free=2.8, wall=165343 2021-06-20 16:34:40 | INFO | train_inner | epoch 005: 2480 / 3002 loss=2.493, ppl=5.63, wps=5969.2, ups=0.09, wpb=64786, bsz=128, num_updates=14403, lr=9.98928e-05, gnorm=1.969, loss_scale=4, train_wall=10, gb_free=2.8, wall=165354 2021-06-20 16:34:51 | INFO | train_inner | epoch 005: 2481 / 3002 loss=2.506, ppl=5.68, wps=5840.7, ups=0.09, wpb=64848, bsz=128, num_updates=14404, lr=9.98928e-05, gnorm=1.881, loss_scale=4, train_wall=11, gb_free=2.8, wall=165365 2021-06-20 16:35:02 | INFO | train_inner | epoch 005: 2482 / 3002 loss=2.328, ppl=5.02, wps=5928.5, ups=0.09, wpb=64841, bsz=128, num_updates=14405, lr=9.98928e-05, gnorm=1.888, loss_scale=4, train_wall=11, gb_free=2.8, wall=165376 2021-06-20 16:35:13 | INFO | train_inner | epoch 005: 2483 / 3002 loss=2.519, ppl=5.73, wps=5890.5, ups=0.09, wpb=64889, bsz=128, num_updates=14406, lr=9.98927e-05, gnorm=1.932, loss_scale=4, train_wall=11, gb_free=2.8, wall=165387 2021-06-20 16:35:24 | INFO | train_inner | epoch 005: 2484 / 3002 loss=2.517, ppl=5.72, wps=5788.1, ups=0.09, wpb=64797, bsz=128, num_updates=14407, lr=9.98927e-05, gnorm=2.019, loss_scale=4, train_wall=11, gb_free=2.8, wall=165398 2021-06-20 16:35:35 | INFO | train_inner | epoch 005: 2485 / 3002 loss=2.656, ppl=6.3, wps=5832.8, ups=0.09, wpb=64803, bsz=128, num_updates=14408, lr=9.98927e-05, gnorm=1.948, loss_scale=4, train_wall=11, gb_free=2.8, wall=165409 2021-06-20 16:35:46 | INFO | train_inner | epoch 005: 2486 / 3002 loss=2.544, ppl=5.83, wps=5846.1, ups=0.09, wpb=64735, bsz=128, num_updates=14409, lr=9.98927e-05, gnorm=2.248, loss_scale=4, train_wall=11, gb_free=2.8, wall=165420 2021-06-20 16:35:57 | INFO | train_inner | epoch 005: 2487 / 3002 loss=2.439, ppl=5.42, wps=5910.1, ups=0.09, wpb=64781, bsz=128, num_updates=14410, lr=9.98927e-05, gnorm=1.95, loss_scale=4, train_wall=11, gb_free=2.8, wall=165431 2021-06-20 16:36:08 | INFO | train_inner | epoch 005: 2488 / 3002 loss=2.597, ppl=6.05, wps=5727.9, ups=0.09, wpb=64848, bsz=128, num_updates=14411, lr=9.98927e-05, gnorm=1.936, loss_scale=4, train_wall=11, gb_free=2.8, wall=165443 2021-06-20 16:36:19 | INFO | train_inner | epoch 005: 2489 / 3002 loss=2.52, ppl=5.73, wps=5866.8, ups=0.09, wpb=64774, bsz=128, num_updates=14412, lr=9.98927e-05, gnorm=2.035, loss_scale=4, train_wall=11, gb_free=2.8, wall=165454 2021-06-20 16:36:30 | INFO | train_inner | epoch 005: 2490 / 3002 loss=2.605, ppl=6.08, wps=5821.2, ups=0.09, wpb=64811, bsz=128, num_updates=14413, lr=9.98927e-05, gnorm=1.969, loss_scale=4, train_wall=11, gb_free=2.8, wall=165465 2021-06-20 16:36:42 | INFO | train_inner | epoch 005: 2491 / 3002 loss=2.406, ppl=5.3, wps=5820.6, ups=0.09, wpb=64776, bsz=128, num_updates=14414, lr=9.98927e-05, gnorm=2.33, loss_scale=4, train_wall=11, gb_free=2.8, wall=165476 2021-06-20 16:36:53 | INFO | train_inner | epoch 005: 2492 / 3002 loss=2.471, ppl=5.54, wps=5777, ups=0.09, wpb=64809, bsz=128, num_updates=14415, lr=9.98927e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=165487 2021-06-20 16:37:04 | INFO | train_inner | epoch 005: 2493 / 3002 loss=2.561, ppl=5.9, wps=5716.7, ups=0.09, wpb=64824, bsz=128, num_updates=14416, lr=9.98927e-05, gnorm=2.057, loss_scale=8, train_wall=11, gb_free=2.8, wall=165498 2021-06-20 16:37:15 | INFO | train_inner | epoch 005: 2494 / 3002 loss=2.342, ppl=5.07, wps=5796.8, ups=0.09, wpb=64850, bsz=128, num_updates=14417, lr=9.98927e-05, gnorm=2.039, loss_scale=8, train_wall=11, gb_free=2.8, wall=165510 2021-06-20 16:37:27 | INFO | train_inner | epoch 005: 2495 / 3002 loss=2.659, ppl=6.32, wps=5781.2, ups=0.09, wpb=64800, bsz=128, num_updates=14418, lr=9.98926e-05, gnorm=1.997, loss_scale=8, train_wall=11, gb_free=2.8, wall=165521 2021-06-20 16:37:38 | INFO | train_inner | epoch 005: 2496 / 3002 loss=2.523, ppl=5.75, wps=5849, ups=0.09, wpb=64874, bsz=128, num_updates=14419, lr=9.98926e-05, gnorm=1.977, loss_scale=8, train_wall=11, gb_free=2.8, wall=165532 2021-06-20 16:37:49 | INFO | train_inner | epoch 005: 2497 / 3002 loss=2.669, ppl=6.36, wps=5772.1, ups=0.09, wpb=64855, bsz=128, num_updates=14420, lr=9.98926e-05, gnorm=1.952, loss_scale=8, train_wall=11, gb_free=2.8, wall=165543 2021-06-20 16:38:00 | INFO | train_inner | epoch 005: 2498 / 3002 loss=2.526, ppl=5.76, wps=5953, ups=0.09, wpb=64796, bsz=128, num_updates=14421, lr=9.98926e-05, gnorm=1.914, loss_scale=8, train_wall=10, gb_free=2.8, wall=165554 2021-06-20 16:38:11 | INFO | train_inner | epoch 005: 2499 / 3002 loss=2.454, ppl=5.48, wps=5837.7, ups=0.09, wpb=64857, bsz=128, num_updates=14422, lr=9.98926e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=165565 2021-06-20 16:38:22 | INFO | train_inner | epoch 005: 2500 / 3002 loss=2.406, ppl=5.3, wps=5773.3, ups=0.09, wpb=64887, bsz=128, num_updates=14423, lr=9.98926e-05, gnorm=1.872, loss_scale=8, train_wall=11, gb_free=2.8, wall=165576 2021-06-20 16:38:33 | INFO | train_inner | epoch 005: 2501 / 3002 loss=2.557, ppl=5.89, wps=5897.5, ups=0.09, wpb=64812, bsz=128, num_updates=14424, lr=9.98926e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=165587 2021-06-20 16:38:44 | INFO | train_inner | epoch 005: 2502 / 3002 loss=2.536, ppl=5.8, wps=5798.6, ups=0.09, wpb=64889, bsz=128, num_updates=14425, lr=9.98926e-05, gnorm=1.91, loss_scale=8, train_wall=11, gb_free=2.8, wall=165599 2021-06-20 16:38:55 | INFO | train_inner | epoch 005: 2503 / 3002 loss=2.475, ppl=5.56, wps=5898.1, ups=0.09, wpb=64879, bsz=128, num_updates=14426, lr=9.98926e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=165610 2021-06-20 16:39:06 | INFO | train_inner | epoch 005: 2504 / 3002 loss=2.422, ppl=5.36, wps=5796.6, ups=0.09, wpb=64852, bsz=128, num_updates=14427, lr=9.98926e-05, gnorm=1.985, loss_scale=8, train_wall=11, gb_free=2.8, wall=165621 2021-06-20 16:39:18 | INFO | train_inner | epoch 005: 2505 / 3002 loss=2.388, ppl=5.23, wps=5766.1, ups=0.09, wpb=64770, bsz=128, num_updates=14428, lr=9.98926e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=165632 2021-06-20 16:39:29 | INFO | train_inner | epoch 005: 2506 / 3002 loss=2.543, ppl=5.83, wps=5770.1, ups=0.09, wpb=64743, bsz=128, num_updates=14429, lr=9.98926e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=165643 2021-06-20 16:39:40 | INFO | train_inner | epoch 005: 2507 / 3002 loss=2.543, ppl=5.83, wps=5807.8, ups=0.09, wpb=64754, bsz=128, num_updates=14430, lr=9.98926e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=165654 2021-06-20 16:39:51 | INFO | train_inner | epoch 005: 2508 / 3002 loss=2.494, ppl=5.63, wps=5803.4, ups=0.09, wpb=64825, bsz=128, num_updates=14431, lr=9.98925e-05, gnorm=1.87, loss_scale=8, train_wall=11, gb_free=2.8, wall=165666 2021-06-20 16:40:02 | INFO | train_inner | epoch 005: 2509 / 3002 loss=2.502, ppl=5.67, wps=6003.9, ups=0.09, wpb=64892, bsz=128, num_updates=14432, lr=9.98925e-05, gnorm=1.887, loss_scale=8, train_wall=10, gb_free=2.8, wall=165676 2021-06-20 16:40:13 | INFO | train_inner | epoch 005: 2510 / 3002 loss=2.46, ppl=5.5, wps=5832.5, ups=0.09, wpb=64891, bsz=128, num_updates=14433, lr=9.98925e-05, gnorm=1.994, loss_scale=8, train_wall=11, gb_free=2.8, wall=165687 2021-06-20 16:40:24 | INFO | train_inner | epoch 005: 2511 / 3002 loss=2.548, ppl=5.85, wps=5839.6, ups=0.09, wpb=64882, bsz=128, num_updates=14434, lr=9.98925e-05, gnorm=1.97, loss_scale=8, train_wall=11, gb_free=2.8, wall=165699 2021-06-20 16:40:35 | INFO | train_inner | epoch 005: 2512 / 3002 loss=2.641, ppl=6.24, wps=5925.8, ups=0.09, wpb=64835, bsz=128, num_updates=14435, lr=9.98925e-05, gnorm=1.97, loss_scale=8, train_wall=10, gb_free=2.8, wall=165710 2021-06-20 16:40:46 | INFO | train_inner | epoch 005: 2513 / 3002 loss=2.473, ppl=5.55, wps=5873.4, ups=0.09, wpb=64859, bsz=128, num_updates=14436, lr=9.98925e-05, gnorm=1.909, loss_scale=8, train_wall=11, gb_free=2.8, wall=165721 2021-06-20 16:40:57 | INFO | train_inner | epoch 005: 2514 / 3002 loss=2.432, ppl=5.4, wps=5794.3, ups=0.09, wpb=64805, bsz=128, num_updates=14437, lr=9.98925e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=165732 2021-06-20 16:41:09 | INFO | train_inner | epoch 005: 2515 / 3002 loss=2.437, ppl=5.41, wps=5826.1, ups=0.09, wpb=64894, bsz=128, num_updates=14438, lr=9.98925e-05, gnorm=1.934, loss_scale=8, train_wall=11, gb_free=2.8, wall=165743 2021-06-20 16:41:20 | INFO | train_inner | epoch 005: 2516 / 3002 loss=2.553, ppl=5.87, wps=5845.6, ups=0.09, wpb=64832, bsz=128, num_updates=14439, lr=9.98925e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=165754 2021-06-20 16:41:31 | INFO | train_inner | epoch 005: 2517 / 3002 loss=2.36, ppl=5.13, wps=5922.9, ups=0.09, wpb=64868, bsz=128, num_updates=14440, lr=9.98925e-05, gnorm=2.017, loss_scale=8, train_wall=10, gb_free=2.8, wall=165765 2021-06-20 16:41:42 | INFO | train_inner | epoch 005: 2518 / 3002 loss=2.557, ppl=5.88, wps=5806.9, ups=0.09, wpb=64841, bsz=128, num_updates=14441, lr=9.98925e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=165776 2021-06-20 16:41:53 | INFO | train_inner | epoch 005: 2519 / 3002 loss=2.417, ppl=5.34, wps=5842.1, ups=0.09, wpb=64891, bsz=128, num_updates=14442, lr=9.98925e-05, gnorm=1.886, loss_scale=8, train_wall=11, gb_free=2.8, wall=165787 2021-06-20 16:42:04 | INFO | train_inner | epoch 005: 2520 / 3002 loss=2.691, ppl=6.46, wps=5909.5, ups=0.09, wpb=64850, bsz=128, num_updates=14443, lr=9.98924e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=165798 2021-06-20 16:42:15 | INFO | train_inner | epoch 005: 2521 / 3002 loss=2.662, ppl=6.33, wps=5809.9, ups=0.09, wpb=64805, bsz=128, num_updates=14444, lr=9.98924e-05, gnorm=1.92, loss_scale=8, train_wall=11, gb_free=2.8, wall=165809 2021-06-20 16:42:26 | INFO | train_inner | epoch 005: 2522 / 3002 loss=2.472, ppl=5.55, wps=5942.6, ups=0.09, wpb=64834, bsz=128, num_updates=14445, lr=9.98924e-05, gnorm=1.941, loss_scale=8, train_wall=10, gb_free=2.8, wall=165820 2021-06-20 16:42:37 | INFO | train_inner | epoch 005: 2523 / 3002 loss=2.5, ppl=5.66, wps=5822.7, ups=0.09, wpb=64844, bsz=128, num_updates=14446, lr=9.98924e-05, gnorm=1.924, loss_scale=8, train_wall=11, gb_free=2.8, wall=165831 2021-06-20 16:42:48 | INFO | train_inner | epoch 005: 2524 / 3002 loss=2.817, ppl=7.05, wps=5815.6, ups=0.09, wpb=64754, bsz=128, num_updates=14447, lr=9.98924e-05, gnorm=2.273, loss_scale=8, train_wall=11, gb_free=2.8, wall=165843 2021-06-20 16:42:59 | INFO | train_inner | epoch 005: 2525 / 3002 loss=2.514, ppl=5.71, wps=5828.2, ups=0.09, wpb=64842, bsz=128, num_updates=14448, lr=9.98924e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=165854 2021-06-20 16:43:11 | INFO | train_inner | epoch 005: 2526 / 3002 loss=2.511, ppl=5.7, wps=5780.4, ups=0.09, wpb=64854, bsz=128, num_updates=14449, lr=9.98924e-05, gnorm=2.04, loss_scale=8, train_wall=11, gb_free=2.8, wall=165865 2021-06-20 16:43:22 | INFO | train_inner | epoch 005: 2527 / 3002 loss=2.36, ppl=5.13, wps=5873.3, ups=0.09, wpb=64793, bsz=128, num_updates=14450, lr=9.98924e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=165876 2021-06-20 16:43:32 | INFO | train_inner | epoch 005: 2528 / 3002 loss=2.497, ppl=5.65, wps=5955.6, ups=0.09, wpb=64770, bsz=128, num_updates=14451, lr=9.98924e-05, gnorm=2.021, loss_scale=8, train_wall=10, gb_free=2.8, wall=165887 2021-06-20 16:43:44 | INFO | train_inner | epoch 005: 2529 / 3002 loss=2.503, ppl=5.67, wps=5815.8, ups=0.09, wpb=64892, bsz=128, num_updates=14452, lr=9.98924e-05, gnorm=1.985, loss_scale=8, train_wall=11, gb_free=2.8, wall=165898 2021-06-20 16:43:55 | INFO | train_inner | epoch 005: 2530 / 3002 loss=2.384, ppl=5.22, wps=5780.3, ups=0.09, wpb=64836, bsz=128, num_updates=14453, lr=9.98924e-05, gnorm=1.887, loss_scale=8, train_wall=11, gb_free=2.8, wall=165909 2021-06-20 16:44:06 | INFO | train_inner | epoch 005: 2531 / 3002 loss=2.497, ppl=5.64, wps=5914, ups=0.09, wpb=64799, bsz=128, num_updates=14454, lr=9.98924e-05, gnorm=1.965, loss_scale=8, train_wall=10, gb_free=2.8, wall=165920 2021-06-20 16:44:17 | INFO | train_inner | epoch 005: 2532 / 3002 loss=2.546, ppl=5.84, wps=5785.3, ups=0.09, wpb=64875, bsz=128, num_updates=14455, lr=9.98924e-05, gnorm=2.034, loss_scale=8, train_wall=11, gb_free=2.8, wall=165931 2021-06-20 16:44:28 | INFO | train_inner | epoch 005: 2533 / 3002 loss=2.425, ppl=5.37, wps=5737.9, ups=0.09, wpb=64820, bsz=128, num_updates=14456, lr=9.98923e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=165943 2021-06-20 16:44:39 | INFO | train_inner | epoch 005: 2534 / 3002 loss=2.508, ppl=5.69, wps=5873.9, ups=0.09, wpb=64911, bsz=128, num_updates=14457, lr=9.98923e-05, gnorm=2.414, loss_scale=8, train_wall=11, gb_free=2.8, wall=165954 2021-06-20 16:44:51 | INFO | train_inner | epoch 005: 2535 / 3002 loss=2.678, ppl=6.4, wps=5764.2, ups=0.09, wpb=64785, bsz=128, num_updates=14458, lr=9.98923e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=165965 2021-06-20 16:45:02 | INFO | train_inner | epoch 005: 2536 / 3002 loss=2.425, ppl=5.37, wps=5838.7, ups=0.09, wpb=64809, bsz=128, num_updates=14459, lr=9.98923e-05, gnorm=1.895, loss_scale=8, train_wall=11, gb_free=2.8, wall=165976 2021-06-20 16:45:13 | INFO | train_inner | epoch 005: 2537 / 3002 loss=2.385, ppl=5.22, wps=5785.5, ups=0.09, wpb=64816, bsz=128, num_updates=14460, lr=9.98923e-05, gnorm=1.889, loss_scale=8, train_wall=11, gb_free=2.8, wall=165987 2021-06-20 16:45:24 | INFO | train_inner | epoch 005: 2538 / 3002 loss=2.498, ppl=5.65, wps=5896.5, ups=0.09, wpb=64844, bsz=128, num_updates=14461, lr=9.98923e-05, gnorm=1.984, loss_scale=8, train_wall=11, gb_free=2.8, wall=165998 2021-06-20 16:45:35 | INFO | train_inner | epoch 005: 2539 / 3002 loss=2.478, ppl=5.57, wps=5778.4, ups=0.09, wpb=64825, bsz=128, num_updates=14462, lr=9.98923e-05, gnorm=1.885, loss_scale=8, train_wall=11, gb_free=2.8, wall=166009 2021-06-20 16:45:46 | INFO | train_inner | epoch 005: 2540 / 3002 loss=2.633, ppl=6.2, wps=5786, ups=0.09, wpb=64868, bsz=128, num_updates=14463, lr=9.98923e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=166021 2021-06-20 16:45:57 | INFO | train_inner | epoch 005: 2541 / 3002 loss=2.492, ppl=5.63, wps=5886.7, ups=0.09, wpb=64781, bsz=128, num_updates=14464, lr=9.98923e-05, gnorm=1.802, loss_scale=8, train_wall=11, gb_free=2.8, wall=166032 2021-06-20 16:46:09 | INFO | train_inner | epoch 005: 2542 / 3002 loss=2.416, ppl=5.34, wps=5744.7, ups=0.09, wpb=64805, bsz=128, num_updates=14465, lr=9.98923e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=166043 2021-06-20 16:46:20 | INFO | train_inner | epoch 005: 2543 / 3002 loss=2.556, ppl=5.88, wps=5876.7, ups=0.09, wpb=64870, bsz=128, num_updates=14466, lr=9.98923e-05, gnorm=1.97, loss_scale=8, train_wall=11, gb_free=2.8, wall=166054 2021-06-20 16:46:31 | INFO | train_inner | epoch 005: 2544 / 3002 loss=2.483, ppl=5.59, wps=5874.4, ups=0.09, wpb=64875, bsz=128, num_updates=14467, lr=9.98923e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=166065 2021-06-20 16:46:42 | INFO | train_inner | epoch 005: 2545 / 3002 loss=2.484, ppl=5.6, wps=5906, ups=0.09, wpb=64890, bsz=128, num_updates=14468, lr=9.98922e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=166076 2021-06-20 16:46:53 | INFO | train_inner | epoch 005: 2546 / 3002 loss=2.38, ppl=5.21, wps=5732.4, ups=0.09, wpb=64891, bsz=128, num_updates=14469, lr=9.98922e-05, gnorm=2.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=166087 2021-06-20 16:47:04 | INFO | train_inner | epoch 005: 2547 / 3002 loss=2.393, ppl=5.25, wps=5862.6, ups=0.09, wpb=64870, bsz=128, num_updates=14470, lr=9.98922e-05, gnorm=2, loss_scale=8, train_wall=11, gb_free=2.8, wall=166098 2021-06-20 16:47:15 | INFO | train_inner | epoch 005: 2548 / 3002 loss=2.456, ppl=5.49, wps=5804.8, ups=0.09, wpb=64741, bsz=128, num_updates=14471, lr=9.98922e-05, gnorm=3.501, loss_scale=8, train_wall=11, gb_free=2.8, wall=166110 2021-06-20 16:47:27 | INFO | train_inner | epoch 005: 2549 / 3002 loss=2.321, ppl=5, wps=5744.9, ups=0.09, wpb=64890, bsz=128, num_updates=14472, lr=9.98922e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=166121 2021-06-20 16:47:38 | INFO | train_inner | epoch 005: 2550 / 3002 loss=2.512, ppl=5.7, wps=5730.7, ups=0.09, wpb=64855, bsz=128, num_updates=14473, lr=9.98922e-05, gnorm=1.95, loss_scale=8, train_wall=11, gb_free=2.8, wall=166132 2021-06-20 16:47:49 | INFO | train_inner | epoch 005: 2551 / 3002 loss=2.452, ppl=5.47, wps=5863.2, ups=0.09, wpb=64798, bsz=128, num_updates=14474, lr=9.98922e-05, gnorm=2.272, loss_scale=8, train_wall=11, gb_free=2.8, wall=166143 2021-06-20 16:48:00 | INFO | train_inner | epoch 005: 2552 / 3002 loss=2.469, ppl=5.54, wps=5866.6, ups=0.09, wpb=64814, bsz=128, num_updates=14475, lr=9.98922e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=166154 2021-06-20 16:48:11 | INFO | train_inner | epoch 005: 2553 / 3002 loss=2.451, ppl=5.47, wps=5842.7, ups=0.09, wpb=64796, bsz=128, num_updates=14476, lr=9.98922e-05, gnorm=1.899, loss_scale=8, train_wall=11, gb_free=2.8, wall=166165 2021-06-20 16:48:22 | INFO | train_inner | epoch 005: 2554 / 3002 loss=2.446, ppl=5.45, wps=5841.7, ups=0.09, wpb=64894, bsz=128, num_updates=14477, lr=9.98922e-05, gnorm=1.915, loss_scale=8, train_wall=11, gb_free=2.8, wall=166176 2021-06-20 16:48:33 | INFO | train_inner | epoch 005: 2555 / 3002 loss=2.454, ppl=5.48, wps=5720.1, ups=0.09, wpb=64775, bsz=128, num_updates=14478, lr=9.98922e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=166188 2021-06-20 16:48:44 | INFO | train_inner | epoch 005: 2556 / 3002 loss=2.454, ppl=5.48, wps=5936.3, ups=0.09, wpb=64836, bsz=128, num_updates=14479, lr=9.98922e-05, gnorm=1.925, loss_scale=8, train_wall=10, gb_free=2.8, wall=166199 2021-06-20 16:48:56 | INFO | train_inner | epoch 005: 2557 / 3002 loss=2.461, ppl=5.5, wps=5757.7, ups=0.09, wpb=64889, bsz=128, num_updates=14480, lr=9.98922e-05, gnorm=1.995, loss_scale=8, train_wall=11, gb_free=2.8, wall=166210 2021-06-20 16:49:07 | INFO | train_inner | epoch 005: 2558 / 3002 loss=2.555, ppl=5.88, wps=5787.8, ups=0.09, wpb=64817, bsz=128, num_updates=14481, lr=9.98921e-05, gnorm=2.052, loss_scale=8, train_wall=11, gb_free=2.8, wall=166221 2021-06-20 16:49:18 | INFO | train_inner | epoch 005: 2559 / 3002 loss=2.506, ppl=5.68, wps=5705.7, ups=0.09, wpb=64819, bsz=128, num_updates=14482, lr=9.98921e-05, gnorm=2.015, loss_scale=8, train_wall=11, gb_free=2.8, wall=166233 2021-06-20 16:49:29 | INFO | train_inner | epoch 005: 2560 / 3002 loss=2.577, ppl=5.97, wps=5825.9, ups=0.09, wpb=64801, bsz=128, num_updates=14483, lr=9.98921e-05, gnorm=1.998, loss_scale=8, train_wall=11, gb_free=2.8, wall=166244 2021-06-20 16:49:40 | INFO | train_inner | epoch 005: 2561 / 3002 loss=2.527, ppl=5.76, wps=5879.8, ups=0.09, wpb=64856, bsz=128, num_updates=14484, lr=9.98921e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=166255 2021-06-20 16:49:51 | INFO | train_inner | epoch 005: 2562 / 3002 loss=2.476, ppl=5.56, wps=5884.8, ups=0.09, wpb=64776, bsz=128, num_updates=14485, lr=9.98921e-05, gnorm=1.884, loss_scale=8, train_wall=11, gb_free=2.8, wall=166266 2021-06-20 16:50:03 | INFO | train_inner | epoch 005: 2563 / 3002 loss=2.404, ppl=5.29, wps=5770.5, ups=0.09, wpb=64847, bsz=128, num_updates=14486, lr=9.98921e-05, gnorm=2.081, loss_scale=8, train_wall=11, gb_free=2.8, wall=166277 2021-06-20 16:50:14 | INFO | train_inner | epoch 005: 2564 / 3002 loss=2.674, ppl=6.38, wps=5832.8, ups=0.09, wpb=64851, bsz=128, num_updates=14487, lr=9.98921e-05, gnorm=2.022, loss_scale=8, train_wall=11, gb_free=2.8, wall=166288 2021-06-20 16:50:25 | INFO | train_inner | epoch 005: 2565 / 3002 loss=2.441, ppl=5.43, wps=5885.6, ups=0.09, wpb=64797, bsz=128, num_updates=14488, lr=9.98921e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=166299 2021-06-20 16:50:36 | INFO | train_inner | epoch 005: 2566 / 3002 loss=2.438, ppl=5.42, wps=5874.2, ups=0.09, wpb=64788, bsz=128, num_updates=14489, lr=9.98921e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=166310 2021-06-20 16:50:47 | INFO | train_inner | epoch 005: 2567 / 3002 loss=2.401, ppl=5.28, wps=5784.3, ups=0.09, wpb=64851, bsz=128, num_updates=14490, lr=9.98921e-05, gnorm=1.921, loss_scale=8, train_wall=11, gb_free=2.8, wall=166321 2021-06-20 16:50:58 | INFO | train_inner | epoch 005: 2568 / 3002 loss=2.611, ppl=6.11, wps=5927.1, ups=0.09, wpb=64862, bsz=128, num_updates=14491, lr=9.98921e-05, gnorm=2.002, loss_scale=8, train_wall=10, gb_free=2.8, wall=166332 2021-06-20 16:51:09 | INFO | train_inner | epoch 005: 2569 / 3002 loss=2.308, ppl=4.95, wps=5913.5, ups=0.09, wpb=64853, bsz=128, num_updates=14492, lr=9.98921e-05, gnorm=1.987, loss_scale=8, train_wall=10, gb_free=2.8, wall=166343 2021-06-20 16:51:20 | INFO | train_inner | epoch 005: 2570 / 3002 loss=2.517, ppl=5.73, wps=5977.6, ups=0.09, wpb=64787, bsz=128, num_updates=14493, lr=9.9892e-05, gnorm=2.063, loss_scale=8, train_wall=10, gb_free=2.8, wall=166354 2021-06-20 16:51:31 | INFO | train_inner | epoch 005: 2571 / 3002 loss=2.591, ppl=6.02, wps=5947.2, ups=0.09, wpb=64856, bsz=128, num_updates=14494, lr=9.9892e-05, gnorm=1.996, loss_scale=8, train_wall=10, gb_free=2.8, wall=166365 2021-06-20 16:51:42 | INFO | train_inner | epoch 005: 2572 / 3002 loss=2.509, ppl=5.69, wps=5918.4, ups=0.09, wpb=64818, bsz=128, num_updates=14495, lr=9.9892e-05, gnorm=1.922, loss_scale=8, train_wall=10, gb_free=2.8, wall=166376 2021-06-20 16:51:53 | INFO | train_inner | epoch 005: 2573 / 3002 loss=2.369, ppl=5.17, wps=5807.4, ups=0.09, wpb=64832, bsz=128, num_updates=14496, lr=9.9892e-05, gnorm=1.867, loss_scale=8, train_wall=11, gb_free=2.8, wall=166387 2021-06-20 16:52:04 | INFO | train_inner | epoch 005: 2574 / 3002 loss=2.485, ppl=5.6, wps=5833.1, ups=0.09, wpb=64852, bsz=128, num_updates=14497, lr=9.9892e-05, gnorm=1.92, loss_scale=8, train_wall=11, gb_free=2.8, wall=166398 2021-06-20 16:52:15 | INFO | train_inner | epoch 005: 2575 / 3002 loss=2.469, ppl=5.54, wps=5811.1, ups=0.09, wpb=64864, bsz=128, num_updates=14498, lr=9.9892e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=166409 2021-06-20 16:52:26 | INFO | train_inner | epoch 005: 2576 / 3002 loss=2.515, ppl=5.72, wps=5893.1, ups=0.09, wpb=64778, bsz=128, num_updates=14499, lr=9.9892e-05, gnorm=1.878, loss_scale=8, train_wall=11, gb_free=2.8, wall=166420 2021-06-20 16:52:37 | INFO | train_inner | epoch 005: 2577 / 3002 loss=2.495, ppl=5.64, wps=5916.4, ups=0.09, wpb=64801, bsz=128, num_updates=14500, lr=9.9892e-05, gnorm=1.959, loss_scale=8, train_wall=10, gb_free=2.8, wall=166431 2021-06-20 16:52:48 | INFO | train_inner | epoch 005: 2578 / 3002 loss=2.44, ppl=5.43, wps=5677.7, ups=0.09, wpb=64839, bsz=128, num_updates=14501, lr=9.9892e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=166443 2021-06-20 16:53:00 | INFO | train_inner | epoch 005: 2579 / 3002 loss=2.586, ppl=6, wps=5746.2, ups=0.09, wpb=64805, bsz=128, num_updates=14502, lr=9.9892e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=166454 2021-06-20 16:53:11 | INFO | train_inner | epoch 005: 2580 / 3002 loss=2.553, ppl=5.87, wps=5986.2, ups=0.09, wpb=64914, bsz=128, num_updates=14503, lr=9.9892e-05, gnorm=2.079, loss_scale=8, train_wall=10, gb_free=2.8, wall=166465 2021-06-20 16:53:21 | INFO | train_inner | epoch 005: 2581 / 3002 loss=2.481, ppl=5.58, wps=5957.3, ups=0.09, wpb=64870, bsz=128, num_updates=14504, lr=9.9892e-05, gnorm=1.97, loss_scale=8, train_wall=10, gb_free=2.8, wall=166476 2021-06-20 16:53:33 | INFO | train_inner | epoch 005: 2582 / 3002 loss=2.378, ppl=5.2, wps=5834.9, ups=0.09, wpb=64816, bsz=128, num_updates=14505, lr=9.9892e-05, gnorm=1.843, loss_scale=8, train_wall=11, gb_free=2.8, wall=166487 2021-06-20 16:53:43 | INFO | train_inner | epoch 005: 2583 / 3002 loss=2.434, ppl=5.4, wps=6020.2, ups=0.09, wpb=64860, bsz=128, num_updates=14506, lr=9.98919e-05, gnorm=1.948, loss_scale=8, train_wall=10, gb_free=2.8, wall=166498 2021-06-20 16:53:54 | INFO | train_inner | epoch 005: 2584 / 3002 loss=2.472, ppl=5.55, wps=6028.7, ups=0.09, wpb=64792, bsz=128, num_updates=14507, lr=9.98919e-05, gnorm=1.94, loss_scale=8, train_wall=10, gb_free=2.8, wall=166508 2021-06-20 16:54:05 | INFO | train_inner | epoch 005: 2585 / 3002 loss=2.596, ppl=6.05, wps=5878.8, ups=0.09, wpb=64890, bsz=128, num_updates=14508, lr=9.98919e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=166519 2021-06-20 16:54:16 | INFO | train_inner | epoch 005: 2586 / 3002 loss=2.61, ppl=6.1, wps=5787.5, ups=0.09, wpb=64810, bsz=128, num_updates=14509, lr=9.98919e-05, gnorm=2.055, loss_scale=8, train_wall=11, gb_free=2.8, wall=166531 2021-06-20 16:54:27 | INFO | train_inner | epoch 005: 2587 / 3002 loss=2.415, ppl=5.33, wps=5808.1, ups=0.09, wpb=64854, bsz=128, num_updates=14510, lr=9.98919e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=166542 2021-06-20 16:54:39 | INFO | train_inner | epoch 005: 2588 / 3002 loss=2.571, ppl=5.94, wps=5838.7, ups=0.09, wpb=64830, bsz=128, num_updates=14511, lr=9.98919e-05, gnorm=1.995, loss_scale=8, train_wall=11, gb_free=2.8, wall=166553 2021-06-20 16:54:50 | INFO | train_inner | epoch 005: 2589 / 3002 loss=2.374, ppl=5.18, wps=5718.1, ups=0.09, wpb=64889, bsz=128, num_updates=14512, lr=9.98919e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=166564 2021-06-20 16:55:01 | INFO | train_inner | epoch 005: 2590 / 3002 loss=2.375, ppl=5.19, wps=5832.4, ups=0.09, wpb=64799, bsz=128, num_updates=14513, lr=9.98919e-05, gnorm=1.882, loss_scale=8, train_wall=11, gb_free=2.8, wall=166575 2021-06-20 16:55:12 | INFO | train_inner | epoch 005: 2591 / 3002 loss=2.507, ppl=5.69, wps=5911.3, ups=0.09, wpb=64824, bsz=128, num_updates=14514, lr=9.98919e-05, gnorm=1.851, loss_scale=8, train_wall=11, gb_free=2.8, wall=166586 2021-06-20 16:55:23 | INFO | train_inner | epoch 005: 2592 / 3002 loss=2.593, ppl=6.03, wps=5871.4, ups=0.09, wpb=64806, bsz=128, num_updates=14515, lr=9.98919e-05, gnorm=1.952, loss_scale=8, train_wall=11, gb_free=2.8, wall=166597 2021-06-20 16:55:34 | INFO | train_inner | epoch 005: 2593 / 3002 loss=2.526, ppl=5.76, wps=5847.6, ups=0.09, wpb=64825, bsz=128, num_updates=14516, lr=9.98919e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=166608 2021-06-20 16:55:45 | INFO | train_inner | epoch 005: 2594 / 3002 loss=2.485, ppl=5.6, wps=5970.4, ups=0.09, wpb=64856, bsz=128, num_updates=14517, lr=9.98919e-05, gnorm=1.89, loss_scale=8, train_wall=10, gb_free=2.8, wall=166619 2021-06-20 16:55:56 | INFO | train_inner | epoch 005: 2595 / 3002 loss=2.502, ppl=5.67, wps=5850.6, ups=0.09, wpb=64879, bsz=128, num_updates=14518, lr=9.98918e-05, gnorm=1.904, loss_scale=8, train_wall=11, gb_free=2.8, wall=166630 2021-06-20 16:56:07 | INFO | train_inner | epoch 005: 2596 / 3002 loss=2.472, ppl=5.55, wps=5860, ups=0.09, wpb=64853, bsz=128, num_updates=14519, lr=9.98918e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=166641 2021-06-20 16:56:18 | INFO | train_inner | epoch 005: 2597 / 3002 loss=2.639, ppl=6.23, wps=5843.9, ups=0.09, wpb=64774, bsz=128, num_updates=14520, lr=9.98918e-05, gnorm=1.949, loss_scale=8, train_wall=11, gb_free=2.8, wall=166653 2021-06-20 16:56:30 | INFO | train_inner | epoch 005: 2598 / 3002 loss=2.288, ppl=4.89, wps=5690.7, ups=0.09, wpb=64841, bsz=128, num_updates=14521, lr=9.98918e-05, gnorm=1.877, loss_scale=8, train_wall=11, gb_free=2.8, wall=166664 2021-06-20 16:56:41 | INFO | train_inner | epoch 005: 2599 / 3002 loss=2.42, ppl=5.35, wps=5825.4, ups=0.09, wpb=64823, bsz=128, num_updates=14522, lr=9.98918e-05, gnorm=2.003, loss_scale=8, train_wall=11, gb_free=2.8, wall=166675 2021-06-20 16:56:52 | INFO | train_inner | epoch 005: 2600 / 3002 loss=2.529, ppl=5.77, wps=5743.4, ups=0.09, wpb=64733, bsz=128, num_updates=14523, lr=9.98918e-05, gnorm=1.995, loss_scale=8, train_wall=11, gb_free=2.8, wall=166686 2021-06-20 16:57:03 | INFO | train_inner | epoch 005: 2601 / 3002 loss=2.475, ppl=5.56, wps=5725, ups=0.09, wpb=64869, bsz=128, num_updates=14524, lr=9.98918e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=166698 2021-06-20 16:57:14 | INFO | train_inner | epoch 005: 2602 / 3002 loss=2.649, ppl=6.27, wps=5846, ups=0.09, wpb=64825, bsz=128, num_updates=14525, lr=9.98918e-05, gnorm=1.921, loss_scale=8, train_wall=11, gb_free=2.8, wall=166709 2021-06-20 16:57:26 | INFO | train_inner | epoch 005: 2603 / 3002 loss=2.633, ppl=6.2, wps=5795.2, ups=0.09, wpb=64793, bsz=128, num_updates=14526, lr=9.98918e-05, gnorm=2.062, loss_scale=8, train_wall=11, gb_free=2.8, wall=166720 2021-06-20 16:57:37 | INFO | train_inner | epoch 005: 2604 / 3002 loss=2.601, ppl=6.07, wps=5888.2, ups=0.09, wpb=64767, bsz=128, num_updates=14527, lr=9.98918e-05, gnorm=1.968, loss_scale=8, train_wall=11, gb_free=2.8, wall=166731 2021-06-20 16:57:48 | INFO | train_inner | epoch 005: 2605 / 3002 loss=2.384, ppl=5.22, wps=5915.8, ups=0.09, wpb=64885, bsz=128, num_updates=14528, lr=9.98918e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=166742 2021-06-20 16:57:59 | INFO | train_inner | epoch 005: 2606 / 3002 loss=2.37, ppl=5.17, wps=5916.7, ups=0.09, wpb=64850, bsz=128, num_updates=14529, lr=9.98918e-05, gnorm=1.966, loss_scale=8, train_wall=10, gb_free=2.8, wall=166753 2021-06-20 16:58:10 | INFO | train_inner | epoch 005: 2607 / 3002 loss=2.576, ppl=5.96, wps=5866.2, ups=0.09, wpb=64823, bsz=128, num_updates=14530, lr=9.98918e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=166764 2021-06-20 16:58:21 | INFO | train_inner | epoch 005: 2608 / 3002 loss=2.716, ppl=6.57, wps=5845.1, ups=0.09, wpb=64793, bsz=128, num_updates=14531, lr=9.98917e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=166775 2021-06-20 16:58:32 | INFO | train_inner | epoch 005: 2609 / 3002 loss=2.568, ppl=5.93, wps=5886.1, ups=0.09, wpb=64737, bsz=128, num_updates=14532, lr=9.98917e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=166786 2021-06-20 16:58:43 | INFO | train_inner | epoch 005: 2610 / 3002 loss=2.362, ppl=5.14, wps=5877.3, ups=0.09, wpb=64870, bsz=128, num_updates=14533, lr=9.98917e-05, gnorm=1.89, loss_scale=8, train_wall=11, gb_free=2.8, wall=166797 2021-06-20 16:58:54 | INFO | train_inner | epoch 005: 2611 / 3002 loss=2.418, ppl=5.35, wps=5778.5, ups=0.09, wpb=64817, bsz=128, num_updates=14534, lr=9.98917e-05, gnorm=2.034, loss_scale=8, train_wall=11, gb_free=2.8, wall=166808 2021-06-20 16:59:05 | INFO | train_inner | epoch 005: 2612 / 3002 loss=2.524, ppl=5.75, wps=5870.5, ups=0.09, wpb=64876, bsz=128, num_updates=14535, lr=9.98917e-05, gnorm=2.05, loss_scale=8, train_wall=11, gb_free=2.8, wall=166819 2021-06-20 16:59:16 | INFO | train_inner | epoch 005: 2613 / 3002 loss=2.571, ppl=5.94, wps=5864.1, ups=0.09, wpb=64828, bsz=128, num_updates=14536, lr=9.98917e-05, gnorm=1.95, loss_scale=8, train_wall=11, gb_free=2.8, wall=166830 2021-06-20 16:59:27 | INFO | train_inner | epoch 005: 2614 / 3002 loss=2.721, ppl=6.59, wps=5834.8, ups=0.09, wpb=64909, bsz=128, num_updates=14537, lr=9.98917e-05, gnorm=2.058, loss_scale=8, train_wall=11, gb_free=2.8, wall=166841 2021-06-20 16:59:38 | INFO | train_inner | epoch 005: 2615 / 3002 loss=2.362, ppl=5.14, wps=5907.6, ups=0.09, wpb=64845, bsz=128, num_updates=14538, lr=9.98917e-05, gnorm=1.954, loss_scale=8, train_wall=11, gb_free=2.8, wall=166852 2021-06-20 16:59:49 | INFO | train_inner | epoch 005: 2616 / 3002 loss=2.547, ppl=5.84, wps=5805.6, ups=0.09, wpb=64869, bsz=128, num_updates=14539, lr=9.98917e-05, gnorm=1.968, loss_scale=8, train_wall=11, gb_free=2.8, wall=166864 2021-06-20 17:00:00 | INFO | train_inner | epoch 005: 2617 / 3002 loss=2.554, ppl=5.87, wps=5830.8, ups=0.09, wpb=64847, bsz=128, num_updates=14540, lr=9.98917e-05, gnorm=1.941, loss_scale=8, train_wall=11, gb_free=2.8, wall=166875 2021-06-20 17:00:11 | INFO | train_inner | epoch 005: 2618 / 3002 loss=2.578, ppl=5.97, wps=5872, ups=0.09, wpb=64897, bsz=128, num_updates=14541, lr=9.98917e-05, gnorm=1.941, loss_scale=8, train_wall=11, gb_free=2.8, wall=166886 2021-06-20 17:00:23 | INFO | train_inner | epoch 005: 2619 / 3002 loss=2.588, ppl=6.01, wps=5832.5, ups=0.09, wpb=64900, bsz=128, num_updates=14542, lr=9.98917e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=166897 2021-06-20 17:00:34 | INFO | train_inner | epoch 005: 2620 / 3002 loss=2.616, ppl=6.13, wps=5892.3, ups=0.09, wpb=64686, bsz=128, num_updates=14543, lr=9.98916e-05, gnorm=2.153, loss_scale=16, train_wall=11, gb_free=2.8, wall=166908 2021-06-20 17:00:44 | INFO | train_inner | epoch 005: 2621 / 3002 loss=2.476, ppl=5.56, wps=5957.9, ups=0.09, wpb=64889, bsz=128, num_updates=14544, lr=9.98916e-05, gnorm=1.918, loss_scale=16, train_wall=10, gb_free=2.8, wall=166919 2021-06-20 17:00:55 | INFO | train_inner | epoch 005: 2622 / 3002 loss=2.473, ppl=5.55, wps=5947.5, ups=0.09, wpb=64789, bsz=128, num_updates=14545, lr=9.98916e-05, gnorm=1.895, loss_scale=16, train_wall=10, gb_free=2.8, wall=166930 2021-06-20 17:01:06 | INFO | train_inner | epoch 005: 2623 / 3002 loss=2.582, ppl=5.99, wps=5915.1, ups=0.09, wpb=64821, bsz=128, num_updates=14546, lr=9.98916e-05, gnorm=1.955, loss_scale=16, train_wall=11, gb_free=2.8, wall=166941 2021-06-20 17:01:17 | INFO | train_inner | epoch 005: 2624 / 3002 loss=2.498, ppl=5.65, wps=5808.2, ups=0.09, wpb=64813, bsz=128, num_updates=14547, lr=9.98916e-05, gnorm=1.912, loss_scale=16, train_wall=11, gb_free=2.8, wall=166952 2021-06-20 17:01:29 | INFO | train_inner | epoch 005: 2625 / 3002 loss=2.488, ppl=5.61, wps=5836.5, ups=0.09, wpb=64831, bsz=128, num_updates=14548, lr=9.98916e-05, gnorm=1.949, loss_scale=16, train_wall=11, gb_free=2.8, wall=166963 2021-06-20 17:01:40 | INFO | train_inner | epoch 005: 2626 / 3002 loss=2.411, ppl=5.32, wps=5892.9, ups=0.09, wpb=64859, bsz=128, num_updates=14549, lr=9.98916e-05, gnorm=2.038, loss_scale=16, train_wall=11, gb_free=2.8, wall=166974 2021-06-20 17:01:51 | INFO | train_inner | epoch 005: 2627 / 3002 loss=2.386, ppl=5.23, wps=5770.9, ups=0.09, wpb=64776, bsz=128, num_updates=14550, lr=9.98916e-05, gnorm=1.999, loss_scale=16, train_wall=11, gb_free=2.8, wall=166985 2021-06-20 17:02:02 | INFO | train_inner | epoch 005: 2628 / 3002 loss=2.472, ppl=5.55, wps=5919.1, ups=0.09, wpb=64839, bsz=128, num_updates=14551, lr=9.98916e-05, gnorm=2.062, loss_scale=16, train_wall=11, gb_free=2.8, wall=166996 2021-06-20 17:02:13 | INFO | train_inner | epoch 005: 2629 / 3002 loss=2.58, ppl=5.98, wps=5837.6, ups=0.09, wpb=64781, bsz=128, num_updates=14552, lr=9.98916e-05, gnorm=2.01, loss_scale=16, train_wall=11, gb_free=2.8, wall=167007 2021-06-20 17:02:24 | INFO | train_inner | epoch 005: 2630 / 3002 loss=2.398, ppl=5.27, wps=5855.1, ups=0.09, wpb=64832, bsz=128, num_updates=14553, lr=9.98916e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=167018 2021-06-20 17:02:35 | INFO | train_inner | epoch 005: 2631 / 3002 loss=2.491, ppl=5.62, wps=5844.1, ups=0.09, wpb=64900, bsz=128, num_updates=14554, lr=9.98916e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=167029 2021-06-20 17:02:46 | INFO | train_inner | epoch 005: 2632 / 3002 loss=2.539, ppl=5.81, wps=5907.8, ups=0.09, wpb=64818, bsz=128, num_updates=14555, lr=9.98916e-05, gnorm=1.893, loss_scale=16, train_wall=11, gb_free=2.8, wall=167040 2021-06-20 17:02:57 | INFO | train_inner | epoch 005: 2633 / 3002 loss=2.578, ppl=5.97, wps=5753.4, ups=0.09, wpb=64771, bsz=128, num_updates=14556, lr=9.98915e-05, gnorm=1.922, loss_scale=16, train_wall=11, gb_free=2.8, wall=167052 2021-06-20 17:03:08 | INFO | train_inner | epoch 005: 2634 / 3002 loss=2.442, ppl=5.43, wps=5852.2, ups=0.09, wpb=64848, bsz=128, num_updates=14557, lr=9.98915e-05, gnorm=1.968, loss_scale=16, train_wall=11, gb_free=2.8, wall=167063 2021-06-20 17:03:19 | INFO | train_inner | epoch 005: 2635 / 3002 loss=2.643, ppl=6.25, wps=5849.7, ups=0.09, wpb=64762, bsz=128, num_updates=14558, lr=9.98915e-05, gnorm=2.029, loss_scale=16, train_wall=11, gb_free=2.8, wall=167074 2021-06-20 17:03:30 | INFO | train_inner | epoch 005: 2636 / 3002 loss=2.558, ppl=5.89, wps=5923.9, ups=0.09, wpb=64770, bsz=128, num_updates=14559, lr=9.98915e-05, gnorm=1.949, loss_scale=16, train_wall=10, gb_free=2.8, wall=167085 2021-06-20 17:03:42 | INFO | train_inner | epoch 005: 2637 / 3002 loss=2.635, ppl=6.21, wps=5816.5, ups=0.09, wpb=64825, bsz=128, num_updates=14560, lr=9.98915e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=167096 2021-06-20 17:03:52 | INFO | train_inner | epoch 005: 2638 / 3002 loss=2.464, ppl=5.52, wps=5922.4, ups=0.09, wpb=64905, bsz=128, num_updates=14561, lr=9.98915e-05, gnorm=1.903, loss_scale=16, train_wall=11, gb_free=2.8, wall=167107 2021-06-20 17:04:04 | INFO | train_inner | epoch 005: 2639 / 3002 loss=2.465, ppl=5.52, wps=5849.4, ups=0.09, wpb=64769, bsz=128, num_updates=14562, lr=9.98915e-05, gnorm=1.912, loss_scale=16, train_wall=11, gb_free=2.8, wall=167118 2021-06-20 17:04:14 | INFO | train_inner | epoch 005: 2640 / 3002 loss=2.601, ppl=6.07, wps=5945.6, ups=0.09, wpb=64905, bsz=128, num_updates=14563, lr=9.98915e-05, gnorm=1.951, loss_scale=16, train_wall=10, gb_free=2.8, wall=167129 2021-06-20 17:04:25 | INFO | train_inner | epoch 005: 2641 / 3002 loss=2.659, ppl=6.31, wps=5900.3, ups=0.09, wpb=64818, bsz=128, num_updates=14564, lr=9.98915e-05, gnorm=1.958, loss_scale=16, train_wall=11, gb_free=2.8, wall=167140 2021-06-20 17:04:37 | INFO | train_inner | epoch 005: 2642 / 3002 loss=2.548, ppl=5.85, wps=5782.1, ups=0.09, wpb=64872, bsz=128, num_updates=14565, lr=9.98915e-05, gnorm=2.009, loss_scale=16, train_wall=11, gb_free=2.8, wall=167151 2021-06-20 17:04:48 | INFO | train_inner | epoch 005: 2643 / 3002 loss=2.583, ppl=5.99, wps=5774.2, ups=0.09, wpb=64794, bsz=128, num_updates=14566, lr=9.98915e-05, gnorm=1.962, loss_scale=16, train_wall=11, gb_free=2.8, wall=167162 2021-06-20 17:04:59 | INFO | train_inner | epoch 005: 2644 / 3002 loss=2.614, ppl=6.12, wps=5805.5, ups=0.09, wpb=64789, bsz=128, num_updates=14567, lr=9.98915e-05, gnorm=1.924, loss_scale=16, train_wall=11, gb_free=2.8, wall=167173 2021-06-20 17:05:10 | INFO | train_inner | epoch 005: 2645 / 3002 loss=2.392, ppl=5.25, wps=5749.8, ups=0.09, wpb=64856, bsz=128, num_updates=14568, lr=9.98914e-05, gnorm=1.909, loss_scale=16, train_wall=11, gb_free=2.8, wall=167185 2021-06-20 17:05:22 | INFO | train_inner | epoch 005: 2646 / 3002 loss=2.463, ppl=5.52, wps=5801.4, ups=0.09, wpb=64832, bsz=128, num_updates=14569, lr=9.98914e-05, gnorm=1.879, loss_scale=16, train_wall=11, gb_free=2.8, wall=167196 2021-06-20 17:05:33 | INFO | train_inner | epoch 005: 2647 / 3002 loss=2.48, ppl=5.58, wps=5852.1, ups=0.09, wpb=64915, bsz=128, num_updates=14570, lr=9.98914e-05, gnorm=1.914, loss_scale=16, train_wall=11, gb_free=2.8, wall=167207 2021-06-20 17:05:44 | INFO | train_inner | epoch 005: 2648 / 3002 loss=2.403, ppl=5.29, wps=5841.2, ups=0.09, wpb=64760, bsz=128, num_updates=14571, lr=9.98914e-05, gnorm=1.871, loss_scale=16, train_wall=11, gb_free=2.8, wall=167218 2021-06-20 17:05:55 | INFO | train_inner | epoch 005: 2649 / 3002 loss=2.484, ppl=5.59, wps=5853.9, ups=0.09, wpb=64832, bsz=128, num_updates=14572, lr=9.98914e-05, gnorm=1.877, loss_scale=16, train_wall=11, gb_free=2.8, wall=167229 2021-06-20 17:06:06 | INFO | train_inner | epoch 005: 2650 / 3002 loss=2.424, ppl=5.37, wps=5903.8, ups=0.09, wpb=64842, bsz=128, num_updates=14573, lr=9.98914e-05, gnorm=1.894, loss_scale=16, train_wall=11, gb_free=2.8, wall=167240 2021-06-20 17:06:17 | INFO | train_inner | epoch 005: 2651 / 3002 loss=2.565, ppl=5.92, wps=5801.7, ups=0.09, wpb=64882, bsz=128, num_updates=14574, lr=9.98914e-05, gnorm=1.903, loss_scale=16, train_wall=11, gb_free=2.8, wall=167251 2021-06-20 17:06:28 | INFO | train_inner | epoch 005: 2652 / 3002 loss=2.411, ppl=5.32, wps=5814.2, ups=0.09, wpb=64808, bsz=128, num_updates=14575, lr=9.98914e-05, gnorm=1.919, loss_scale=16, train_wall=11, gb_free=2.8, wall=167262 2021-06-20 17:06:39 | INFO | train_inner | epoch 005: 2653 / 3002 loss=2.482, ppl=5.59, wps=5840, ups=0.09, wpb=64860, bsz=128, num_updates=14576, lr=9.98914e-05, gnorm=1.893, loss_scale=16, train_wall=11, gb_free=2.8, wall=167274 2021-06-20 17:06:50 | INFO | train_inner | epoch 005: 2654 / 3002 loss=2.481, ppl=5.58, wps=5782.6, ups=0.09, wpb=64824, bsz=128, num_updates=14577, lr=9.98914e-05, gnorm=1.967, loss_scale=16, train_wall=11, gb_free=2.8, wall=167285 2021-06-20 17:07:02 | INFO | train_inner | epoch 005: 2655 / 3002 loss=2.493, ppl=5.63, wps=5767.3, ups=0.09, wpb=64805, bsz=128, num_updates=14578, lr=9.98914e-05, gnorm=1.98, loss_scale=16, train_wall=11, gb_free=2.8, wall=167296 2021-06-20 17:07:13 | INFO | train_inner | epoch 005: 2656 / 3002 loss=2.541, ppl=5.82, wps=5823.1, ups=0.09, wpb=64864, bsz=128, num_updates=14579, lr=9.98914e-05, gnorm=1.989, loss_scale=16, train_wall=11, gb_free=2.8, wall=167307 2021-06-20 17:07:24 | INFO | train_inner | epoch 005: 2657 / 3002 loss=2.702, ppl=6.51, wps=5951.8, ups=0.09, wpb=64885, bsz=128, num_updates=14580, lr=9.98914e-05, gnorm=1.941, loss_scale=16, train_wall=10, gb_free=2.8, wall=167318 2021-06-20 17:07:35 | INFO | train_inner | epoch 005: 2658 / 3002 loss=2.421, ppl=5.35, wps=5767.4, ups=0.09, wpb=64764, bsz=128, num_updates=14581, lr=9.98913e-05, gnorm=1.899, loss_scale=16, train_wall=11, gb_free=2.8, wall=167329 2021-06-20 17:07:46 | INFO | train_inner | epoch 005: 2659 / 3002 loss=2.466, ppl=5.52, wps=5842.4, ups=0.09, wpb=64819, bsz=128, num_updates=14582, lr=9.98913e-05, gnorm=1.922, loss_scale=16, train_wall=11, gb_free=2.8, wall=167340 2021-06-20 17:07:57 | INFO | train_inner | epoch 005: 2660 / 3002 loss=2.528, ppl=5.77, wps=5905.1, ups=0.09, wpb=64818, bsz=128, num_updates=14583, lr=9.98913e-05, gnorm=1.933, loss_scale=16, train_wall=11, gb_free=2.8, wall=167351 2021-06-20 17:08:08 | INFO | train_inner | epoch 005: 2661 / 3002 loss=2.592, ppl=6.03, wps=6008, ups=0.09, wpb=64780, bsz=128, num_updates=14584, lr=9.98913e-05, gnorm=2, loss_scale=16, train_wall=10, gb_free=2.8, wall=167362 2021-06-20 17:08:19 | INFO | train_inner | epoch 005: 2662 / 3002 loss=2.514, ppl=5.71, wps=5866.9, ups=0.09, wpb=64810, bsz=128, num_updates=14585, lr=9.98913e-05, gnorm=1.943, loss_scale=16, train_wall=11, gb_free=2.8, wall=167373 2021-06-20 17:08:30 | INFO | train_inner | epoch 005: 2663 / 3002 loss=2.404, ppl=5.29, wps=5789.1, ups=0.09, wpb=64803, bsz=128, num_updates=14586, lr=9.98913e-05, gnorm=2.068, loss_scale=16, train_wall=11, gb_free=2.8, wall=167384 2021-06-20 17:08:41 | INFO | train_inner | epoch 005: 2664 / 3002 loss=2.567, ppl=5.92, wps=5776.4, ups=0.09, wpb=64844, bsz=128, num_updates=14587, lr=9.98913e-05, gnorm=1.916, loss_scale=16, train_wall=11, gb_free=2.8, wall=167396 2021-06-20 17:08:52 | INFO | train_inner | epoch 005: 2665 / 3002 loss=2.508, ppl=5.69, wps=5953.2, ups=0.09, wpb=64851, bsz=128, num_updates=14588, lr=9.98913e-05, gnorm=1.967, loss_scale=16, train_wall=10, gb_free=2.8, wall=167406 2021-06-20 17:09:03 | INFO | train_inner | epoch 005: 2666 / 3002 loss=2.41, ppl=5.31, wps=5847.4, ups=0.09, wpb=64803, bsz=128, num_updates=14589, lr=9.98913e-05, gnorm=2.032, loss_scale=16, train_wall=11, gb_free=2.8, wall=167418 2021-06-20 17:09:14 | INFO | train_inner | epoch 005: 2667 / 3002 loss=2.507, ppl=5.68, wps=5809.5, ups=0.09, wpb=64796, bsz=128, num_updates=14590, lr=9.98913e-05, gnorm=1.828, loss_scale=16, train_wall=11, gb_free=2.8, wall=167429 2021-06-20 17:09:26 | INFO | train_inner | epoch 005: 2668 / 3002 loss=2.535, ppl=5.8, wps=5798, ups=0.09, wpb=64838, bsz=128, num_updates=14591, lr=9.98913e-05, gnorm=2.866, loss_scale=16, train_wall=11, gb_free=2.8, wall=167440 2021-06-20 17:09:37 | INFO | train_inner | epoch 005: 2669 / 3002 loss=2.457, ppl=5.49, wps=5806, ups=0.09, wpb=64893, bsz=128, num_updates=14592, lr=9.98913e-05, gnorm=1.883, loss_scale=16, train_wall=11, gb_free=2.8, wall=167451 2021-06-20 17:09:48 | INFO | train_inner | epoch 005: 2670 / 3002 loss=2.643, ppl=6.25, wps=5856, ups=0.09, wpb=64779, bsz=128, num_updates=14593, lr=9.98912e-05, gnorm=1.985, loss_scale=16, train_wall=11, gb_free=2.8, wall=167462 2021-06-20 17:09:59 | INFO | train_inner | epoch 005: 2671 / 3002 loss=2.576, ppl=5.96, wps=5816.7, ups=0.09, wpb=64835, bsz=128, num_updates=14594, lr=9.98912e-05, gnorm=1.868, loss_scale=16, train_wall=11, gb_free=2.8, wall=167473 2021-06-20 17:10:10 | INFO | train_inner | epoch 005: 2672 / 3002 loss=2.512, ppl=5.71, wps=5819.5, ups=0.09, wpb=64848, bsz=128, num_updates=14595, lr=9.98912e-05, gnorm=2.068, loss_scale=16, train_wall=11, gb_free=2.8, wall=167484 2021-06-20 17:10:21 | INFO | train_inner | epoch 005: 2673 / 3002 loss=2.496, ppl=5.64, wps=5908.2, ups=0.09, wpb=64789, bsz=128, num_updates=14596, lr=9.98912e-05, gnorm=1.878, loss_scale=16, train_wall=11, gb_free=2.8, wall=167495 2021-06-20 17:10:32 | INFO | train_inner | epoch 005: 2674 / 3002 loss=2.493, ppl=5.63, wps=5880.8, ups=0.09, wpb=64894, bsz=128, num_updates=14597, lr=9.98912e-05, gnorm=1.923, loss_scale=16, train_wall=11, gb_free=2.8, wall=167506 2021-06-20 17:10:43 | INFO | train_inner | epoch 005: 2675 / 3002 loss=2.45, ppl=5.46, wps=5830.6, ups=0.09, wpb=64894, bsz=128, num_updates=14598, lr=9.98912e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=167518 2021-06-20 17:10:54 | INFO | train_inner | epoch 005: 2676 / 3002 loss=2.535, ppl=5.8, wps=5853.7, ups=0.09, wpb=64794, bsz=128, num_updates=14599, lr=9.98912e-05, gnorm=1.924, loss_scale=16, train_wall=11, gb_free=2.8, wall=167529 2021-06-20 17:11:05 | INFO | train_inner | epoch 005: 2677 / 3002 loss=2.462, ppl=5.51, wps=5923.9, ups=0.09, wpb=64811, bsz=128, num_updates=14600, lr=9.98912e-05, gnorm=1.942, loss_scale=16, train_wall=10, gb_free=2.8, wall=167540 2021-06-20 17:11:16 | INFO | train_inner | epoch 005: 2678 / 3002 loss=2.605, ppl=6.09, wps=5821, ups=0.09, wpb=64760, bsz=128, num_updates=14601, lr=9.98912e-05, gnorm=2.009, loss_scale=16, train_wall=11, gb_free=2.8, wall=167551 2021-06-20 17:11:27 | INFO | train_inner | epoch 005: 2679 / 3002 loss=2.474, ppl=5.55, wps=5816.9, ups=0.09, wpb=64851, bsz=128, num_updates=14602, lr=9.98912e-05, gnorm=2.029, loss_scale=16, train_wall=11, gb_free=2.8, wall=167562 2021-06-20 17:11:39 | INFO | train_inner | epoch 005: 2680 / 3002 loss=2.628, ppl=6.18, wps=5769.3, ups=0.09, wpb=64720, bsz=128, num_updates=14603, lr=9.98912e-05, gnorm=1.932, loss_scale=16, train_wall=11, gb_free=2.8, wall=167573 2021-06-20 17:11:50 | INFO | train_inner | epoch 005: 2681 / 3002 loss=2.473, ppl=5.55, wps=5839.7, ups=0.09, wpb=64787, bsz=128, num_updates=14604, lr=9.98912e-05, gnorm=1.904, loss_scale=16, train_wall=11, gb_free=2.8, wall=167584 2021-06-20 17:12:01 | INFO | train_inner | epoch 005: 2682 / 3002 loss=2.577, ppl=5.97, wps=5848.9, ups=0.09, wpb=64835, bsz=128, num_updates=14605, lr=9.98912e-05, gnorm=2.329, loss_scale=16, train_wall=11, gb_free=2.8, wall=167595 2021-06-20 17:12:12 | INFO | train_inner | epoch 005: 2683 / 3002 loss=2.479, ppl=5.58, wps=5895.1, ups=0.09, wpb=64718, bsz=128, num_updates=14606, lr=9.98911e-05, gnorm=1.951, loss_scale=16, train_wall=11, gb_free=2.8, wall=167606 2021-06-20 17:12:23 | INFO | train_inner | epoch 005: 2684 / 3002 loss=2.54, ppl=5.82, wps=5712.1, ups=0.09, wpb=64784, bsz=128, num_updates=14607, lr=9.98911e-05, gnorm=1.897, loss_scale=16, train_wall=11, gb_free=2.8, wall=167618 2021-06-20 17:12:34 | INFO | train_inner | epoch 005: 2685 / 3002 loss=2.405, ppl=5.3, wps=5779, ups=0.09, wpb=64825, bsz=128, num_updates=14608, lr=9.98911e-05, gnorm=1.941, loss_scale=16, train_wall=11, gb_free=2.8, wall=167629 2021-06-20 17:12:46 | INFO | train_inner | epoch 005: 2686 / 3002 loss=2.566, ppl=5.92, wps=5712.3, ups=0.09, wpb=64753, bsz=128, num_updates=14609, lr=9.98911e-05, gnorm=1.976, loss_scale=16, train_wall=11, gb_free=2.8, wall=167640 2021-06-20 17:12:57 | INFO | train_inner | epoch 005: 2687 / 3002 loss=2.357, ppl=5.12, wps=5807.7, ups=0.09, wpb=64866, bsz=128, num_updates=14610, lr=9.98911e-05, gnorm=1.878, loss_scale=16, train_wall=11, gb_free=2.8, wall=167651 2021-06-20 17:13:08 | INFO | train_inner | epoch 005: 2688 / 3002 loss=2.456, ppl=5.49, wps=5788.5, ups=0.09, wpb=64887, bsz=128, num_updates=14611, lr=9.98911e-05, gnorm=1.917, loss_scale=16, train_wall=11, gb_free=2.8, wall=167662 2021-06-20 17:13:19 | INFO | train_inner | epoch 005: 2689 / 3002 loss=2.439, ppl=5.42, wps=5931.6, ups=0.09, wpb=64853, bsz=128, num_updates=14612, lr=9.98911e-05, gnorm=2.02, loss_scale=16, train_wall=10, gb_free=2.8, wall=167673 2021-06-20 17:13:30 | INFO | train_inner | epoch 005: 2690 / 3002 loss=2.519, ppl=5.73, wps=6002.7, ups=0.09, wpb=64892, bsz=128, num_updates=14613, lr=9.98911e-05, gnorm=1.97, loss_scale=16, train_wall=10, gb_free=2.8, wall=167684 2021-06-20 17:13:41 | INFO | train_inner | epoch 005: 2691 / 3002 loss=2.605, ppl=6.08, wps=5790.2, ups=0.09, wpb=64711, bsz=128, num_updates=14614, lr=9.98911e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=167695 2021-06-20 17:13:52 | INFO | train_inner | epoch 005: 2692 / 3002 loss=2.55, ppl=5.86, wps=5938.2, ups=0.09, wpb=64812, bsz=128, num_updates=14615, lr=9.98911e-05, gnorm=1.952, loss_scale=16, train_wall=10, gb_free=2.8, wall=167706 2021-06-20 17:14:03 | INFO | train_inner | epoch 005: 2693 / 3002 loss=2.558, ppl=5.89, wps=5831.2, ups=0.09, wpb=64826, bsz=128, num_updates=14616, lr=9.98911e-05, gnorm=1.883, loss_scale=16, train_wall=11, gb_free=2.8, wall=167717 2021-06-20 17:14:14 | INFO | train_inner | epoch 005: 2694 / 3002 loss=2.431, ppl=5.39, wps=5845.2, ups=0.09, wpb=64878, bsz=128, num_updates=14617, lr=9.98911e-05, gnorm=1.907, loss_scale=16, train_wall=11, gb_free=2.8, wall=167729 2021-06-20 17:14:25 | INFO | train_inner | epoch 005: 2695 / 3002 loss=2.498, ppl=5.65, wps=5903.6, ups=0.09, wpb=64812, bsz=128, num_updates=14618, lr=9.9891e-05, gnorm=1.881, loss_scale=16, train_wall=11, gb_free=2.8, wall=167739 2021-06-20 17:14:36 | INFO | train_inner | epoch 005: 2696 / 3002 loss=2.515, ppl=5.72, wps=5877.7, ups=0.09, wpb=64823, bsz=128, num_updates=14619, lr=9.9891e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=167751 2021-06-20 17:14:47 | INFO | train_inner | epoch 005: 2697 / 3002 loss=2.505, ppl=5.68, wps=5828.6, ups=0.09, wpb=64810, bsz=128, num_updates=14620, lr=9.9891e-05, gnorm=2.008, loss_scale=16, train_wall=11, gb_free=2.8, wall=167762 2021-06-20 17:14:58 | INFO | train_inner | epoch 005: 2698 / 3002 loss=2.431, ppl=5.39, wps=5798.8, ups=0.09, wpb=64796, bsz=128, num_updates=14621, lr=9.9891e-05, gnorm=2.322, loss_scale=16, train_wall=11, gb_free=2.8, wall=167773 2021-06-20 17:15:10 | INFO | train_inner | epoch 005: 2699 / 3002 loss=2.626, ppl=6.17, wps=5855.7, ups=0.09, wpb=64797, bsz=128, num_updates=14622, lr=9.9891e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=167784 2021-06-20 17:15:21 | INFO | train_inner | epoch 005: 2700 / 3002 loss=2.398, ppl=5.27, wps=5873, ups=0.09, wpb=64824, bsz=128, num_updates=14623, lr=9.9891e-05, gnorm=1.991, loss_scale=16, train_wall=11, gb_free=2.8, wall=167795 2021-06-20 17:15:32 | INFO | train_inner | epoch 005: 2701 / 3002 loss=2.685, ppl=6.43, wps=5845.7, ups=0.09, wpb=64692, bsz=128, num_updates=14624, lr=9.9891e-05, gnorm=1.977, loss_scale=16, train_wall=11, gb_free=2.8, wall=167806 2021-06-20 17:15:43 | INFO | train_inner | epoch 005: 2702 / 3002 loss=2.722, ppl=6.6, wps=5791.9, ups=0.09, wpb=64787, bsz=128, num_updates=14625, lr=9.9891e-05, gnorm=1.952, loss_scale=16, train_wall=11, gb_free=2.8, wall=167817 2021-06-20 17:15:54 | INFO | train_inner | epoch 005: 2703 / 3002 loss=2.57, ppl=5.94, wps=5804.8, ups=0.09, wpb=64889, bsz=128, num_updates=14626, lr=9.9891e-05, gnorm=1.974, loss_scale=16, train_wall=11, gb_free=2.8, wall=167828 2021-06-20 17:16:05 | INFO | train_inner | epoch 005: 2704 / 3002 loss=2.544, ppl=5.83, wps=5960.2, ups=0.09, wpb=64774, bsz=128, num_updates=14627, lr=9.9891e-05, gnorm=1.992, loss_scale=16, train_wall=10, gb_free=2.8, wall=167839 2021-06-20 17:16:16 | INFO | train_inner | epoch 005: 2705 / 3002 loss=2.588, ppl=6.01, wps=5854.6, ups=0.09, wpb=64771, bsz=128, num_updates=14628, lr=9.9891e-05, gnorm=2.06, loss_scale=16, train_wall=11, gb_free=2.8, wall=167850 2021-06-20 17:16:27 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-20 17:16:38 | INFO | train_inner | epoch 005: 2707 / 3002 loss=2.403, ppl=5.29, wps=2914.6, ups=0.04, wpb=64823, bsz=128, num_updates=14629, lr=9.9891e-05, gnorm=1.884, loss_scale=8, train_wall=21, gb_free=2.8, wall=167873 2021-06-20 17:16:49 | INFO | train_inner | epoch 005: 2708 / 3002 loss=2.683, ppl=6.42, wps=5864.3, ups=0.09, wpb=64824, bsz=128, num_updates=14630, lr=9.9891e-05, gnorm=2.031, loss_scale=8, train_wall=11, gb_free=2.8, wall=167884 2021-06-20 17:17:00 | INFO | train_inner | epoch 005: 2709 / 3002 loss=2.322, ppl=5, wps=5964.7, ups=0.09, wpb=64781, bsz=128, num_updates=14631, lr=9.98909e-05, gnorm=1.987, loss_scale=8, train_wall=10, gb_free=2.8, wall=167894 2021-06-20 17:17:11 | INFO | train_inner | epoch 005: 2710 / 3002 loss=2.39, ppl=5.24, wps=5910.7, ups=0.09, wpb=64849, bsz=128, num_updates=14632, lr=9.98909e-05, gnorm=1.878, loss_scale=8, train_wall=11, gb_free=2.8, wall=167905 2021-06-20 17:17:22 | INFO | train_inner | epoch 005: 2711 / 3002 loss=2.542, ppl=5.82, wps=5859.4, ups=0.09, wpb=64839, bsz=128, num_updates=14633, lr=9.98909e-05, gnorm=2.047, loss_scale=8, train_wall=11, gb_free=2.8, wall=167916 2021-06-20 17:17:33 | INFO | train_inner | epoch 005: 2712 / 3002 loss=2.492, ppl=5.62, wps=5836.3, ups=0.09, wpb=64808, bsz=128, num_updates=14634, lr=9.98909e-05, gnorm=2.036, loss_scale=8, train_wall=11, gb_free=2.8, wall=167928 2021-06-20 17:17:44 | INFO | train_inner | epoch 005: 2713 / 3002 loss=2.434, ppl=5.4, wps=5893.6, ups=0.09, wpb=64788, bsz=128, num_updates=14635, lr=9.98909e-05, gnorm=1.86, loss_scale=8, train_wall=11, gb_free=2.8, wall=167939 2021-06-20 17:17:55 | INFO | train_inner | epoch 005: 2714 / 3002 loss=2.42, ppl=5.35, wps=5817.4, ups=0.09, wpb=64735, bsz=128, num_updates=14636, lr=9.98909e-05, gnorm=1.904, loss_scale=8, train_wall=11, gb_free=2.8, wall=167950 2021-06-20 17:18:06 | INFO | train_inner | epoch 005: 2715 / 3002 loss=2.467, ppl=5.53, wps=5888.9, ups=0.09, wpb=64817, bsz=128, num_updates=14637, lr=9.98909e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=167961 2021-06-20 17:18:18 | INFO | train_inner | epoch 005: 2716 / 3002 loss=2.585, ppl=6, wps=5811.5, ups=0.09, wpb=64860, bsz=128, num_updates=14638, lr=9.98909e-05, gnorm=2.007, loss_scale=8, train_wall=11, gb_free=2.8, wall=167972 2021-06-20 17:18:29 | INFO | train_inner | epoch 005: 2717 / 3002 loss=2.423, ppl=5.36, wps=5829, ups=0.09, wpb=64898, bsz=128, num_updates=14639, lr=9.98909e-05, gnorm=1.905, loss_scale=8, train_wall=11, gb_free=2.8, wall=167983 2021-06-20 17:18:40 | INFO | train_inner | epoch 005: 2718 / 3002 loss=2.482, ppl=5.59, wps=5776.1, ups=0.09, wpb=64834, bsz=128, num_updates=14640, lr=9.98909e-05, gnorm=1.998, loss_scale=8, train_wall=11, gb_free=2.8, wall=167994 2021-06-20 17:18:51 | INFO | train_inner | epoch 005: 2719 / 3002 loss=2.453, ppl=5.48, wps=5755.5, ups=0.09, wpb=64867, bsz=128, num_updates=14641, lr=9.98909e-05, gnorm=2.054, loss_scale=8, train_wall=11, gb_free=2.8, wall=168006 2021-06-20 17:19:02 | INFO | train_inner | epoch 005: 2720 / 3002 loss=2.464, ppl=5.52, wps=5875.5, ups=0.09, wpb=64781, bsz=128, num_updates=14642, lr=9.98909e-05, gnorm=1.887, loss_scale=8, train_wall=11, gb_free=2.8, wall=168017 2021-06-20 17:19:13 | INFO | train_inner | epoch 005: 2721 / 3002 loss=2.565, ppl=5.92, wps=5873.6, ups=0.09, wpb=64817, bsz=128, num_updates=14643, lr=9.98908e-05, gnorm=1.963, loss_scale=8, train_wall=11, gb_free=2.8, wall=168028 2021-06-20 17:19:24 | INFO | train_inner | epoch 005: 2722 / 3002 loss=2.37, ppl=5.17, wps=5832.8, ups=0.09, wpb=64885, bsz=128, num_updates=14644, lr=9.98908e-05, gnorm=1.922, loss_scale=8, train_wall=11, gb_free=2.8, wall=168039 2021-06-20 17:19:36 | INFO | train_inner | epoch 005: 2723 / 3002 loss=2.461, ppl=5.51, wps=5748.1, ups=0.09, wpb=64830, bsz=128, num_updates=14645, lr=9.98908e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=168050 2021-06-20 17:19:47 | INFO | train_inner | epoch 005: 2724 / 3002 loss=2.477, ppl=5.57, wps=5873.9, ups=0.09, wpb=64851, bsz=128, num_updates=14646, lr=9.98908e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=168061 2021-06-20 17:19:58 | INFO | train_inner | epoch 005: 2725 / 3002 loss=2.567, ppl=5.93, wps=5895, ups=0.09, wpb=64832, bsz=128, num_updates=14647, lr=9.98908e-05, gnorm=1.863, loss_scale=8, train_wall=11, gb_free=2.8, wall=168072 2021-06-20 17:20:09 | INFO | train_inner | epoch 005: 2726 / 3002 loss=2.489, ppl=5.61, wps=5899.3, ups=0.09, wpb=64765, bsz=128, num_updates=14648, lr=9.98908e-05, gnorm=1.945, loss_scale=8, train_wall=11, gb_free=2.8, wall=168083 2021-06-20 17:20:20 | INFO | train_inner | epoch 005: 2727 / 3002 loss=2.503, ppl=5.67, wps=5831.9, ups=0.09, wpb=64776, bsz=128, num_updates=14649, lr=9.98908e-05, gnorm=1.954, loss_scale=8, train_wall=11, gb_free=2.8, wall=168094 2021-06-20 17:20:31 | INFO | train_inner | epoch 005: 2728 / 3002 loss=2.391, ppl=5.24, wps=5789.8, ups=0.09, wpb=64804, bsz=128, num_updates=14650, lr=9.98908e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=168105 2021-06-20 17:20:42 | INFO | train_inner | epoch 005: 2729 / 3002 loss=2.507, ppl=5.68, wps=5781, ups=0.09, wpb=64752, bsz=128, num_updates=14651, lr=9.98908e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=168116 2021-06-20 17:20:53 | INFO | train_inner | epoch 005: 2730 / 3002 loss=2.518, ppl=5.73, wps=5859.7, ups=0.09, wpb=64843, bsz=128, num_updates=14652, lr=9.98908e-05, gnorm=2.093, loss_scale=8, train_wall=11, gb_free=2.8, wall=168128 2021-06-20 17:21:04 | INFO | train_inner | epoch 005: 2731 / 3002 loss=2.496, ppl=5.64, wps=5914.6, ups=0.09, wpb=64799, bsz=128, num_updates=14653, lr=9.98908e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=168139 2021-06-20 17:21:15 | INFO | train_inner | epoch 005: 2732 / 3002 loss=2.513, ppl=5.71, wps=5816.8, ups=0.09, wpb=64889, bsz=128, num_updates=14654, lr=9.98908e-05, gnorm=2.112, loss_scale=8, train_wall=11, gb_free=2.8, wall=168150 2021-06-20 17:21:27 | INFO | train_inner | epoch 005: 2733 / 3002 loss=2.388, ppl=5.23, wps=5775.1, ups=0.09, wpb=64905, bsz=128, num_updates=14655, lr=9.98908e-05, gnorm=2.101, loss_scale=8, train_wall=11, gb_free=2.8, wall=168161 2021-06-20 17:21:38 | INFO | train_inner | epoch 005: 2734 / 3002 loss=2.417, ppl=5.34, wps=5905, ups=0.09, wpb=64925, bsz=128, num_updates=14656, lr=9.98907e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=168172 2021-06-20 17:21:49 | INFO | train_inner | epoch 005: 2735 / 3002 loss=2.601, ppl=6.07, wps=5860.6, ups=0.09, wpb=64769, bsz=128, num_updates=14657, lr=9.98907e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=168183 2021-06-20 17:22:00 | INFO | train_inner | epoch 005: 2736 / 3002 loss=2.448, ppl=5.45, wps=5906.7, ups=0.09, wpb=64881, bsz=128, num_updates=14658, lr=9.98907e-05, gnorm=2.246, loss_scale=8, train_wall=11, gb_free=2.8, wall=168194 2021-06-20 17:22:11 | INFO | train_inner | epoch 005: 2737 / 3002 loss=2.461, ppl=5.51, wps=5748.5, ups=0.09, wpb=64804, bsz=128, num_updates=14659, lr=9.98907e-05, gnorm=1.949, loss_scale=8, train_wall=11, gb_free=2.8, wall=168205 2021-06-20 17:22:22 | INFO | train_inner | epoch 005: 2738 / 3002 loss=2.494, ppl=5.63, wps=5898.2, ups=0.09, wpb=64885, bsz=128, num_updates=14660, lr=9.98907e-05, gnorm=1.87, loss_scale=8, train_wall=11, gb_free=2.8, wall=168216 2021-06-20 17:22:33 | INFO | train_inner | epoch 005: 2739 / 3002 loss=2.525, ppl=5.76, wps=5837, ups=0.09, wpb=64751, bsz=128, num_updates=14661, lr=9.98907e-05, gnorm=1.905, loss_scale=8, train_wall=11, gb_free=2.8, wall=168227 2021-06-20 17:22:44 | INFO | train_inner | epoch 005: 2740 / 3002 loss=2.542, ppl=5.82, wps=5828.4, ups=0.09, wpb=64799, bsz=128, num_updates=14662, lr=9.98907e-05, gnorm=1.946, loss_scale=8, train_wall=11, gb_free=2.8, wall=168238 2021-06-20 17:22:55 | INFO | train_inner | epoch 005: 2741 / 3002 loss=2.433, ppl=5.4, wps=6012.9, ups=0.09, wpb=64903, bsz=128, num_updates=14663, lr=9.98907e-05, gnorm=1.885, loss_scale=8, train_wall=10, gb_free=2.8, wall=168249 2021-06-20 17:23:06 | INFO | train_inner | epoch 005: 2742 / 3002 loss=2.414, ppl=5.33, wps=5779.9, ups=0.09, wpb=64907, bsz=128, num_updates=14664, lr=9.98907e-05, gnorm=1.946, loss_scale=8, train_wall=11, gb_free=2.8, wall=168260 2021-06-20 17:23:17 | INFO | train_inner | epoch 005: 2743 / 3002 loss=2.438, ppl=5.42, wps=5826.3, ups=0.09, wpb=64826, bsz=128, num_updates=14665, lr=9.98907e-05, gnorm=2.124, loss_scale=8, train_wall=11, gb_free=2.8, wall=168272 2021-06-20 17:23:28 | INFO | train_inner | epoch 005: 2744 / 3002 loss=2.545, ppl=5.84, wps=5862.9, ups=0.09, wpb=64825, bsz=128, num_updates=14666, lr=9.98907e-05, gnorm=2.046, loss_scale=8, train_wall=11, gb_free=2.8, wall=168283 2021-06-20 17:23:40 | INFO | train_inner | epoch 005: 2745 / 3002 loss=2.513, ppl=5.71, wps=5777.6, ups=0.09, wpb=64787, bsz=128, num_updates=14667, lr=9.98907e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=168294 2021-06-20 17:23:51 | INFO | train_inner | epoch 005: 2746 / 3002 loss=2.506, ppl=5.68, wps=5797, ups=0.09, wpb=64861, bsz=128, num_updates=14668, lr=9.98906e-05, gnorm=1.904, loss_scale=8, train_wall=11, gb_free=2.8, wall=168305 2021-06-20 17:24:02 | INFO | train_inner | epoch 005: 2747 / 3002 loss=2.301, ppl=4.93, wps=5834, ups=0.09, wpb=64919, bsz=128, num_updates=14669, lr=9.98906e-05, gnorm=1.889, loss_scale=8, train_wall=11, gb_free=2.8, wall=168316 2021-06-20 17:24:13 | INFO | train_inner | epoch 005: 2748 / 3002 loss=2.476, ppl=5.56, wps=5769.9, ups=0.09, wpb=64776, bsz=128, num_updates=14670, lr=9.98906e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=168327 2021-06-20 17:24:24 | INFO | train_inner | epoch 005: 2749 / 3002 loss=2.485, ppl=5.6, wps=5784.9, ups=0.09, wpb=64896, bsz=128, num_updates=14671, lr=9.98906e-05, gnorm=1.904, loss_scale=8, train_wall=11, gb_free=2.8, wall=168339 2021-06-20 17:24:35 | INFO | train_inner | epoch 005: 2750 / 3002 loss=2.496, ppl=5.64, wps=5910.6, ups=0.09, wpb=64863, bsz=128, num_updates=14672, lr=9.98906e-05, gnorm=1.879, loss_scale=8, train_wall=11, gb_free=2.8, wall=168350 2021-06-20 17:24:46 | INFO | train_inner | epoch 005: 2751 / 3002 loss=2.516, ppl=5.72, wps=5815.2, ups=0.09, wpb=64728, bsz=128, num_updates=14673, lr=9.98906e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=168361 2021-06-20 17:24:57 | INFO | train_inner | epoch 005: 2752 / 3002 loss=2.288, ppl=4.88, wps=5864.5, ups=0.09, wpb=64877, bsz=128, num_updates=14674, lr=9.98906e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=168372 2021-06-20 17:25:09 | INFO | train_inner | epoch 005: 2753 / 3002 loss=2.485, ppl=5.6, wps=5848.8, ups=0.09, wpb=64873, bsz=128, num_updates=14675, lr=9.98906e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=168383 2021-06-20 17:25:20 | INFO | train_inner | epoch 005: 2754 / 3002 loss=2.364, ppl=5.15, wps=5834.5, ups=0.09, wpb=64848, bsz=128, num_updates=14676, lr=9.98906e-05, gnorm=1.921, loss_scale=8, train_wall=11, gb_free=2.8, wall=168394 2021-06-20 17:25:31 | INFO | train_inner | epoch 005: 2755 / 3002 loss=2.471, ppl=5.54, wps=5801.8, ups=0.09, wpb=64809, bsz=128, num_updates=14677, lr=9.98906e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=168405 2021-06-20 17:25:42 | INFO | train_inner | epoch 005: 2756 / 3002 loss=2.485, ppl=5.6, wps=5778.5, ups=0.09, wpb=64767, bsz=128, num_updates=14678, lr=9.98906e-05, gnorm=1.898, loss_scale=8, train_wall=11, gb_free=2.8, wall=168416 2021-06-20 17:25:53 | INFO | train_inner | epoch 005: 2757 / 3002 loss=2.539, ppl=5.81, wps=5835.3, ups=0.09, wpb=64821, bsz=128, num_updates=14679, lr=9.98906e-05, gnorm=1.953, loss_scale=8, train_wall=11, gb_free=2.8, wall=168427 2021-06-20 17:26:04 | INFO | train_inner | epoch 005: 2758 / 3002 loss=2.538, ppl=5.81, wps=5830.9, ups=0.09, wpb=64817, bsz=128, num_updates=14680, lr=9.98906e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=168439 2021-06-20 17:26:15 | INFO | train_inner | epoch 005: 2759 / 3002 loss=2.56, ppl=5.9, wps=5919.8, ups=0.09, wpb=64848, bsz=128, num_updates=14681, lr=9.98905e-05, gnorm=1.898, loss_scale=8, train_wall=11, gb_free=2.8, wall=168450 2021-06-20 17:26:26 | INFO | train_inner | epoch 005: 2760 / 3002 loss=2.688, ppl=6.44, wps=5836.7, ups=0.09, wpb=64804, bsz=128, num_updates=14682, lr=9.98905e-05, gnorm=2.034, loss_scale=8, train_wall=11, gb_free=2.8, wall=168461 2021-06-20 17:26:37 | INFO | train_inner | epoch 005: 2761 / 3002 loss=2.467, ppl=5.53, wps=5794.2, ups=0.09, wpb=64796, bsz=128, num_updates=14683, lr=9.98905e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=168472 2021-06-20 17:26:48 | INFO | train_inner | epoch 005: 2762 / 3002 loss=2.323, ppl=5, wps=5900, ups=0.09, wpb=64839, bsz=128, num_updates=14684, lr=9.98905e-05, gnorm=1.883, loss_scale=8, train_wall=11, gb_free=2.8, wall=168483 2021-06-20 17:26:59 | INFO | train_inner | epoch 005: 2763 / 3002 loss=2.57, ppl=5.94, wps=6002.8, ups=0.09, wpb=64883, bsz=128, num_updates=14685, lr=9.98905e-05, gnorm=1.933, loss_scale=8, train_wall=10, gb_free=2.8, wall=168494 2021-06-20 17:27:11 | INFO | train_inner | epoch 005: 2764 / 3002 loss=2.451, ppl=5.47, wps=5773.3, ups=0.09, wpb=64792, bsz=128, num_updates=14686, lr=9.98905e-05, gnorm=1.896, loss_scale=8, train_wall=11, gb_free=2.8, wall=168505 2021-06-20 17:27:21 | INFO | train_inner | epoch 005: 2765 / 3002 loss=2.302, ppl=4.93, wps=5963.3, ups=0.09, wpb=64845, bsz=128, num_updates=14687, lr=9.98905e-05, gnorm=1.912, loss_scale=8, train_wall=10, gb_free=2.8, wall=168516 2021-06-20 17:27:33 | INFO | train_inner | epoch 005: 2766 / 3002 loss=2.385, ppl=5.22, wps=5775.6, ups=0.09, wpb=64825, bsz=128, num_updates=14688, lr=9.98905e-05, gnorm=1.968, loss_scale=8, train_wall=11, gb_free=2.8, wall=168527 2021-06-20 17:27:44 | INFO | train_inner | epoch 005: 2767 / 3002 loss=2.351, ppl=5.1, wps=5839.7, ups=0.09, wpb=64792, bsz=128, num_updates=14689, lr=9.98905e-05, gnorm=1.989, loss_scale=8, train_wall=11, gb_free=2.8, wall=168538 2021-06-20 17:27:55 | INFO | train_inner | epoch 005: 2768 / 3002 loss=2.521, ppl=5.74, wps=5849.5, ups=0.09, wpb=64851, bsz=128, num_updates=14690, lr=9.98905e-05, gnorm=3.18, loss_scale=8, train_wall=11, gb_free=2.8, wall=168549 2021-06-20 17:28:06 | INFO | train_inner | epoch 005: 2769 / 3002 loss=2.47, ppl=5.54, wps=5914.3, ups=0.09, wpb=64881, bsz=128, num_updates=14691, lr=9.98905e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=168560 2021-06-20 17:28:17 | INFO | train_inner | epoch 005: 2770 / 3002 loss=2.586, ppl=6.01, wps=5841.9, ups=0.09, wpb=64802, bsz=128, num_updates=14692, lr=9.98905e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=168571 2021-06-20 17:28:28 | INFO | train_inner | epoch 005: 2771 / 3002 loss=2.55, ppl=5.86, wps=5853.1, ups=0.09, wpb=64885, bsz=128, num_updates=14693, lr=9.98904e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=168582 2021-06-20 17:28:39 | INFO | train_inner | epoch 005: 2772 / 3002 loss=2.515, ppl=5.72, wps=5889.6, ups=0.09, wpb=64855, bsz=128, num_updates=14694, lr=9.98904e-05, gnorm=2.131, loss_scale=8, train_wall=11, gb_free=2.8, wall=168593 2021-06-20 17:28:50 | INFO | train_inner | epoch 005: 2773 / 3002 loss=2.363, ppl=5.15, wps=5830.1, ups=0.09, wpb=64848, bsz=128, num_updates=14695, lr=9.98904e-05, gnorm=1.834, loss_scale=8, train_wall=11, gb_free=2.8, wall=168604 2021-06-20 17:29:01 | INFO | train_inner | epoch 005: 2774 / 3002 loss=2.548, ppl=5.85, wps=5768.9, ups=0.09, wpb=64804, bsz=128, num_updates=14696, lr=9.98904e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=168616 2021-06-20 17:29:12 | INFO | train_inner | epoch 005: 2775 / 3002 loss=2.463, ppl=5.51, wps=5937, ups=0.09, wpb=64815, bsz=128, num_updates=14697, lr=9.98904e-05, gnorm=1.893, loss_scale=8, train_wall=10, gb_free=2.8, wall=168627 2021-06-20 17:29:24 | INFO | train_inner | epoch 005: 2776 / 3002 loss=2.538, ppl=5.81, wps=5726.3, ups=0.09, wpb=64838, bsz=128, num_updates=14698, lr=9.98904e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=168638 2021-06-20 17:29:35 | INFO | train_inner | epoch 005: 2777 / 3002 loss=2.473, ppl=5.55, wps=5850.7, ups=0.09, wpb=64790, bsz=128, num_updates=14699, lr=9.98904e-05, gnorm=1.904, loss_scale=8, train_wall=11, gb_free=2.8, wall=168649 2021-06-20 17:29:46 | INFO | train_inner | epoch 005: 2778 / 3002 loss=2.473, ppl=5.55, wps=5891.7, ups=0.09, wpb=64871, bsz=128, num_updates=14700, lr=9.98904e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=168660 2021-06-20 17:29:57 | INFO | train_inner | epoch 005: 2779 / 3002 loss=2.475, ppl=5.56, wps=5901.9, ups=0.09, wpb=64892, bsz=128, num_updates=14701, lr=9.98904e-05, gnorm=1.896, loss_scale=8, train_wall=11, gb_free=2.8, wall=168671 2021-06-20 17:30:08 | INFO | train_inner | epoch 005: 2780 / 3002 loss=2.392, ppl=5.25, wps=5734, ups=0.09, wpb=64848, bsz=128, num_updates=14702, lr=9.98904e-05, gnorm=1.998, loss_scale=8, train_wall=11, gb_free=2.8, wall=168682 2021-06-20 17:30:19 | INFO | train_inner | epoch 005: 2781 / 3002 loss=2.378, ppl=5.2, wps=5782.3, ups=0.09, wpb=64839, bsz=128, num_updates=14703, lr=9.98904e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=168693 2021-06-20 17:30:30 | INFO | train_inner | epoch 005: 2782 / 3002 loss=2.548, ppl=5.85, wps=5736.4, ups=0.09, wpb=64732, bsz=128, num_updates=14704, lr=9.98904e-05, gnorm=2.131, loss_scale=8, train_wall=11, gb_free=2.8, wall=168705 2021-06-20 17:30:42 | INFO | train_inner | epoch 005: 2783 / 3002 loss=2.523, ppl=5.75, wps=5789.1, ups=0.09, wpb=64858, bsz=128, num_updates=14705, lr=9.98904e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=168716 2021-06-20 17:30:53 | INFO | train_inner | epoch 005: 2784 / 3002 loss=2.472, ppl=5.55, wps=5947, ups=0.09, wpb=64858, bsz=128, num_updates=14706, lr=9.98903e-05, gnorm=1.985, loss_scale=8, train_wall=10, gb_free=2.8, wall=168727 2021-06-20 17:31:04 | INFO | train_inner | epoch 005: 2785 / 3002 loss=2.431, ppl=5.39, wps=5718.1, ups=0.09, wpb=64834, bsz=128, num_updates=14707, lr=9.98903e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=168738 2021-06-20 17:31:15 | INFO | train_inner | epoch 005: 2786 / 3002 loss=2.432, ppl=5.4, wps=5857.5, ups=0.09, wpb=64776, bsz=128, num_updates=14708, lr=9.98903e-05, gnorm=2.024, loss_scale=8, train_wall=11, gb_free=2.8, wall=168749 2021-06-20 17:31:26 | INFO | train_inner | epoch 005: 2787 / 3002 loss=2.555, ppl=5.88, wps=5814.1, ups=0.09, wpb=64807, bsz=128, num_updates=14709, lr=9.98903e-05, gnorm=2.074, loss_scale=8, train_wall=11, gb_free=2.8, wall=168760 2021-06-20 17:31:37 | INFO | train_inner | epoch 005: 2788 / 3002 loss=2.416, ppl=5.34, wps=6052, ups=0.09, wpb=64895, bsz=128, num_updates=14710, lr=9.98903e-05, gnorm=1.984, loss_scale=8, train_wall=10, gb_free=2.8, wall=168771 2021-06-20 17:31:48 | INFO | train_inner | epoch 005: 2789 / 3002 loss=2.464, ppl=5.52, wps=5848.9, ups=0.09, wpb=64827, bsz=128, num_updates=14711, lr=9.98903e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=168782 2021-06-20 17:31:59 | INFO | train_inner | epoch 005: 2790 / 3002 loss=2.567, ppl=5.93, wps=5831.3, ups=0.09, wpb=64789, bsz=128, num_updates=14712, lr=9.98903e-05, gnorm=1.922, loss_scale=8, train_wall=11, gb_free=2.8, wall=168793 2021-06-20 17:32:10 | INFO | train_inner | epoch 005: 2791 / 3002 loss=2.434, ppl=5.4, wps=5973.4, ups=0.09, wpb=64769, bsz=128, num_updates=14713, lr=9.98903e-05, gnorm=1.956, loss_scale=8, train_wall=10, gb_free=2.8, wall=168804 2021-06-20 17:32:21 | INFO | train_inner | epoch 005: 2792 / 3002 loss=2.485, ppl=5.6, wps=5957.5, ups=0.09, wpb=64886, bsz=128, num_updates=14714, lr=9.98903e-05, gnorm=1.932, loss_scale=8, train_wall=10, gb_free=2.8, wall=168815 2021-06-20 17:32:32 | INFO | train_inner | epoch 005: 2793 / 3002 loss=2.442, ppl=5.44, wps=5753.3, ups=0.09, wpb=64847, bsz=128, num_updates=14715, lr=9.98903e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=168826 2021-06-20 17:32:43 | INFO | train_inner | epoch 005: 2794 / 3002 loss=2.492, ppl=5.62, wps=5849.6, ups=0.09, wpb=64857, bsz=128, num_updates=14716, lr=9.98903e-05, gnorm=2.023, loss_scale=8, train_wall=11, gb_free=2.8, wall=168837 2021-06-20 17:32:54 | INFO | train_inner | epoch 005: 2795 / 3002 loss=2.433, ppl=5.4, wps=5935.6, ups=0.09, wpb=64907, bsz=128, num_updates=14717, lr=9.98903e-05, gnorm=2.022, loss_scale=8, train_wall=10, gb_free=2.8, wall=168848 2021-06-20 17:33:05 | INFO | train_inner | epoch 005: 2796 / 3002 loss=2.559, ppl=5.89, wps=5846.5, ups=0.09, wpb=64734, bsz=128, num_updates=14718, lr=9.98902e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=168859 2021-06-20 17:33:16 | INFO | train_inner | epoch 005: 2797 / 3002 loss=2.542, ppl=5.82, wps=5763.6, ups=0.09, wpb=64806, bsz=128, num_updates=14719, lr=9.98902e-05, gnorm=2.116, loss_scale=8, train_wall=11, gb_free=2.8, wall=168871 2021-06-20 17:33:27 | INFO | train_inner | epoch 005: 2798 / 3002 loss=2.478, ppl=5.57, wps=5856.6, ups=0.09, wpb=64894, bsz=128, num_updates=14720, lr=9.98902e-05, gnorm=1.905, loss_scale=8, train_wall=11, gb_free=2.8, wall=168882 2021-06-20 17:33:39 | INFO | train_inner | epoch 005: 2799 / 3002 loss=2.518, ppl=5.73, wps=5818.4, ups=0.09, wpb=64842, bsz=128, num_updates=14721, lr=9.98902e-05, gnorm=1.956, loss_scale=8, train_wall=11, gb_free=2.8, wall=168893 2021-06-20 17:33:50 | INFO | train_inner | epoch 005: 2800 / 3002 loss=2.507, ppl=5.68, wps=5906.6, ups=0.09, wpb=64870, bsz=128, num_updates=14722, lr=9.98902e-05, gnorm=1.88, loss_scale=8, train_wall=11, gb_free=2.8, wall=168904 2021-06-20 17:34:01 | INFO | train_inner | epoch 005: 2801 / 3002 loss=2.388, ppl=5.23, wps=5919.7, ups=0.09, wpb=64877, bsz=128, num_updates=14723, lr=9.98902e-05, gnorm=1.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=168915 2021-06-20 17:34:12 | INFO | train_inner | epoch 005: 2802 / 3002 loss=2.551, ppl=5.86, wps=5728.9, ups=0.09, wpb=64911, bsz=128, num_updates=14724, lr=9.98902e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=168926 2021-06-20 17:34:23 | INFO | train_inner | epoch 005: 2803 / 3002 loss=2.497, ppl=5.64, wps=5822.4, ups=0.09, wpb=64697, bsz=128, num_updates=14725, lr=9.98902e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=168937 2021-06-20 17:34:34 | INFO | train_inner | epoch 005: 2804 / 3002 loss=2.515, ppl=5.72, wps=5744.7, ups=0.09, wpb=64783, bsz=128, num_updates=14726, lr=9.98902e-05, gnorm=1.888, loss_scale=8, train_wall=11, gb_free=2.8, wall=168949 2021-06-20 17:34:45 | INFO | train_inner | epoch 005: 2805 / 3002 loss=2.648, ppl=6.27, wps=5925.4, ups=0.09, wpb=64719, bsz=128, num_updates=14727, lr=9.98902e-05, gnorm=1.978, loss_scale=8, train_wall=10, gb_free=2.8, wall=168960 2021-06-20 17:34:56 | INFO | train_inner | epoch 005: 2806 / 3002 loss=2.361, ppl=5.14, wps=5822.5, ups=0.09, wpb=64878, bsz=128, num_updates=14728, lr=9.98902e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=168971 2021-06-20 17:35:07 | INFO | train_inner | epoch 005: 2807 / 3002 loss=2.515, ppl=5.72, wps=5845.3, ups=0.09, wpb=64937, bsz=128, num_updates=14729, lr=9.98902e-05, gnorm=2.032, loss_scale=8, train_wall=11, gb_free=2.8, wall=168982 2021-06-20 17:35:18 | INFO | train_inner | epoch 005: 2808 / 3002 loss=2.405, ppl=5.3, wps=5894.5, ups=0.09, wpb=64781, bsz=128, num_updates=14730, lr=9.98902e-05, gnorm=1.996, loss_scale=8, train_wall=11, gb_free=2.8, wall=168993 2021-06-20 17:35:30 | INFO | train_inner | epoch 005: 2809 / 3002 loss=2.38, ppl=5.21, wps=5835.8, ups=0.09, wpb=64903, bsz=128, num_updates=14731, lr=9.98901e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=169004 2021-06-20 17:35:41 | INFO | train_inner | epoch 005: 2810 / 3002 loss=2.441, ppl=5.43, wps=5859.5, ups=0.09, wpb=64897, bsz=128, num_updates=14732, lr=9.98901e-05, gnorm=1.854, loss_scale=8, train_wall=11, gb_free=2.8, wall=169015 2021-06-20 17:35:52 | INFO | train_inner | epoch 005: 2811 / 3002 loss=2.487, ppl=5.61, wps=5802.3, ups=0.09, wpb=64884, bsz=128, num_updates=14733, lr=9.98901e-05, gnorm=1.899, loss_scale=8, train_wall=11, gb_free=2.8, wall=169026 2021-06-20 17:36:03 | INFO | train_inner | epoch 005: 2812 / 3002 loss=2.432, ppl=5.4, wps=5846.2, ups=0.09, wpb=64903, bsz=128, num_updates=14734, lr=9.98901e-05, gnorm=1.864, loss_scale=8, train_wall=11, gb_free=2.8, wall=169037 2021-06-20 17:36:14 | INFO | train_inner | epoch 005: 2813 / 3002 loss=2.51, ppl=5.7, wps=5825.1, ups=0.09, wpb=64807, bsz=128, num_updates=14735, lr=9.98901e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=169048 2021-06-20 17:36:25 | INFO | train_inner | epoch 005: 2814 / 3002 loss=2.474, ppl=5.55, wps=5822, ups=0.09, wpb=64800, bsz=128, num_updates=14736, lr=9.98901e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=169059 2021-06-20 17:36:36 | INFO | train_inner | epoch 005: 2815 / 3002 loss=2.356, ppl=5.12, wps=5808.7, ups=0.09, wpb=64852, bsz=128, num_updates=14737, lr=9.98901e-05, gnorm=2.246, loss_scale=8, train_wall=11, gb_free=2.8, wall=169071 2021-06-20 17:36:48 | INFO | train_inner | epoch 005: 2816 / 3002 loss=2.485, ppl=5.6, wps=5784.5, ups=0.09, wpb=64816, bsz=128, num_updates=14738, lr=9.98901e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=169082 2021-06-20 17:36:58 | INFO | train_inner | epoch 005: 2817 / 3002 loss=2.429, ppl=5.39, wps=5959.3, ups=0.09, wpb=64857, bsz=128, num_updates=14739, lr=9.98901e-05, gnorm=1.972, loss_scale=8, train_wall=10, gb_free=2.8, wall=169093 2021-06-20 17:37:09 | INFO | train_inner | epoch 005: 2818 / 3002 loss=2.482, ppl=5.59, wps=5943.5, ups=0.09, wpb=64809, bsz=128, num_updates=14740, lr=9.98901e-05, gnorm=1.969, loss_scale=8, train_wall=10, gb_free=2.8, wall=169104 2021-06-20 17:37:20 | INFO | train_inner | epoch 005: 2819 / 3002 loss=2.398, ppl=5.27, wps=5878.4, ups=0.09, wpb=64920, bsz=128, num_updates=14741, lr=9.98901e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=169115 2021-06-20 17:37:31 | INFO | train_inner | epoch 005: 2820 / 3002 loss=2.529, ppl=5.77, wps=5863.4, ups=0.09, wpb=64837, bsz=128, num_updates=14742, lr=9.98901e-05, gnorm=1.95, loss_scale=8, train_wall=11, gb_free=2.8, wall=169126 2021-06-20 17:37:42 | INFO | train_inner | epoch 005: 2821 / 3002 loss=2.49, ppl=5.62, wps=5902.6, ups=0.09, wpb=64863, bsz=128, num_updates=14743, lr=9.989e-05, gnorm=1.997, loss_scale=8, train_wall=11, gb_free=2.8, wall=169137 2021-06-20 17:37:53 | INFO | train_inner | epoch 005: 2822 / 3002 loss=2.608, ppl=6.1, wps=5856.1, ups=0.09, wpb=64746, bsz=128, num_updates=14744, lr=9.989e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=169148 2021-06-20 17:38:04 | INFO | train_inner | epoch 005: 2823 / 3002 loss=2.533, ppl=5.79, wps=5885.9, ups=0.09, wpb=64790, bsz=128, num_updates=14745, lr=9.989e-05, gnorm=1.884, loss_scale=8, train_wall=11, gb_free=2.8, wall=169159 2021-06-20 17:38:15 | INFO | train_inner | epoch 005: 2824 / 3002 loss=2.507, ppl=5.68, wps=5960.7, ups=0.09, wpb=64899, bsz=128, num_updates=14746, lr=9.989e-05, gnorm=1.878, loss_scale=8, train_wall=10, gb_free=2.8, wall=169170 2021-06-20 17:38:26 | INFO | train_inner | epoch 005: 2825 / 3002 loss=2.472, ppl=5.55, wps=5848.3, ups=0.09, wpb=64858, bsz=128, num_updates=14747, lr=9.989e-05, gnorm=2.049, loss_scale=8, train_wall=11, gb_free=2.8, wall=169181 2021-06-20 17:38:37 | INFO | train_inner | epoch 005: 2826 / 3002 loss=2.436, ppl=5.41, wps=5967.2, ups=0.09, wpb=64794, bsz=128, num_updates=14748, lr=9.989e-05, gnorm=1.998, loss_scale=8, train_wall=10, gb_free=2.8, wall=169192 2021-06-20 17:38:49 | INFO | train_inner | epoch 005: 2827 / 3002 loss=2.568, ppl=5.93, wps=5776.7, ups=0.09, wpb=64808, bsz=128, num_updates=14749, lr=9.989e-05, gnorm=2.184, loss_scale=8, train_wall=11, gb_free=2.8, wall=169203 2021-06-20 17:39:00 | INFO | train_inner | epoch 005: 2828 / 3002 loss=2.449, ppl=5.46, wps=5818.2, ups=0.09, wpb=64854, bsz=128, num_updates=14750, lr=9.989e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=169214 2021-06-20 17:39:11 | INFO | train_inner | epoch 005: 2829 / 3002 loss=2.541, ppl=5.82, wps=5889.3, ups=0.09, wpb=64847, bsz=128, num_updates=14751, lr=9.989e-05, gnorm=1.968, loss_scale=8, train_wall=11, gb_free=2.8, wall=169225 2021-06-20 17:39:22 | INFO | train_inner | epoch 005: 2830 / 3002 loss=2.491, ppl=5.62, wps=5794.4, ups=0.09, wpb=64787, bsz=128, num_updates=14752, lr=9.989e-05, gnorm=1.994, loss_scale=8, train_wall=11, gb_free=2.8, wall=169236 2021-06-20 17:39:33 | INFO | train_inner | epoch 005: 2831 / 3002 loss=2.552, ppl=5.86, wps=5857.5, ups=0.09, wpb=64869, bsz=128, num_updates=14753, lr=9.989e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=169247 2021-06-20 17:39:44 | INFO | train_inner | epoch 005: 2832 / 3002 loss=2.416, ppl=5.34, wps=5887.9, ups=0.09, wpb=64863, bsz=128, num_updates=14754, lr=9.989e-05, gnorm=1.892, loss_scale=8, train_wall=11, gb_free=2.8, wall=169258 2021-06-20 17:39:55 | INFO | train_inner | epoch 005: 2833 / 3002 loss=2.531, ppl=5.78, wps=5776.6, ups=0.09, wpb=64792, bsz=128, num_updates=14755, lr=9.989e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=169270 2021-06-20 17:40:06 | INFO | train_inner | epoch 005: 2834 / 3002 loss=2.535, ppl=5.8, wps=5858.9, ups=0.09, wpb=64798, bsz=128, num_updates=14756, lr=9.98899e-05, gnorm=1.972, loss_scale=16, train_wall=11, gb_free=2.8, wall=169281 2021-06-20 17:40:17 | INFO | train_inner | epoch 005: 2835 / 3002 loss=2.385, ppl=5.22, wps=5815.9, ups=0.09, wpb=64827, bsz=128, num_updates=14757, lr=9.98899e-05, gnorm=1.996, loss_scale=16, train_wall=11, gb_free=2.8, wall=169292 2021-06-20 17:40:29 | INFO | train_inner | epoch 005: 2836 / 3002 loss=2.442, ppl=5.44, wps=5751.8, ups=0.09, wpb=64768, bsz=128, num_updates=14758, lr=9.98899e-05, gnorm=1.974, loss_scale=16, train_wall=11, gb_free=2.8, wall=169303 2021-06-20 17:40:40 | INFO | train_inner | epoch 005: 2837 / 3002 loss=2.385, ppl=5.22, wps=5799.8, ups=0.09, wpb=64762, bsz=128, num_updates=14759, lr=9.98899e-05, gnorm=1.944, loss_scale=16, train_wall=11, gb_free=2.8, wall=169314 2021-06-20 17:40:51 | INFO | train_inner | epoch 005: 2838 / 3002 loss=2.476, ppl=5.56, wps=5716.2, ups=0.09, wpb=64923, bsz=128, num_updates=14760, lr=9.98899e-05, gnorm=1.923, loss_scale=16, train_wall=11, gb_free=2.8, wall=169326 2021-06-20 17:41:02 | INFO | train_inner | epoch 005: 2839 / 3002 loss=2.474, ppl=5.56, wps=5809.9, ups=0.09, wpb=64763, bsz=128, num_updates=14761, lr=9.98899e-05, gnorm=1.988, loss_scale=16, train_wall=11, gb_free=2.8, wall=169337 2021-06-20 17:41:14 | INFO | train_inner | epoch 005: 2840 / 3002 loss=2.612, ppl=6.11, wps=5691.7, ups=0.09, wpb=64838, bsz=128, num_updates=14762, lr=9.98899e-05, gnorm=2.017, loss_scale=16, train_wall=11, gb_free=2.8, wall=169348 2021-06-20 17:41:25 | INFO | train_inner | epoch 005: 2841 / 3002 loss=2.555, ppl=5.88, wps=6001.4, ups=0.09, wpb=64862, bsz=128, num_updates=14763, lr=9.98899e-05, gnorm=1.957, loss_scale=16, train_wall=10, gb_free=2.8, wall=169359 2021-06-20 17:41:36 | INFO | train_inner | epoch 005: 2842 / 3002 loss=2.501, ppl=5.66, wps=5832.5, ups=0.09, wpb=64848, bsz=128, num_updates=14764, lr=9.98899e-05, gnorm=1.91, loss_scale=16, train_wall=11, gb_free=2.8, wall=169370 2021-06-20 17:41:47 | INFO | train_inner | epoch 005: 2843 / 3002 loss=2.417, ppl=5.34, wps=5810, ups=0.09, wpb=64834, bsz=128, num_updates=14765, lr=9.98899e-05, gnorm=1.881, loss_scale=16, train_wall=11, gb_free=2.8, wall=169381 2021-06-20 17:41:58 | INFO | train_inner | epoch 005: 2844 / 3002 loss=2.524, ppl=5.75, wps=5816.2, ups=0.09, wpb=64835, bsz=128, num_updates=14766, lr=9.98899e-05, gnorm=1.923, loss_scale=16, train_wall=11, gb_free=2.8, wall=169392 2021-06-20 17:42:09 | INFO | train_inner | epoch 005: 2845 / 3002 loss=2.56, ppl=5.9, wps=5735.8, ups=0.09, wpb=64805, bsz=128, num_updates=14767, lr=9.98899e-05, gnorm=1.913, loss_scale=16, train_wall=11, gb_free=2.8, wall=169404 2021-06-20 17:42:20 | INFO | train_inner | epoch 005: 2846 / 3002 loss=2.493, ppl=5.63, wps=5836.3, ups=0.09, wpb=64810, bsz=128, num_updates=14768, lr=9.98898e-05, gnorm=1.966, loss_scale=16, train_wall=11, gb_free=2.8, wall=169415 2021-06-20 17:42:31 | INFO | train_inner | epoch 005: 2847 / 3002 loss=2.542, ppl=5.82, wps=5868.8, ups=0.09, wpb=64876, bsz=128, num_updates=14769, lr=9.98898e-05, gnorm=2.057, loss_scale=16, train_wall=11, gb_free=2.8, wall=169426 2021-06-20 17:42:42 | INFO | train_inner | epoch 005: 2848 / 3002 loss=2.447, ppl=5.45, wps=5844.7, ups=0.09, wpb=64892, bsz=128, num_updates=14770, lr=9.98898e-05, gnorm=1.958, loss_scale=16, train_wall=11, gb_free=2.8, wall=169437 2021-06-20 17:42:54 | INFO | train_inner | epoch 005: 2849 / 3002 loss=2.403, ppl=5.29, wps=5870.7, ups=0.09, wpb=64839, bsz=128, num_updates=14771, lr=9.98898e-05, gnorm=1.951, loss_scale=16, train_wall=11, gb_free=2.8, wall=169448 2021-06-20 17:43:05 | INFO | train_inner | epoch 005: 2850 / 3002 loss=2.554, ppl=5.87, wps=5912.9, ups=0.09, wpb=64780, bsz=128, num_updates=14772, lr=9.98898e-05, gnorm=2.104, loss_scale=16, train_wall=11, gb_free=2.8, wall=169459 2021-06-20 17:43:16 | INFO | train_inner | epoch 005: 2851 / 3002 loss=2.359, ppl=5.13, wps=5814.3, ups=0.09, wpb=64801, bsz=128, num_updates=14773, lr=9.98898e-05, gnorm=2.089, loss_scale=16, train_wall=11, gb_free=2.8, wall=169470 2021-06-20 17:43:27 | INFO | train_inner | epoch 005: 2852 / 3002 loss=2.454, ppl=5.48, wps=5860.2, ups=0.09, wpb=64829, bsz=128, num_updates=14774, lr=9.98898e-05, gnorm=2.076, loss_scale=16, train_wall=11, gb_free=2.8, wall=169481 2021-06-20 17:43:38 | INFO | train_inner | epoch 005: 2853 / 3002 loss=2.49, ppl=5.62, wps=6008.6, ups=0.09, wpb=64850, bsz=128, num_updates=14775, lr=9.98898e-05, gnorm=2.012, loss_scale=16, train_wall=10, gb_free=2.8, wall=169492 2021-06-20 17:43:49 | INFO | train_inner | epoch 005: 2854 / 3002 loss=2.722, ppl=6.6, wps=5860.3, ups=0.09, wpb=64797, bsz=128, num_updates=14776, lr=9.98898e-05, gnorm=1.945, loss_scale=16, train_wall=11, gb_free=2.8, wall=169503 2021-06-20 17:43:59 | INFO | train_inner | epoch 005: 2855 / 3002 loss=2.487, ppl=5.61, wps=5949.7, ups=0.09, wpb=64808, bsz=128, num_updates=14777, lr=9.98898e-05, gnorm=1.921, loss_scale=16, train_wall=10, gb_free=2.8, wall=169514 2021-06-20 17:44:11 | INFO | train_inner | epoch 005: 2856 / 3002 loss=2.547, ppl=5.84, wps=5700.3, ups=0.09, wpb=64698, bsz=128, num_updates=14778, lr=9.98898e-05, gnorm=2.185, loss_scale=16, train_wall=11, gb_free=2.8, wall=169525 2021-06-20 17:44:22 | INFO | train_inner | epoch 005: 2857 / 3002 loss=2.46, ppl=5.5, wps=5803.7, ups=0.09, wpb=64830, bsz=128, num_updates=14779, lr=9.98898e-05, gnorm=1.902, loss_scale=16, train_wall=11, gb_free=2.8, wall=169536 2021-06-20 17:44:33 | INFO | train_inner | epoch 005: 2858 / 3002 loss=2.567, ppl=5.93, wps=5768.1, ups=0.09, wpb=64827, bsz=128, num_updates=14780, lr=9.98898e-05, gnorm=1.857, loss_scale=16, train_wall=11, gb_free=2.8, wall=169548 2021-06-20 17:44:44 | INFO | train_inner | epoch 005: 2859 / 3002 loss=2.457, ppl=5.49, wps=5790.4, ups=0.09, wpb=64791, bsz=128, num_updates=14781, lr=9.98897e-05, gnorm=1.84, loss_scale=16, train_wall=11, gb_free=2.8, wall=169559 2021-06-20 17:44:55 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-20 17:45:06 | INFO | train_inner | epoch 005: 2861 / 3002 loss=2.44, ppl=5.42, wps=2970.2, ups=0.05, wpb=64854, bsz=128, num_updates=14782, lr=9.98897e-05, gnorm=1.941, loss_scale=8, train_wall=21, gb_free=2.8, wall=169581 2021-06-20 17:45:17 | INFO | train_inner | epoch 005: 2862 / 3002 loss=2.548, ppl=5.85, wps=5787.8, ups=0.09, wpb=64729, bsz=128, num_updates=14783, lr=9.98897e-05, gnorm=1.934, loss_scale=8, train_wall=11, gb_free=2.8, wall=169592 2021-06-20 17:45:29 | INFO | train_inner | epoch 005: 2863 / 3002 loss=2.521, ppl=5.74, wps=5816.6, ups=0.09, wpb=64860, bsz=128, num_updates=14784, lr=9.98897e-05, gnorm=1.894, loss_scale=8, train_wall=11, gb_free=2.8, wall=169603 2021-06-20 17:45:40 | INFO | train_inner | epoch 005: 2864 / 3002 loss=2.4, ppl=5.28, wps=5851.3, ups=0.09, wpb=64911, bsz=128, num_updates=14785, lr=9.98897e-05, gnorm=1.903, loss_scale=8, train_wall=11, gb_free=2.8, wall=169614 2021-06-20 17:45:51 | INFO | train_inner | epoch 005: 2865 / 3002 loss=2.58, ppl=5.98, wps=5812.1, ups=0.09, wpb=64840, bsz=128, num_updates=14786, lr=9.98897e-05, gnorm=1.888, loss_scale=8, train_wall=11, gb_free=2.8, wall=169625 2021-06-20 17:46:02 | INFO | train_inner | epoch 005: 2866 / 3002 loss=2.387, ppl=5.23, wps=5988.8, ups=0.09, wpb=64782, bsz=128, num_updates=14787, lr=9.98897e-05, gnorm=1.866, loss_scale=8, train_wall=10, gb_free=2.8, wall=169636 2021-06-20 17:46:13 | INFO | train_inner | epoch 005: 2867 / 3002 loss=2.476, ppl=5.57, wps=5882.7, ups=0.09, wpb=64804, bsz=128, num_updates=14788, lr=9.98897e-05, gnorm=2.004, loss_scale=8, train_wall=11, gb_free=2.8, wall=169647 2021-06-20 17:46:24 | INFO | train_inner | epoch 005: 2868 / 3002 loss=2.572, ppl=5.95, wps=5842.6, ups=0.09, wpb=64853, bsz=128, num_updates=14789, lr=9.98897e-05, gnorm=2.309, loss_scale=8, train_wall=11, gb_free=2.8, wall=169658 2021-06-20 17:46:35 | INFO | train_inner | epoch 005: 2869 / 3002 loss=2.438, ppl=5.42, wps=5896.9, ups=0.09, wpb=64923, bsz=128, num_updates=14790, lr=9.98897e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=169669 2021-06-20 17:46:46 | INFO | train_inner | epoch 005: 2870 / 3002 loss=2.413, ppl=5.32, wps=5969.2, ups=0.09, wpb=64837, bsz=128, num_updates=14791, lr=9.98897e-05, gnorm=1.907, loss_scale=8, train_wall=10, gb_free=2.8, wall=169680 2021-06-20 17:46:57 | INFO | train_inner | epoch 005: 2871 / 3002 loss=2.332, ppl=5.04, wps=5851.5, ups=0.09, wpb=64777, bsz=128, num_updates=14792, lr=9.98897e-05, gnorm=2.031, loss_scale=8, train_wall=11, gb_free=2.8, wall=169691 2021-06-20 17:47:08 | INFO | train_inner | epoch 005: 2872 / 3002 loss=2.475, ppl=5.56, wps=5814.6, ups=0.09, wpb=64805, bsz=128, num_updates=14793, lr=9.98896e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=169702 2021-06-20 17:47:19 | INFO | train_inner | epoch 005: 2873 / 3002 loss=2.669, ppl=6.36, wps=5854.8, ups=0.09, wpb=64777, bsz=128, num_updates=14794, lr=9.98896e-05, gnorm=3.174, loss_scale=8, train_wall=11, gb_free=2.8, wall=169713 2021-06-20 17:47:30 | INFO | train_inner | epoch 005: 2874 / 3002 loss=2.533, ppl=5.79, wps=5762, ups=0.09, wpb=64794, bsz=128, num_updates=14795, lr=9.98896e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=169724 2021-06-20 17:47:41 | INFO | train_inner | epoch 005: 2875 / 3002 loss=2.493, ppl=5.63, wps=5917.1, ups=0.09, wpb=64824, bsz=128, num_updates=14796, lr=9.98896e-05, gnorm=1.907, loss_scale=8, train_wall=10, gb_free=2.8, wall=169735 2021-06-20 17:47:52 | INFO | train_inner | epoch 005: 2876 / 3002 loss=2.414, ppl=5.33, wps=5900, ups=0.09, wpb=64813, bsz=128, num_updates=14797, lr=9.98896e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=169746 2021-06-20 17:48:03 | INFO | train_inner | epoch 005: 2877 / 3002 loss=2.449, ppl=5.46, wps=5937.6, ups=0.09, wpb=64914, bsz=128, num_updates=14798, lr=9.98896e-05, gnorm=2.136, loss_scale=8, train_wall=11, gb_free=2.8, wall=169757 2021-06-20 17:48:14 | INFO | train_inner | epoch 005: 2878 / 3002 loss=2.593, ppl=6.04, wps=5930.2, ups=0.09, wpb=64773, bsz=128, num_updates=14799, lr=9.98896e-05, gnorm=2.358, loss_scale=8, train_wall=10, gb_free=2.8, wall=169768 2021-06-20 17:48:25 | INFO | train_inner | epoch 005: 2879 / 3002 loss=2.392, ppl=5.25, wps=5943.7, ups=0.09, wpb=64891, bsz=128, num_updates=14800, lr=9.98896e-05, gnorm=1.919, loss_scale=8, train_wall=10, gb_free=2.8, wall=169779 2021-06-20 17:48:36 | INFO | train_inner | epoch 005: 2880 / 3002 loss=2.591, ppl=6.03, wps=5743.2, ups=0.09, wpb=64823, bsz=128, num_updates=14801, lr=9.98896e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=169790 2021-06-20 17:48:47 | INFO | train_inner | epoch 005: 2881 / 3002 loss=2.443, ppl=5.44, wps=5877, ups=0.09, wpb=64843, bsz=128, num_updates=14802, lr=9.98896e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=169802 2021-06-20 17:48:58 | INFO | train_inner | epoch 005: 2882 / 3002 loss=2.621, ppl=6.15, wps=5842.4, ups=0.09, wpb=64799, bsz=128, num_updates=14803, lr=9.98896e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=169813 2021-06-20 17:49:09 | INFO | train_inner | epoch 005: 2883 / 3002 loss=2.514, ppl=5.71, wps=5868.3, ups=0.09, wpb=64838, bsz=128, num_updates=14804, lr=9.98896e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=169824 2021-06-20 17:49:20 | INFO | train_inner | epoch 005: 2884 / 3002 loss=2.291, ppl=4.89, wps=5839.6, ups=0.09, wpb=64819, bsz=128, num_updates=14805, lr=9.98896e-05, gnorm=1.833, loss_scale=8, train_wall=11, gb_free=2.8, wall=169835 2021-06-20 17:49:31 | INFO | train_inner | epoch 005: 2885 / 3002 loss=2.48, ppl=5.58, wps=5887, ups=0.09, wpb=64914, bsz=128, num_updates=14806, lr=9.98895e-05, gnorm=2.044, loss_scale=8, train_wall=11, gb_free=2.8, wall=169846 2021-06-20 17:49:43 | INFO | train_inner | epoch 005: 2886 / 3002 loss=2.501, ppl=5.66, wps=5774.5, ups=0.09, wpb=64849, bsz=128, num_updates=14807, lr=9.98895e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=169857 2021-06-20 17:49:54 | INFO | train_inner | epoch 005: 2887 / 3002 loss=2.514, ppl=5.71, wps=5873.7, ups=0.09, wpb=64805, bsz=128, num_updates=14808, lr=9.98895e-05, gnorm=2.465, loss_scale=8, train_wall=11, gb_free=2.8, wall=169868 2021-06-20 17:50:05 | INFO | train_inner | epoch 005: 2888 / 3002 loss=2.451, ppl=5.47, wps=5906.5, ups=0.09, wpb=64799, bsz=128, num_updates=14809, lr=9.98895e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=169879 2021-06-20 17:50:16 | INFO | train_inner | epoch 005: 2889 / 3002 loss=2.465, ppl=5.52, wps=5782.3, ups=0.09, wpb=64825, bsz=128, num_updates=14810, lr=9.98895e-05, gnorm=2.205, loss_scale=8, train_wall=11, gb_free=2.8, wall=169890 2021-06-20 17:50:27 | INFO | train_inner | epoch 005: 2890 / 3002 loss=2.493, ppl=5.63, wps=5890, ups=0.09, wpb=64825, bsz=128, num_updates=14811, lr=9.98895e-05, gnorm=1.909, loss_scale=8, train_wall=11, gb_free=2.8, wall=169901 2021-06-20 17:50:38 | INFO | train_inner | epoch 005: 2891 / 3002 loss=2.721, ppl=6.59, wps=5750.2, ups=0.09, wpb=64742, bsz=128, num_updates=14812, lr=9.98895e-05, gnorm=1.995, loss_scale=8, train_wall=11, gb_free=2.8, wall=169913 2021-06-20 17:50:49 | INFO | train_inner | epoch 005: 2892 / 3002 loss=2.484, ppl=5.59, wps=5858.1, ups=0.09, wpb=64877, bsz=128, num_updates=14813, lr=9.98895e-05, gnorm=1.949, loss_scale=8, train_wall=11, gb_free=2.8, wall=169924 2021-06-20 17:51:00 | INFO | train_inner | epoch 005: 2893 / 3002 loss=2.348, ppl=5.09, wps=5868.4, ups=0.09, wpb=64931, bsz=128, num_updates=14814, lr=9.98895e-05, gnorm=1.895, loss_scale=8, train_wall=11, gb_free=2.8, wall=169935 2021-06-20 17:51:11 | INFO | train_inner | epoch 005: 2894 / 3002 loss=2.566, ppl=5.92, wps=5883.6, ups=0.09, wpb=64848, bsz=128, num_updates=14815, lr=9.98895e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=169946 2021-06-20 17:51:23 | INFO | train_inner | epoch 005: 2895 / 3002 loss=2.375, ppl=5.19, wps=5807.4, ups=0.09, wpb=64893, bsz=128, num_updates=14816, lr=9.98895e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=169957 2021-06-20 17:51:34 | INFO | train_inner | epoch 005: 2896 / 3002 loss=2.481, ppl=5.58, wps=5794.9, ups=0.09, wpb=64817, bsz=128, num_updates=14817, lr=9.98895e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=169968 2021-06-20 17:51:45 | INFO | train_inner | epoch 005: 2897 / 3002 loss=2.519, ppl=5.73, wps=5757.7, ups=0.09, wpb=64800, bsz=128, num_updates=14818, lr=9.98894e-05, gnorm=2.016, loss_scale=8, train_wall=11, gb_free=2.8, wall=169979 2021-06-20 17:51:56 | INFO | train_inner | epoch 005: 2898 / 3002 loss=2.421, ppl=5.36, wps=5795, ups=0.09, wpb=64798, bsz=128, num_updates=14819, lr=9.98894e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=169990 2021-06-20 17:52:07 | INFO | train_inner | epoch 005: 2899 / 3002 loss=2.488, ppl=5.61, wps=5776.3, ups=0.09, wpb=64773, bsz=128, num_updates=14820, lr=9.98894e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=170002 2021-06-20 17:52:18 | INFO | train_inner | epoch 005: 2900 / 3002 loss=2.368, ppl=5.16, wps=5835.8, ups=0.09, wpb=64824, bsz=128, num_updates=14821, lr=9.98894e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=170013 2021-06-20 17:52:30 | INFO | train_inner | epoch 005: 2901 / 3002 loss=2.504, ppl=5.67, wps=5856.7, ups=0.09, wpb=64842, bsz=128, num_updates=14822, lr=9.98894e-05, gnorm=1.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=170024 2021-06-20 17:52:41 | INFO | train_inner | epoch 005: 2902 / 3002 loss=2.54, ppl=5.82, wps=5788.5, ups=0.09, wpb=64792, bsz=128, num_updates=14823, lr=9.98894e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=170035 2021-06-20 17:52:52 | INFO | train_inner | epoch 005: 2903 / 3002 loss=2.609, ppl=6.1, wps=5824.3, ups=0.09, wpb=64743, bsz=128, num_updates=14824, lr=9.98894e-05, gnorm=2.022, loss_scale=8, train_wall=11, gb_free=2.8, wall=170046 2021-06-20 17:53:03 | INFO | train_inner | epoch 005: 2904 / 3002 loss=2.494, ppl=5.63, wps=5877.5, ups=0.09, wpb=64775, bsz=128, num_updates=14825, lr=9.98894e-05, gnorm=1.956, loss_scale=8, train_wall=11, gb_free=2.8, wall=170057 2021-06-20 17:53:14 | INFO | train_inner | epoch 005: 2905 / 3002 loss=2.41, ppl=5.31, wps=5859, ups=0.09, wpb=64804, bsz=128, num_updates=14826, lr=9.98894e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=170068 2021-06-20 17:53:25 | INFO | train_inner | epoch 005: 2906 / 3002 loss=2.706, ppl=6.52, wps=5747.4, ups=0.09, wpb=64800, bsz=128, num_updates=14827, lr=9.98894e-05, gnorm=2.074, loss_scale=8, train_wall=11, gb_free=2.8, wall=170080 2021-06-20 17:53:36 | INFO | train_inner | epoch 005: 2907 / 3002 loss=2.497, ppl=5.65, wps=5891.3, ups=0.09, wpb=64849, bsz=128, num_updates=14828, lr=9.98894e-05, gnorm=2.037, loss_scale=8, train_wall=11, gb_free=2.8, wall=170091 2021-06-20 17:53:47 | INFO | train_inner | epoch 005: 2908 / 3002 loss=2.44, ppl=5.42, wps=5776.4, ups=0.09, wpb=64754, bsz=128, num_updates=14829, lr=9.98894e-05, gnorm=1.895, loss_scale=8, train_wall=11, gb_free=2.8, wall=170102 2021-06-20 17:53:58 | INFO | train_inner | epoch 005: 2909 / 3002 loss=2.574, ppl=5.95, wps=5905.4, ups=0.09, wpb=64782, bsz=128, num_updates=14830, lr=9.98894e-05, gnorm=1.932, loss_scale=8, train_wall=11, gb_free=2.8, wall=170113 2021-06-20 17:54:10 | INFO | train_inner | epoch 005: 2910 / 3002 loss=2.478, ppl=5.57, wps=5797.6, ups=0.09, wpb=64676, bsz=128, num_updates=14831, lr=9.98893e-05, gnorm=1.867, loss_scale=8, train_wall=11, gb_free=2.8, wall=170124 2021-06-20 17:54:21 | INFO | train_inner | epoch 005: 2911 / 3002 loss=2.548, ppl=5.85, wps=5748.6, ups=0.09, wpb=64815, bsz=128, num_updates=14832, lr=9.98893e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=170135 2021-06-20 17:54:32 | INFO | train_inner | epoch 005: 2912 / 3002 loss=2.481, ppl=5.58, wps=5845.3, ups=0.09, wpb=64819, bsz=128, num_updates=14833, lr=9.98893e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=170146 2021-06-20 17:54:43 | INFO | train_inner | epoch 005: 2913 / 3002 loss=2.419, ppl=5.35, wps=5848.1, ups=0.09, wpb=64911, bsz=128, num_updates=14834, lr=9.98893e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=170157 2021-06-20 17:54:54 | INFO | train_inner | epoch 005: 2914 / 3002 loss=2.576, ppl=5.96, wps=6017.1, ups=0.09, wpb=64888, bsz=128, num_updates=14835, lr=9.98893e-05, gnorm=1.926, loss_scale=8, train_wall=10, gb_free=2.8, wall=170168 2021-06-20 17:55:05 | INFO | train_inner | epoch 005: 2915 / 3002 loss=2.506, ppl=5.68, wps=5863, ups=0.09, wpb=64931, bsz=128, num_updates=14836, lr=9.98893e-05, gnorm=2.117, loss_scale=8, train_wall=11, gb_free=2.8, wall=170179 2021-06-20 17:55:16 | INFO | train_inner | epoch 005: 2916 / 3002 loss=2.617, ppl=6.14, wps=5919.1, ups=0.09, wpb=64778, bsz=128, num_updates=14837, lr=9.98893e-05, gnorm=1.96, loss_scale=8, train_wall=10, gb_free=2.8, wall=170190 2021-06-20 17:55:27 | INFO | train_inner | epoch 005: 2917 / 3002 loss=2.544, ppl=5.83, wps=5998, ups=0.09, wpb=64846, bsz=128, num_updates=14838, lr=9.98893e-05, gnorm=1.962, loss_scale=8, train_wall=10, gb_free=2.8, wall=170201 2021-06-20 17:55:38 | INFO | train_inner | epoch 005: 2918 / 3002 loss=2.569, ppl=5.93, wps=5780.4, ups=0.09, wpb=64767, bsz=128, num_updates=14839, lr=9.98893e-05, gnorm=1.894, loss_scale=8, train_wall=11, gb_free=2.8, wall=170212 2021-06-20 17:55:49 | INFO | train_inner | epoch 005: 2919 / 3002 loss=2.546, ppl=5.84, wps=5772.5, ups=0.09, wpb=64765, bsz=128, num_updates=14840, lr=9.98893e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=170223 2021-06-20 17:56:00 | INFO | train_inner | epoch 005: 2920 / 3002 loss=2.532, ppl=5.78, wps=5884.1, ups=0.09, wpb=64895, bsz=128, num_updates=14841, lr=9.98893e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=170234 2021-06-20 17:56:11 | INFO | train_inner | epoch 005: 2921 / 3002 loss=2.552, ppl=5.86, wps=5975.4, ups=0.09, wpb=64825, bsz=128, num_updates=14842, lr=9.98893e-05, gnorm=2.09, loss_scale=8, train_wall=10, gb_free=2.8, wall=170245 2021-06-20 17:56:22 | INFO | train_inner | epoch 005: 2922 / 3002 loss=2.471, ppl=5.54, wps=5754.4, ups=0.09, wpb=64816, bsz=128, num_updates=14843, lr=9.98892e-05, gnorm=1.919, loss_scale=8, train_wall=11, gb_free=2.8, wall=170257 2021-06-20 17:56:33 | INFO | train_inner | epoch 005: 2923 / 3002 loss=2.433, ppl=5.4, wps=5823.1, ups=0.09, wpb=64843, bsz=128, num_updates=14844, lr=9.98892e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=170268 2021-06-20 17:56:44 | INFO | train_inner | epoch 005: 2924 / 3002 loss=2.491, ppl=5.62, wps=5868.4, ups=0.09, wpb=64887, bsz=128, num_updates=14845, lr=9.98892e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=170279 2021-06-20 17:56:55 | INFO | train_inner | epoch 005: 2925 / 3002 loss=2.501, ppl=5.66, wps=6002, ups=0.09, wpb=64845, bsz=128, num_updates=14846, lr=9.98892e-05, gnorm=1.989, loss_scale=8, train_wall=10, gb_free=2.8, wall=170290 2021-06-20 17:57:06 | INFO | train_inner | epoch 005: 2926 / 3002 loss=2.498, ppl=5.65, wps=5850, ups=0.09, wpb=64880, bsz=128, num_updates=14847, lr=9.98892e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=170301 2021-06-20 17:57:17 | INFO | train_inner | epoch 005: 2927 / 3002 loss=2.481, ppl=5.58, wps=5800.5, ups=0.09, wpb=64863, bsz=128, num_updates=14848, lr=9.98892e-05, gnorm=1.865, loss_scale=8, train_wall=11, gb_free=2.8, wall=170312 2021-06-20 17:57:28 | INFO | train_inner | epoch 005: 2928 / 3002 loss=2.666, ppl=6.35, wps=5933.9, ups=0.09, wpb=64850, bsz=128, num_updates=14849, lr=9.98892e-05, gnorm=1.998, loss_scale=8, train_wall=10, gb_free=2.8, wall=170323 2021-06-20 17:57:39 | INFO | train_inner | epoch 005: 2929 / 3002 loss=2.404, ppl=5.29, wps=5904.6, ups=0.09, wpb=64857, bsz=128, num_updates=14850, lr=9.98892e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=170334 2021-06-20 17:57:51 | INFO | train_inner | epoch 005: 2930 / 3002 loss=2.466, ppl=5.53, wps=5818.5, ups=0.09, wpb=64801, bsz=128, num_updates=14851, lr=9.98892e-05, gnorm=1.902, loss_scale=8, train_wall=11, gb_free=2.8, wall=170345 2021-06-20 17:58:01 | INFO | train_inner | epoch 005: 2931 / 3002 loss=2.478, ppl=5.57, wps=5941, ups=0.09, wpb=64865, bsz=128, num_updates=14852, lr=9.98892e-05, gnorm=1.943, loss_scale=8, train_wall=10, gb_free=2.8, wall=170356 2021-06-20 17:58:13 | INFO | train_inner | epoch 005: 2932 / 3002 loss=2.409, ppl=5.31, wps=5756.9, ups=0.09, wpb=64819, bsz=128, num_updates=14853, lr=9.98892e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=170367 2021-06-20 17:58:24 | INFO | train_inner | epoch 005: 2933 / 3002 loss=2.448, ppl=5.46, wps=5929.4, ups=0.09, wpb=64736, bsz=128, num_updates=14854, lr=9.98892e-05, gnorm=1.872, loss_scale=8, train_wall=10, gb_free=2.8, wall=170378 2021-06-20 17:58:35 | INFO | train_inner | epoch 005: 2934 / 3002 loss=2.582, ppl=5.99, wps=5895.9, ups=0.09, wpb=64823, bsz=128, num_updates=14855, lr=9.98892e-05, gnorm=1.967, loss_scale=8, train_wall=10, gb_free=2.8, wall=170389 2021-06-20 17:58:46 | INFO | train_inner | epoch 005: 2935 / 3002 loss=2.55, ppl=5.86, wps=5862.1, ups=0.09, wpb=64880, bsz=128, num_updates=14856, lr=9.98891e-05, gnorm=1.995, loss_scale=8, train_wall=11, gb_free=2.8, wall=170400 2021-06-20 17:58:57 | INFO | train_inner | epoch 005: 2936 / 3002 loss=2.482, ppl=5.59, wps=5842.7, ups=0.09, wpb=64850, bsz=128, num_updates=14857, lr=9.98891e-05, gnorm=2.068, loss_scale=8, train_wall=11, gb_free=2.8, wall=170411 2021-06-20 17:59:08 | INFO | train_inner | epoch 005: 2937 / 3002 loss=2.386, ppl=5.23, wps=5696.6, ups=0.09, wpb=64830, bsz=128, num_updates=14858, lr=9.98891e-05, gnorm=1.914, loss_scale=8, train_wall=11, gb_free=2.8, wall=170422 2021-06-20 17:59:19 | INFO | train_inner | epoch 005: 2938 / 3002 loss=2.486, ppl=5.6, wps=5812.3, ups=0.09, wpb=64865, bsz=128, num_updates=14859, lr=9.98891e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=170434 2021-06-20 17:59:31 | INFO | train_inner | epoch 005: 2939 / 3002 loss=2.577, ppl=5.97, wps=5738.3, ups=0.09, wpb=64758, bsz=128, num_updates=14860, lr=9.98891e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=170445 2021-06-20 17:59:42 | INFO | train_inner | epoch 005: 2940 / 3002 loss=2.391, ppl=5.24, wps=5918.8, ups=0.09, wpb=64792, bsz=128, num_updates=14861, lr=9.98891e-05, gnorm=1.946, loss_scale=8, train_wall=10, gb_free=2.8, wall=170456 2021-06-20 17:59:53 | INFO | train_inner | epoch 005: 2941 / 3002 loss=2.499, ppl=5.65, wps=5743.6, ups=0.09, wpb=64871, bsz=128, num_updates=14862, lr=9.98891e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=170467 2021-06-20 18:00:04 | INFO | train_inner | epoch 005: 2942 / 3002 loss=2.588, ppl=6.01, wps=5812.1, ups=0.09, wpb=64812, bsz=128, num_updates=14863, lr=9.98891e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=170478 2021-06-20 18:00:15 | INFO | train_inner | epoch 005: 2943 / 3002 loss=2.546, ppl=5.84, wps=5873.8, ups=0.09, wpb=64857, bsz=128, num_updates=14864, lr=9.98891e-05, gnorm=2.137, loss_scale=8, train_wall=11, gb_free=2.8, wall=170489 2021-06-20 18:00:26 | INFO | train_inner | epoch 005: 2944 / 3002 loss=2.574, ppl=5.96, wps=5849.5, ups=0.09, wpb=64834, bsz=128, num_updates=14865, lr=9.98891e-05, gnorm=2.063, loss_scale=8, train_wall=11, gb_free=2.8, wall=170500 2021-06-20 18:00:37 | INFO | train_inner | epoch 005: 2945 / 3002 loss=2.344, ppl=5.08, wps=5823.9, ups=0.09, wpb=64785, bsz=128, num_updates=14866, lr=9.98891e-05, gnorm=1.956, loss_scale=8, train_wall=11, gb_free=2.8, wall=170512 2021-06-20 18:00:48 | INFO | train_inner | epoch 005: 2946 / 3002 loss=2.538, ppl=5.81, wps=5899.6, ups=0.09, wpb=64829, bsz=128, num_updates=14867, lr=9.98891e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=170523 2021-06-20 18:00:59 | INFO | train_inner | epoch 005: 2947 / 3002 loss=2.565, ppl=5.92, wps=5994.6, ups=0.09, wpb=64935, bsz=128, num_updates=14868, lr=9.9889e-05, gnorm=1.973, loss_scale=8, train_wall=10, gb_free=2.8, wall=170533 2021-06-20 18:01:10 | INFO | train_inner | epoch 005: 2948 / 3002 loss=2.595, ppl=6.04, wps=5843.9, ups=0.09, wpb=64760, bsz=128, num_updates=14869, lr=9.9889e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=170544 2021-06-20 18:01:21 | INFO | train_inner | epoch 005: 2949 / 3002 loss=2.458, ppl=5.5, wps=5877.5, ups=0.09, wpb=64767, bsz=128, num_updates=14870, lr=9.9889e-05, gnorm=1.884, loss_scale=8, train_wall=11, gb_free=2.8, wall=170555 2021-06-20 18:01:32 | INFO | train_inner | epoch 005: 2950 / 3002 loss=2.498, ppl=5.65, wps=5794.9, ups=0.09, wpb=64880, bsz=128, num_updates=14871, lr=9.9889e-05, gnorm=1.903, loss_scale=8, train_wall=11, gb_free=2.8, wall=170567 2021-06-20 18:01:44 | INFO | train_inner | epoch 005: 2951 / 3002 loss=2.368, ppl=5.16, wps=5773.5, ups=0.09, wpb=64823, bsz=128, num_updates=14872, lr=9.9889e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=170578 2021-06-20 18:01:55 | INFO | train_inner | epoch 005: 2952 / 3002 loss=2.457, ppl=5.49, wps=5820.4, ups=0.09, wpb=64805, bsz=128, num_updates=14873, lr=9.9889e-05, gnorm=2.026, loss_scale=8, train_wall=11, gb_free=2.8, wall=170589 2021-06-20 18:02:06 | INFO | train_inner | epoch 005: 2953 / 3002 loss=2.579, ppl=5.98, wps=5852.8, ups=0.09, wpb=64764, bsz=128, num_updates=14874, lr=9.9889e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=170600 2021-06-20 18:02:17 | INFO | train_inner | epoch 005: 2954 / 3002 loss=2.692, ppl=6.46, wps=5744, ups=0.09, wpb=64817, bsz=128, num_updates=14875, lr=9.9889e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=170611 2021-06-20 18:02:28 | INFO | train_inner | epoch 005: 2955 / 3002 loss=2.45, ppl=5.46, wps=5694.8, ups=0.09, wpb=64815, bsz=128, num_updates=14876, lr=9.9889e-05, gnorm=2.066, loss_scale=8, train_wall=11, gb_free=2.8, wall=170623 2021-06-20 18:02:40 | INFO | train_inner | epoch 005: 2956 / 3002 loss=2.609, ppl=6.1, wps=5767.5, ups=0.09, wpb=64793, bsz=128, num_updates=14877, lr=9.9889e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=170634 2021-06-20 18:02:51 | INFO | train_inner | epoch 005: 2957 / 3002 loss=2.599, ppl=6.06, wps=5912, ups=0.09, wpb=64890, bsz=128, num_updates=14878, lr=9.9889e-05, gnorm=1.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=170645 2021-06-20 18:03:02 | INFO | train_inner | epoch 005: 2958 / 3002 loss=2.589, ppl=6.02, wps=5837, ups=0.09, wpb=64781, bsz=128, num_updates=14879, lr=9.9889e-05, gnorm=1.836, loss_scale=8, train_wall=11, gb_free=2.8, wall=170656 2021-06-20 18:03:13 | INFO | train_inner | epoch 005: 2959 / 3002 loss=2.611, ppl=6.11, wps=5754.1, ups=0.09, wpb=64879, bsz=128, num_updates=14880, lr=9.9889e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=170667 2021-06-20 18:03:24 | INFO | train_inner | epoch 005: 2960 / 3002 loss=2.555, ppl=5.87, wps=5795.1, ups=0.09, wpb=64772, bsz=128, num_updates=14881, lr=9.98889e-05, gnorm=1.89, loss_scale=8, train_wall=11, gb_free=2.8, wall=170679 2021-06-20 18:03:35 | INFO | train_inner | epoch 005: 2961 / 3002 loss=2.682, ppl=6.42, wps=5775.6, ups=0.09, wpb=64763, bsz=128, num_updates=14882, lr=9.98889e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=170690 2021-06-20 18:03:47 | INFO | train_inner | epoch 005: 2962 / 3002 loss=2.418, ppl=5.35, wps=5820.9, ups=0.09, wpb=64889, bsz=128, num_updates=14883, lr=9.98889e-05, gnorm=1.862, loss_scale=8, train_wall=11, gb_free=2.8, wall=170701 2021-06-20 18:03:58 | INFO | train_inner | epoch 005: 2963 / 3002 loss=2.396, ppl=5.26, wps=5890.7, ups=0.09, wpb=64804, bsz=128, num_updates=14884, lr=9.98889e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=170712 2021-06-20 18:04:09 | INFO | train_inner | epoch 005: 2964 / 3002 loss=2.512, ppl=5.7, wps=5808.5, ups=0.09, wpb=64816, bsz=128, num_updates=14885, lr=9.98889e-05, gnorm=2.008, loss_scale=8, train_wall=11, gb_free=2.8, wall=170723 2021-06-20 18:04:20 | INFO | train_inner | epoch 005: 2965 / 3002 loss=2.555, ppl=5.88, wps=5759.5, ups=0.09, wpb=64732, bsz=128, num_updates=14886, lr=9.98889e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=170734 2021-06-20 18:04:31 | INFO | train_inner | epoch 005: 2966 / 3002 loss=2.38, ppl=5.2, wps=5763.6, ups=0.09, wpb=64876, bsz=128, num_updates=14887, lr=9.98889e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=170746 2021-06-20 18:04:42 | INFO | train_inner | epoch 005: 2967 / 3002 loss=2.592, ppl=6.03, wps=5781.4, ups=0.09, wpb=64769, bsz=128, num_updates=14888, lr=9.98889e-05, gnorm=2.021, loss_scale=8, train_wall=11, gb_free=2.8, wall=170757 2021-06-20 18:04:54 | INFO | train_inner | epoch 005: 2968 / 3002 loss=2.499, ppl=5.65, wps=5787.7, ups=0.09, wpb=64871, bsz=128, num_updates=14889, lr=9.98889e-05, gnorm=1.998, loss_scale=8, train_wall=11, gb_free=2.8, wall=170768 2021-06-20 18:05:05 | INFO | train_inner | epoch 005: 2969 / 3002 loss=2.439, ppl=5.42, wps=5792.5, ups=0.09, wpb=64873, bsz=128, num_updates=14890, lr=9.98889e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=170779 2021-06-20 18:05:16 | INFO | train_inner | epoch 005: 2970 / 3002 loss=2.53, ppl=5.77, wps=5977.4, ups=0.09, wpb=64961, bsz=128, num_updates=14891, lr=9.98889e-05, gnorm=1.98, loss_scale=8, train_wall=10, gb_free=2.8, wall=170790 2021-06-20 18:05:27 | INFO | train_inner | epoch 005: 2971 / 3002 loss=2.378, ppl=5.2, wps=5861, ups=0.09, wpb=64850, bsz=128, num_updates=14892, lr=9.98889e-05, gnorm=1.976, loss_scale=8, train_wall=11, gb_free=2.8, wall=170801 2021-06-20 18:05:38 | INFO | train_inner | epoch 005: 2972 / 3002 loss=2.469, ppl=5.54, wps=5931.2, ups=0.09, wpb=64851, bsz=128, num_updates=14893, lr=9.98888e-05, gnorm=1.869, loss_scale=8, train_wall=10, gb_free=2.8, wall=170812 2021-06-20 18:05:49 | INFO | train_inner | epoch 005: 2973 / 3002 loss=2.465, ppl=5.52, wps=5929.9, ups=0.09, wpb=64817, bsz=128, num_updates=14894, lr=9.98888e-05, gnorm=1.891, loss_scale=8, train_wall=10, gb_free=2.8, wall=170823 2021-06-20 18:06:00 | INFO | train_inner | epoch 005: 2974 / 3002 loss=2.417, ppl=5.34, wps=5776.4, ups=0.09, wpb=64793, bsz=128, num_updates=14895, lr=9.98888e-05, gnorm=1.885, loss_scale=8, train_wall=11, gb_free=2.8, wall=170834 2021-06-20 18:06:11 | INFO | train_inner | epoch 005: 2975 / 3002 loss=2.41, ppl=5.31, wps=5776.4, ups=0.09, wpb=64804, bsz=128, num_updates=14896, lr=9.98888e-05, gnorm=2.059, loss_scale=8, train_wall=11, gb_free=2.8, wall=170845 2021-06-20 18:06:22 | INFO | train_inner | epoch 005: 2976 / 3002 loss=2.551, ppl=5.86, wps=5868.2, ups=0.09, wpb=64860, bsz=128, num_updates=14897, lr=9.98888e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=170856 2021-06-20 18:06:33 | INFO | train_inner | epoch 005: 2977 / 3002 loss=2.455, ppl=5.48, wps=5749.7, ups=0.09, wpb=64779, bsz=128, num_updates=14898, lr=9.98888e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=170868 2021-06-20 18:06:44 | INFO | train_inner | epoch 005: 2978 / 3002 loss=2.615, ppl=6.13, wps=6000.7, ups=0.09, wpb=64875, bsz=128, num_updates=14899, lr=9.98888e-05, gnorm=1.888, loss_scale=8, train_wall=10, gb_free=2.8, wall=170879 2021-06-20 18:06:55 | INFO | train_inner | epoch 005: 2979 / 3002 loss=2.474, ppl=5.56, wps=5741.5, ups=0.09, wpb=64806, bsz=128, num_updates=14900, lr=9.98888e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=170890 2021-06-20 18:07:07 | INFO | train_inner | epoch 005: 2980 / 3002 loss=2.56, ppl=5.9, wps=5731.3, ups=0.09, wpb=64692, bsz=128, num_updates=14901, lr=9.98888e-05, gnorm=1.952, loss_scale=8, train_wall=11, gb_free=2.8, wall=170901 2021-06-20 18:07:18 | INFO | train_inner | epoch 005: 2981 / 3002 loss=2.456, ppl=5.49, wps=5829.1, ups=0.09, wpb=64878, bsz=128, num_updates=14902, lr=9.98888e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=170912 2021-06-20 18:07:29 | INFO | train_inner | epoch 005: 2982 / 3002 loss=2.479, ppl=5.57, wps=5834.1, ups=0.09, wpb=64850, bsz=128, num_updates=14903, lr=9.98888e-05, gnorm=2.041, loss_scale=8, train_wall=11, gb_free=2.8, wall=170923 2021-06-20 18:07:40 | INFO | train_inner | epoch 005: 2983 / 3002 loss=2.417, ppl=5.34, wps=5847.1, ups=0.09, wpb=64823, bsz=128, num_updates=14904, lr=9.98888e-05, gnorm=1.972, loss_scale=8, train_wall=11, gb_free=2.8, wall=170934 2021-06-20 18:07:51 | INFO | train_inner | epoch 005: 2984 / 3002 loss=2.599, ppl=6.06, wps=5880.8, ups=0.09, wpb=64834, bsz=128, num_updates=14905, lr=9.98888e-05, gnorm=1.921, loss_scale=8, train_wall=11, gb_free=2.8, wall=170945 2021-06-20 18:08:02 | INFO | train_inner | epoch 005: 2985 / 3002 loss=2.488, ppl=5.61, wps=5885.3, ups=0.09, wpb=64829, bsz=128, num_updates=14906, lr=9.98887e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=170956 2021-06-20 18:08:13 | INFO | train_inner | epoch 005: 2986 / 3002 loss=2.371, ppl=5.17, wps=5795, ups=0.09, wpb=64887, bsz=128, num_updates=14907, lr=9.98887e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=170968 2021-06-20 18:08:24 | INFO | train_inner | epoch 005: 2987 / 3002 loss=2.445, ppl=5.45, wps=5907.7, ups=0.09, wpb=64842, bsz=128, num_updates=14908, lr=9.98887e-05, gnorm=1.875, loss_scale=8, train_wall=10, gb_free=2.8, wall=170979 2021-06-20 18:08:36 | INFO | train_inner | epoch 005: 2988 / 3002 loss=2.493, ppl=5.63, wps=5796.2, ups=0.09, wpb=64784, bsz=128, num_updates=14909, lr=9.98887e-05, gnorm=2.045, loss_scale=16, train_wall=11, gb_free=2.8, wall=170990 2021-06-20 18:08:46 | INFO | train_inner | epoch 005: 2989 / 3002 loss=2.461, ppl=5.51, wps=5899.7, ups=0.09, wpb=64784, bsz=128, num_updates=14910, lr=9.98887e-05, gnorm=1.95, loss_scale=16, train_wall=11, gb_free=2.8, wall=171001 2021-06-20 18:08:58 | INFO | train_inner | epoch 005: 2990 / 3002 loss=2.483, ppl=5.59, wps=5850.1, ups=0.09, wpb=64873, bsz=128, num_updates=14911, lr=9.98887e-05, gnorm=1.999, loss_scale=16, train_wall=11, gb_free=2.8, wall=171012 2021-06-20 18:09:09 | INFO | train_inner | epoch 005: 2991 / 3002 loss=2.559, ppl=5.89, wps=5801.8, ups=0.09, wpb=64831, bsz=128, num_updates=14912, lr=9.98887e-05, gnorm=1.903, loss_scale=16, train_wall=11, gb_free=2.8, wall=171023 2021-06-20 18:09:19 | INFO | train_inner | epoch 005: 2992 / 3002 loss=2.477, ppl=5.57, wps=6068.4, ups=0.09, wpb=64839, bsz=128, num_updates=14913, lr=9.98887e-05, gnorm=2.298, loss_scale=16, train_wall=10, gb_free=2.8, wall=171034 2021-06-20 18:09:30 | INFO | train_inner | epoch 005: 2993 / 3002 loss=2.438, ppl=5.42, wps=5889.5, ups=0.09, wpb=64873, bsz=128, num_updates=14914, lr=9.98887e-05, gnorm=1.896, loss_scale=16, train_wall=11, gb_free=2.8, wall=171045 2021-06-20 18:09:41 | INFO | train_inner | epoch 005: 2994 / 3002 loss=2.561, ppl=5.9, wps=5873.7, ups=0.09, wpb=64832, bsz=128, num_updates=14915, lr=9.98887e-05, gnorm=1.963, loss_scale=16, train_wall=11, gb_free=2.8, wall=171056 2021-06-20 18:09:53 | INFO | train_inner | epoch 005: 2995 / 3002 loss=2.46, ppl=5.5, wps=5877.7, ups=0.09, wpb=64901, bsz=128, num_updates=14916, lr=9.98887e-05, gnorm=1.886, loss_scale=16, train_wall=11, gb_free=2.8, wall=171067 2021-06-20 18:10:04 | INFO | train_inner | epoch 005: 2996 / 3002 loss=2.555, ppl=5.88, wps=5743.2, ups=0.09, wpb=64728, bsz=128, num_updates=14917, lr=9.98887e-05, gnorm=1.855, loss_scale=16, train_wall=11, gb_free=2.8, wall=171078 2021-06-20 18:10:15 | INFO | train_inner | epoch 005: 2997 / 3002 loss=2.351, ppl=5.1, wps=5783.6, ups=0.09, wpb=64829, bsz=128, num_updates=14918, lr=9.98886e-05, gnorm=2.275, loss_scale=16, train_wall=11, gb_free=2.8, wall=171089 2021-06-20 18:10:26 | INFO | train_inner | epoch 005: 2998 / 3002 loss=2.497, ppl=5.65, wps=5810.3, ups=0.09, wpb=64818, bsz=128, num_updates=14919, lr=9.98886e-05, gnorm=1.977, loss_scale=16, train_wall=11, gb_free=2.8, wall=171100 2021-06-20 18:10:37 | INFO | train_inner | epoch 005: 2999 / 3002 loss=2.355, ppl=5.11, wps=5789.3, ups=0.09, wpb=64852, bsz=128, num_updates=14920, lr=9.98886e-05, gnorm=1.863, loss_scale=16, train_wall=11, gb_free=2.8, wall=171112 2021-06-20 18:10:48 | INFO | train_inner | epoch 005: 3000 / 3002 loss=2.45, ppl=5.46, wps=5932.6, ups=0.09, wpb=64907, bsz=128, num_updates=14921, lr=9.98886e-05, gnorm=1.937, loss_scale=16, train_wall=10, gb_free=2.8, wall=171123 2021-06-20 18:10:59 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-20 18:11:06 | INFO | train_inner | epoch 005: 3002 / 3002 loss=2.4, ppl=5.28, wps=2104.2, ups=0.06, wpb=36411, bsz=72, num_updates=14922, lr=9.98886e-05, gnorm=2.57, loss_scale=8, train_wall=17, gb_free=2.8, wall=171140 2021-06-20 18:11:06 | INFO | fairseq_cli.train | begin validation on "valid" subset 2021-06-20 18:26:02 | INFO | valid | epoch 005 | valid on 'valid' subset | loss 2.365 | ppl 5.15 | wps 19686.9 | wpb 506.5 | bsz 1 | num_updates 14922 | best_loss 2.365 2021-06-20 18:26:02 | INFO | fairseq.checkpoint_utils | Preparing to save checkpoint for epoch 5 @ 14922 updates 2021-06-20 18:26:02 | INFO | fairseq.trainer | Saving checkpoint to checkpoints/checkpoint5.pt 2021-06-20 18:26:16 | INFO | fairseq.trainer | Finished saving checkpoint to checkpoints/checkpoint5.pt 2021-06-20 18:33:20 | INFO | fairseq.checkpoint_utils | Saved checkpoint checkpoints/checkpoint5.pt (epoch 5 @ 14922 updates, score 2.365) (writing took 437.672937018011 seconds) 2021-06-20 18:33:20 | INFO | fairseq_cli.train | end of epoch 5 (average epoch stats below) 2021-06-20 18:33:20 | INFO | train | epoch 005 | loss 2.514 | ppl 5.71 | wps 5586.4 | ups 0.09 | wpb 64819.4 | bsz 128 | num_updates 14922 | lr 9.98886e-05 | gnorm 2.04 | loss_scale 8 | train_wall 31919 | gb_free 2.8 | wall 172474 2021-06-20 18:33:20 | INFO | fairseq.trainer | begin training epoch 6 2021-06-20 18:33:20 | INFO | fairseq_cli.train | Start iterating over samples 2021-06-20 18:33:31 | INFO | train_inner | epoch 006: 1 / 3002 loss=2.387, ppl=5.23, wps=48.2, ups=0, wpb=64865, bsz=128, num_updates=14923, lr=9.98886e-05, gnorm=1.938, loss_scale=8, train_wall=10, gb_free=2.8, wall=172485 2021-06-20 18:33:41 | INFO | train_inner | epoch 006: 2 / 3002 loss=2.497, ppl=5.65, wps=6178.9, ups=0.1, wpb=64767, bsz=128, num_updates=14924, lr=9.98886e-05, gnorm=2.015, loss_scale=8, train_wall=10, gb_free=2.8, wall=172495 2021-06-20 18:33:52 | INFO | train_inner | epoch 006: 3 / 3002 loss=2.383, ppl=5.22, wps=6143.6, ups=0.09, wpb=64876, bsz=128, num_updates=14925, lr=9.98886e-05, gnorm=1.929, loss_scale=8, train_wall=10, gb_free=2.8, wall=172506 2021-06-20 18:34:02 | INFO | train_inner | epoch 006: 4 / 3002 loss=2.434, ppl=5.4, wps=6198.3, ups=0.1, wpb=64813, bsz=128, num_updates=14926, lr=9.98886e-05, gnorm=1.927, loss_scale=8, train_wall=10, gb_free=2.8, wall=172516 2021-06-20 18:34:12 | INFO | train_inner | epoch 006: 5 / 3002 loss=2.499, ppl=5.65, wps=6257.8, ups=0.1, wpb=64930, bsz=128, num_updates=14927, lr=9.98886e-05, gnorm=1.999, loss_scale=8, train_wall=10, gb_free=2.8, wall=172527 2021-06-20 18:34:23 | INFO | train_inner | epoch 006: 6 / 3002 loss=2.429, ppl=5.38, wps=6183, ups=0.1, wpb=64762, bsz=128, num_updates=14928, lr=9.98886e-05, gnorm=2.012, loss_scale=8, train_wall=10, gb_free=2.8, wall=172537 2021-06-20 18:34:34 | INFO | train_inner | epoch 006: 7 / 3002 loss=2.434, ppl=5.4, wps=5980.6, ups=0.09, wpb=64818, bsz=128, num_updates=14929, lr=9.98886e-05, gnorm=1.92, loss_scale=8, train_wall=10, gb_free=2.8, wall=172548 2021-06-20 18:34:44 | INFO | train_inner | epoch 006: 8 / 3002 loss=2.39, ppl=5.24, wps=6060.8, ups=0.09, wpb=64847, bsz=128, num_updates=14930, lr=9.98886e-05, gnorm=1.939, loss_scale=8, train_wall=10, gb_free=2.8, wall=172559 2021-06-20 18:34:55 | INFO | train_inner | epoch 006: 9 / 3002 loss=2.541, ppl=5.82, wps=6087.3, ups=0.09, wpb=64858, bsz=128, num_updates=14931, lr=9.98885e-05, gnorm=1.912, loss_scale=8, train_wall=10, gb_free=2.8, wall=172569 2021-06-20 18:35:06 | INFO | train_inner | epoch 006: 10 / 3002 loss=2.42, ppl=5.35, wps=6016.6, ups=0.09, wpb=64830, bsz=128, num_updates=14932, lr=9.98885e-05, gnorm=1.918, loss_scale=8, train_wall=10, gb_free=2.8, wall=172580 2021-06-20 18:35:17 | INFO | train_inner | epoch 006: 11 / 3002 loss=2.475, ppl=5.56, wps=6078.8, ups=0.09, wpb=64808, bsz=128, num_updates=14933, lr=9.98885e-05, gnorm=1.903, loss_scale=8, train_wall=10, gb_free=2.8, wall=172591 2021-06-20 18:35:28 | INFO | train_inner | epoch 006: 12 / 3002 loss=2.456, ppl=5.49, wps=5854.3, ups=0.09, wpb=64878, bsz=128, num_updates=14934, lr=9.98885e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=172602 2021-06-20 18:35:39 | INFO | train_inner | epoch 006: 13 / 3002 loss=2.596, ppl=6.05, wps=5918.6, ups=0.09, wpb=64881, bsz=128, num_updates=14935, lr=9.98885e-05, gnorm=2.055, loss_scale=8, train_wall=10, gb_free=2.8, wall=172613 2021-06-20 18:35:50 | INFO | train_inner | epoch 006: 14 / 3002 loss=2.489, ppl=5.61, wps=5884.9, ups=0.09, wpb=64772, bsz=128, num_updates=14936, lr=9.98885e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=172624 2021-06-20 18:36:00 | INFO | train_inner | epoch 006: 15 / 3002 loss=2.531, ppl=5.78, wps=5974.3, ups=0.09, wpb=64763, bsz=128, num_updates=14937, lr=9.98885e-05, gnorm=1.999, loss_scale=8, train_wall=10, gb_free=2.8, wall=172635 2021-06-20 18:36:11 | INFO | train_inner | epoch 006: 16 / 3002 loss=2.598, ppl=6.05, wps=6071.4, ups=0.09, wpb=64804, bsz=128, num_updates=14938, lr=9.98885e-05, gnorm=1.998, loss_scale=8, train_wall=10, gb_free=2.8, wall=172645 2021-06-20 18:36:22 | INFO | train_inner | epoch 006: 17 / 3002 loss=2.477, ppl=5.57, wps=5943.7, ups=0.09, wpb=64817, bsz=128, num_updates=14939, lr=9.98885e-05, gnorm=2.129, loss_scale=8, train_wall=10, gb_free=2.8, wall=172656 2021-06-20 18:36:33 | INFO | train_inner | epoch 006: 18 / 3002 loss=2.427, ppl=5.38, wps=5896, ups=0.09, wpb=64718, bsz=128, num_updates=14940, lr=9.98885e-05, gnorm=2.012, loss_scale=8, train_wall=11, gb_free=2.8, wall=172667 2021-06-20 18:36:44 | INFO | train_inner | epoch 006: 19 / 3002 loss=2.493, ppl=5.63, wps=5891.1, ups=0.09, wpb=64732, bsz=128, num_updates=14941, lr=9.98885e-05, gnorm=1.912, loss_scale=8, train_wall=11, gb_free=2.8, wall=172678 2021-06-20 18:36:55 | INFO | train_inner | epoch 006: 20 / 3002 loss=2.657, ppl=6.31, wps=5857.2, ups=0.09, wpb=64799, bsz=128, num_updates=14942, lr=9.98885e-05, gnorm=1.984, loss_scale=8, train_wall=11, gb_free=2.8, wall=172689 2021-06-20 18:37:06 | INFO | train_inner | epoch 006: 21 / 3002 loss=2.554, ppl=5.87, wps=5729.6, ups=0.09, wpb=64838, bsz=128, num_updates=14943, lr=9.98884e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=172701 2021-06-20 18:37:18 | INFO | train_inner | epoch 006: 22 / 3002 loss=2.328, ppl=5.02, wps=5764.1, ups=0.09, wpb=64872, bsz=128, num_updates=14944, lr=9.98884e-05, gnorm=1.88, loss_scale=8, train_wall=11, gb_free=2.8, wall=172712 2021-06-20 18:37:29 | INFO | train_inner | epoch 006: 23 / 3002 loss=2.537, ppl=5.8, wps=5838.1, ups=0.09, wpb=64789, bsz=128, num_updates=14945, lr=9.98884e-05, gnorm=1.915, loss_scale=8, train_wall=11, gb_free=2.8, wall=172723 2021-06-20 18:37:40 | INFO | train_inner | epoch 006: 24 / 3002 loss=2.675, ppl=6.38, wps=5834, ups=0.09, wpb=64885, bsz=128, num_updates=14946, lr=9.98884e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=172734 2021-06-20 18:37:51 | INFO | train_inner | epoch 006: 25 / 3002 loss=2.487, ppl=5.61, wps=5872.9, ups=0.09, wpb=64816, bsz=128, num_updates=14947, lr=9.98884e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=172745 2021-06-20 18:38:02 | INFO | train_inner | epoch 006: 26 / 3002 loss=2.518, ppl=5.73, wps=5798.5, ups=0.09, wpb=64743, bsz=128, num_updates=14948, lr=9.98884e-05, gnorm=1.918, loss_scale=8, train_wall=11, gb_free=2.8, wall=172756 2021-06-20 18:38:13 | INFO | train_inner | epoch 006: 27 / 3002 loss=2.407, ppl=5.3, wps=5778.3, ups=0.09, wpb=64740, bsz=128, num_updates=14949, lr=9.98884e-05, gnorm=1.904, loss_scale=8, train_wall=11, gb_free=2.8, wall=172768 2021-06-20 18:38:24 | INFO | train_inner | epoch 006: 28 / 3002 loss=2.328, ppl=5.02, wps=5979.2, ups=0.09, wpb=64879, bsz=128, num_updates=14950, lr=9.98884e-05, gnorm=1.935, loss_scale=8, train_wall=10, gb_free=2.8, wall=172778 2021-06-20 18:38:35 | INFO | train_inner | epoch 006: 29 / 3002 loss=2.445, ppl=5.45, wps=5775.5, ups=0.09, wpb=64737, bsz=128, num_updates=14951, lr=9.98884e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=172790 2021-06-20 18:38:47 | INFO | train_inner | epoch 006: 30 / 3002 loss=2.561, ppl=5.9, wps=5753.5, ups=0.09, wpb=64794, bsz=128, num_updates=14952, lr=9.98884e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=172801 2021-06-20 18:38:58 | INFO | train_inner | epoch 006: 31 / 3002 loss=2.39, ppl=5.24, wps=5792.8, ups=0.09, wpb=64788, bsz=128, num_updates=14953, lr=9.98884e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=172812 2021-06-20 18:39:09 | INFO | train_inner | epoch 006: 32 / 3002 loss=2.498, ppl=5.65, wps=5764.9, ups=0.09, wpb=64881, bsz=128, num_updates=14954, lr=9.98884e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=172823 2021-06-20 18:39:20 | INFO | train_inner | epoch 006: 33 / 3002 loss=2.451, ppl=5.47, wps=5875.4, ups=0.09, wpb=64714, bsz=128, num_updates=14955, lr=9.98884e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=172834 2021-06-20 18:39:31 | INFO | train_inner | epoch 006: 34 / 3002 loss=2.51, ppl=5.7, wps=5893.5, ups=0.09, wpb=64763, bsz=128, num_updates=14956, lr=9.98883e-05, gnorm=2.091, loss_scale=8, train_wall=11, gb_free=2.8, wall=172845 2021-06-20 18:39:42 | INFO | train_inner | epoch 006: 35 / 3002 loss=2.425, ppl=5.37, wps=5883.2, ups=0.09, wpb=64773, bsz=128, num_updates=14957, lr=9.98883e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=172856 2021-06-20 18:39:53 | INFO | train_inner | epoch 006: 36 / 3002 loss=2.503, ppl=5.67, wps=5782.8, ups=0.09, wpb=64874, bsz=128, num_updates=14958, lr=9.98883e-05, gnorm=2.297, loss_scale=8, train_wall=11, gb_free=2.8, wall=172868 2021-06-20 18:40:05 | INFO | train_inner | epoch 006: 37 / 3002 loss=2.431, ppl=5.39, wps=5691.8, ups=0.09, wpb=64832, bsz=128, num_updates=14959, lr=9.98883e-05, gnorm=1.879, loss_scale=8, train_wall=11, gb_free=2.8, wall=172879 2021-06-20 18:40:16 | INFO | train_inner | epoch 006: 38 / 3002 loss=2.226, ppl=4.68, wps=5711.9, ups=0.09, wpb=64832, bsz=128, num_updates=14960, lr=9.98883e-05, gnorm=1.874, loss_scale=8, train_wall=11, gb_free=2.8, wall=172890 2021-06-20 18:40:27 | INFO | train_inner | epoch 006: 39 / 3002 loss=2.367, ppl=5.16, wps=5941.6, ups=0.09, wpb=64893, bsz=128, num_updates=14961, lr=9.98883e-05, gnorm=1.926, loss_scale=8, train_wall=10, gb_free=2.8, wall=172901 2021-06-20 18:40:38 | INFO | train_inner | epoch 006: 40 / 3002 loss=2.493, ppl=5.63, wps=5816.8, ups=0.09, wpb=64841, bsz=128, num_updates=14962, lr=9.98883e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=172912 2021-06-20 18:40:49 | INFO | train_inner | epoch 006: 41 / 3002 loss=2.346, ppl=5.08, wps=5806.7, ups=0.09, wpb=64904, bsz=128, num_updates=14963, lr=9.98883e-05, gnorm=1.859, loss_scale=8, train_wall=11, gb_free=2.8, wall=172924 2021-06-20 18:41:01 | INFO | train_inner | epoch 006: 42 / 3002 loss=2.488, ppl=5.61, wps=5744, ups=0.09, wpb=64809, bsz=128, num_updates=14964, lr=9.98883e-05, gnorm=1.92, loss_scale=8, train_wall=11, gb_free=2.8, wall=172935 2021-06-20 18:41:12 | INFO | train_inner | epoch 006: 43 / 3002 loss=2.563, ppl=5.91, wps=5773, ups=0.09, wpb=64860, bsz=128, num_updates=14965, lr=9.98883e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=172946 2021-06-20 18:41:23 | INFO | train_inner | epoch 006: 44 / 3002 loss=2.647, ppl=6.26, wps=5817.4, ups=0.09, wpb=64844, bsz=128, num_updates=14966, lr=9.98883e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=172957 2021-06-20 18:41:34 | INFO | train_inner | epoch 006: 45 / 3002 loss=2.57, ppl=5.94, wps=5770.6, ups=0.09, wpb=64854, bsz=128, num_updates=14967, lr=9.98883e-05, gnorm=2.044, loss_scale=8, train_wall=11, gb_free=2.8, wall=172969 2021-06-20 18:41:45 | INFO | train_inner | epoch 006: 46 / 3002 loss=2.583, ppl=5.99, wps=5811.5, ups=0.09, wpb=64835, bsz=128, num_updates=14968, lr=9.98882e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=172980 2021-06-20 18:41:56 | INFO | train_inner | epoch 006: 47 / 3002 loss=2.487, ppl=5.61, wps=5830, ups=0.09, wpb=64788, bsz=128, num_updates=14969, lr=9.98882e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=172991 2021-06-20 18:42:08 | INFO | train_inner | epoch 006: 48 / 3002 loss=2.465, ppl=5.52, wps=5796.5, ups=0.09, wpb=64903, bsz=128, num_updates=14970, lr=9.98882e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=173002 2021-06-20 18:42:19 | INFO | train_inner | epoch 006: 49 / 3002 loss=2.493, ppl=5.63, wps=5782.2, ups=0.09, wpb=64790, bsz=128, num_updates=14971, lr=9.98882e-05, gnorm=1.868, loss_scale=8, train_wall=11, gb_free=2.8, wall=173013 2021-06-20 18:42:30 | INFO | train_inner | epoch 006: 50 / 3002 loss=2.575, ppl=5.96, wps=5684.7, ups=0.09, wpb=64785, bsz=128, num_updates=14972, lr=9.98882e-05, gnorm=1.903, loss_scale=8, train_wall=11, gb_free=2.8, wall=173025 2021-06-20 18:42:41 | INFO | train_inner | epoch 006: 51 / 3002 loss=2.652, ppl=6.29, wps=5776.9, ups=0.09, wpb=64858, bsz=128, num_updates=14973, lr=9.98882e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=173036 2021-06-20 18:42:53 | INFO | train_inner | epoch 006: 52 / 3002 loss=2.44, ppl=5.43, wps=5653.8, ups=0.09, wpb=64882, bsz=128, num_updates=14974, lr=9.98882e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=173047 2021-06-20 18:43:04 | INFO | train_inner | epoch 006: 53 / 3002 loss=2.558, ppl=5.89, wps=5779.5, ups=0.09, wpb=64912, bsz=128, num_updates=14975, lr=9.98882e-05, gnorm=1.997, loss_scale=8, train_wall=11, gb_free=2.8, wall=173059 2021-06-20 18:43:15 | INFO | train_inner | epoch 006: 54 / 3002 loss=2.541, ppl=5.82, wps=5759.8, ups=0.09, wpb=64879, bsz=128, num_updates=14976, lr=9.98882e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=173070 2021-06-20 18:43:26 | INFO | train_inner | epoch 006: 55 / 3002 loss=2.379, ppl=5.2, wps=5911.9, ups=0.09, wpb=64825, bsz=128, num_updates=14977, lr=9.98882e-05, gnorm=1.91, loss_scale=8, train_wall=10, gb_free=2.8, wall=173081 2021-06-20 18:43:37 | INFO | train_inner | epoch 006: 56 / 3002 loss=2.487, ppl=5.61, wps=5852.4, ups=0.09, wpb=64805, bsz=128, num_updates=14978, lr=9.98882e-05, gnorm=2.131, loss_scale=8, train_wall=11, gb_free=2.8, wall=173092 2021-06-20 18:43:49 | INFO | train_inner | epoch 006: 57 / 3002 loss=2.461, ppl=5.51, wps=5756.5, ups=0.09, wpb=64820, bsz=128, num_updates=14979, lr=9.98882e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=173103 2021-06-20 18:44:00 | INFO | train_inner | epoch 006: 58 / 3002 loss=2.524, ppl=5.75, wps=5882.4, ups=0.09, wpb=64807, bsz=128, num_updates=14980, lr=9.98882e-05, gnorm=2.032, loss_scale=8, train_wall=11, gb_free=2.8, wall=173114 2021-06-20 18:44:11 | INFO | train_inner | epoch 006: 59 / 3002 loss=2.359, ppl=5.13, wps=5778.3, ups=0.09, wpb=64850, bsz=128, num_updates=14981, lr=9.98881e-05, gnorm=1.91, loss_scale=8, train_wall=11, gb_free=2.8, wall=173125 2021-06-20 18:44:22 | INFO | train_inner | epoch 006: 60 / 3002 loss=2.46, ppl=5.5, wps=5888.5, ups=0.09, wpb=64869, bsz=128, num_updates=14982, lr=9.98881e-05, gnorm=2.088, loss_scale=8, train_wall=11, gb_free=2.8, wall=173136 2021-06-20 18:44:33 | INFO | train_inner | epoch 006: 61 / 3002 loss=2.309, ppl=4.96, wps=5787.8, ups=0.09, wpb=64894, bsz=128, num_updates=14983, lr=9.98881e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=173148 2021-06-20 18:44:44 | INFO | train_inner | epoch 006: 62 / 3002 loss=2.496, ppl=5.64, wps=5801.1, ups=0.09, wpb=64835, bsz=128, num_updates=14984, lr=9.98881e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=173159 2021-06-20 18:44:55 | INFO | train_inner | epoch 006: 63 / 3002 loss=2.571, ppl=5.94, wps=5956, ups=0.09, wpb=64794, bsz=128, num_updates=14985, lr=9.98881e-05, gnorm=1.985, loss_scale=8, train_wall=10, gb_free=2.8, wall=173170 2021-06-20 18:45:06 | INFO | train_inner | epoch 006: 64 / 3002 loss=2.533, ppl=5.79, wps=5881.1, ups=0.09, wpb=64815, bsz=128, num_updates=14986, lr=9.98881e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=173181 2021-06-20 18:45:17 | INFO | train_inner | epoch 006: 65 / 3002 loss=2.448, ppl=5.46, wps=5884.6, ups=0.09, wpb=64840, bsz=128, num_updates=14987, lr=9.98881e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=173192 2021-06-20 18:45:28 | INFO | train_inner | epoch 006: 66 / 3002 loss=2.615, ppl=6.13, wps=5832.5, ups=0.09, wpb=64831, bsz=128, num_updates=14988, lr=9.98881e-05, gnorm=2, loss_scale=8, train_wall=11, gb_free=2.8, wall=173203 2021-06-20 18:45:40 | INFO | train_inner | epoch 006: 67 / 3002 loss=2.372, ppl=5.18, wps=5837.9, ups=0.09, wpb=64866, bsz=128, num_updates=14989, lr=9.98881e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=173214 2021-06-20 18:45:51 | INFO | train_inner | epoch 006: 68 / 3002 loss=2.513, ppl=5.71, wps=5789.8, ups=0.09, wpb=64849, bsz=128, num_updates=14990, lr=9.98881e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=173225 2021-06-20 18:46:02 | INFO | train_inner | epoch 006: 69 / 3002 loss=2.407, ppl=5.3, wps=5767.6, ups=0.09, wpb=64891, bsz=128, num_updates=14991, lr=9.98881e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=173236 2021-06-20 18:46:13 | INFO | train_inner | epoch 006: 70 / 3002 loss=2.526, ppl=5.76, wps=5772, ups=0.09, wpb=64818, bsz=128, num_updates=14992, lr=9.98881e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=173248 2021-06-20 18:46:25 | INFO | train_inner | epoch 006: 71 / 3002 loss=2.455, ppl=5.48, wps=5707.2, ups=0.09, wpb=64799, bsz=128, num_updates=14993, lr=9.9888e-05, gnorm=1.976, loss_scale=8, train_wall=11, gb_free=2.8, wall=173259 2021-06-20 18:46:36 | INFO | train_inner | epoch 006: 72 / 3002 loss=2.395, ppl=5.26, wps=5872.5, ups=0.09, wpb=64827, bsz=128, num_updates=14994, lr=9.9888e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=173270 2021-06-20 18:46:47 | INFO | train_inner | epoch 006: 73 / 3002 loss=2.423, ppl=5.36, wps=5767.5, ups=0.09, wpb=64787, bsz=128, num_updates=14995, lr=9.9888e-05, gnorm=1.903, loss_scale=8, train_wall=11, gb_free=2.8, wall=173281 2021-06-20 18:46:58 | INFO | train_inner | epoch 006: 74 / 3002 loss=2.429, ppl=5.39, wps=5891.4, ups=0.09, wpb=64893, bsz=128, num_updates=14996, lr=9.9888e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=173292 2021-06-20 18:47:09 | INFO | train_inner | epoch 006: 75 / 3002 loss=2.454, ppl=5.48, wps=5759.3, ups=0.09, wpb=64793, bsz=128, num_updates=14997, lr=9.9888e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=173303 2021-06-20 18:47:20 | INFO | train_inner | epoch 006: 76 / 3002 loss=2.409, ppl=5.31, wps=5828.8, ups=0.09, wpb=64858, bsz=128, num_updates=14998, lr=9.9888e-05, gnorm=1.905, loss_scale=8, train_wall=11, gb_free=2.8, wall=173315 2021-06-20 18:47:31 | INFO | train_inner | epoch 006: 77 / 3002 loss=2.468, ppl=5.53, wps=5858, ups=0.09, wpb=64840, bsz=128, num_updates=14999, lr=9.9888e-05, gnorm=2.022, loss_scale=8, train_wall=11, gb_free=2.8, wall=173326 2021-06-20 18:47:42 | INFO | train_inner | epoch 006: 78 / 3002 loss=2.455, ppl=5.48, wps=5849.4, ups=0.09, wpb=64900, bsz=128, num_updates=15000, lr=9.9888e-05, gnorm=2.003, loss_scale=8, train_wall=11, gb_free=2.8, wall=173337 2021-06-20 18:47:54 | INFO | train_inner | epoch 006: 79 / 3002 loss=2.421, ppl=5.35, wps=5838.6, ups=0.09, wpb=64812, bsz=128, num_updates=15001, lr=9.9888e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=173348 2021-06-20 18:48:05 | INFO | train_inner | epoch 006: 80 / 3002 loss=2.434, ppl=5.41, wps=5819.1, ups=0.09, wpb=64890, bsz=128, num_updates=15002, lr=9.9888e-05, gnorm=1.877, loss_scale=8, train_wall=11, gb_free=2.8, wall=173359 2021-06-20 18:48:16 | INFO | train_inner | epoch 006: 81 / 3002 loss=2.437, ppl=5.42, wps=5777.1, ups=0.09, wpb=64839, bsz=128, num_updates=15003, lr=9.9888e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=173370 2021-06-20 18:48:27 | INFO | train_inner | epoch 006: 82 / 3002 loss=2.328, ppl=5.02, wps=5891.5, ups=0.09, wpb=64851, bsz=128, num_updates=15004, lr=9.9888e-05, gnorm=1.922, loss_scale=8, train_wall=11, gb_free=2.8, wall=173381 2021-06-20 18:48:38 | INFO | train_inner | epoch 006: 83 / 3002 loss=2.288, ppl=4.88, wps=5838.7, ups=0.09, wpb=64878, bsz=128, num_updates=15005, lr=9.9888e-05, gnorm=1.87, loss_scale=8, train_wall=11, gb_free=2.8, wall=173392 2021-06-20 18:48:49 | INFO | train_inner | epoch 006: 84 / 3002 loss=2.379, ppl=5.2, wps=5732.5, ups=0.09, wpb=64908, bsz=128, num_updates=15006, lr=9.98879e-05, gnorm=2.003, loss_scale=8, train_wall=11, gb_free=2.8, wall=173404 2021-06-20 18:49:00 | INFO | train_inner | epoch 006: 85 / 3002 loss=2.541, ppl=5.82, wps=5844, ups=0.09, wpb=64872, bsz=128, num_updates=15007, lr=9.98879e-05, gnorm=2.072, loss_scale=8, train_wall=11, gb_free=2.8, wall=173415 2021-06-20 18:49:12 | INFO | train_inner | epoch 006: 86 / 3002 loss=2.457, ppl=5.49, wps=5821.7, ups=0.09, wpb=64867, bsz=128, num_updates=15008, lr=9.98879e-05, gnorm=2.096, loss_scale=8, train_wall=11, gb_free=2.8, wall=173426 2021-06-20 18:49:23 | INFO | train_inner | epoch 006: 87 / 3002 loss=2.342, ppl=5.07, wps=5882.7, ups=0.09, wpb=64843, bsz=128, num_updates=15009, lr=9.98879e-05, gnorm=1.907, loss_scale=8, train_wall=11, gb_free=2.8, wall=173437 2021-06-20 18:49:34 | INFO | train_inner | epoch 006: 88 / 3002 loss=2.517, ppl=5.72, wps=5791.5, ups=0.09, wpb=64756, bsz=128, num_updates=15010, lr=9.98879e-05, gnorm=1.953, loss_scale=8, train_wall=11, gb_free=2.8, wall=173448 2021-06-20 18:49:45 | INFO | train_inner | epoch 006: 89 / 3002 loss=2.415, ppl=5.33, wps=5713.4, ups=0.09, wpb=64742, bsz=128, num_updates=15011, lr=9.98879e-05, gnorm=1.976, loss_scale=8, train_wall=11, gb_free=2.8, wall=173459 2021-06-20 18:49:56 | INFO | train_inner | epoch 006: 90 / 3002 loss=2.483, ppl=5.59, wps=5764.6, ups=0.09, wpb=64793, bsz=128, num_updates=15012, lr=9.98879e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=173471 2021-06-20 18:50:08 | INFO | train_inner | epoch 006: 91 / 3002 loss=2.495, ppl=5.64, wps=5662.8, ups=0.09, wpb=64830, bsz=128, num_updates=15013, lr=9.98879e-05, gnorm=1.905, loss_scale=8, train_wall=11, gb_free=2.8, wall=173482 2021-06-20 18:50:19 | INFO | train_inner | epoch 006: 92 / 3002 loss=2.379, ppl=5.2, wps=5784.1, ups=0.09, wpb=64850, bsz=128, num_updates=15014, lr=9.98879e-05, gnorm=1.847, loss_scale=8, train_wall=11, gb_free=2.8, wall=173493 2021-06-20 18:50:30 | INFO | train_inner | epoch 006: 93 / 3002 loss=2.426, ppl=5.37, wps=5895.9, ups=0.09, wpb=64863, bsz=128, num_updates=15015, lr=9.98879e-05, gnorm=2.033, loss_scale=8, train_wall=11, gb_free=2.8, wall=173504 2021-06-20 18:50:41 | INFO | train_inner | epoch 006: 94 / 3002 loss=2.472, ppl=5.55, wps=5907.8, ups=0.09, wpb=64881, bsz=128, num_updates=15016, lr=9.98879e-05, gnorm=1.96, loss_scale=8, train_wall=10, gb_free=2.8, wall=173515 2021-06-20 18:50:52 | INFO | train_inner | epoch 006: 95 / 3002 loss=2.412, ppl=5.32, wps=5818.9, ups=0.09, wpb=64775, bsz=128, num_updates=15017, lr=9.98879e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=173526 2021-06-20 18:51:03 | INFO | train_inner | epoch 006: 96 / 3002 loss=2.448, ppl=5.46, wps=5741, ups=0.09, wpb=64810, bsz=128, num_updates=15018, lr=9.98878e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=173538 2021-06-20 18:51:15 | INFO | train_inner | epoch 006: 97 / 3002 loss=2.491, ppl=5.62, wps=5721.2, ups=0.09, wpb=64853, bsz=128, num_updates=15019, lr=9.98878e-05, gnorm=2.067, loss_scale=8, train_wall=11, gb_free=2.8, wall=173549 2021-06-20 18:51:26 | INFO | train_inner | epoch 006: 98 / 3002 loss=2.376, ppl=5.19, wps=5868, ups=0.09, wpb=64810, bsz=128, num_updates=15020, lr=9.98878e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=173560 2021-06-20 18:51:37 | INFO | train_inner | epoch 006: 99 / 3002 loss=2.355, ppl=5.12, wps=5851.5, ups=0.09, wpb=64798, bsz=128, num_updates=15021, lr=9.98878e-05, gnorm=2.333, loss_scale=8, train_wall=11, gb_free=2.8, wall=173571 2021-06-20 18:51:48 | INFO | train_inner | epoch 006: 100 / 3002 loss=2.368, ppl=5.16, wps=5722.8, ups=0.09, wpb=64802, bsz=128, num_updates=15022, lr=9.98878e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=173583 2021-06-20 18:51:59 | INFO | train_inner | epoch 006: 101 / 3002 loss=2.546, ppl=5.84, wps=5840.5, ups=0.09, wpb=64781, bsz=128, num_updates=15023, lr=9.98878e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=173594 2021-06-20 18:52:10 | INFO | train_inner | epoch 006: 102 / 3002 loss=2.376, ppl=5.19, wps=5845.3, ups=0.09, wpb=64932, bsz=128, num_updates=15024, lr=9.98878e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=173605 2021-06-20 18:52:21 | INFO | train_inner | epoch 006: 103 / 3002 loss=2.462, ppl=5.51, wps=5859.5, ups=0.09, wpb=64844, bsz=128, num_updates=15025, lr=9.98878e-05, gnorm=1.908, loss_scale=8, train_wall=11, gb_free=2.8, wall=173616 2021-06-20 18:52:33 | INFO | train_inner | epoch 006: 104 / 3002 loss=2.433, ppl=5.4, wps=5839.7, ups=0.09, wpb=64879, bsz=128, num_updates=15026, lr=9.98878e-05, gnorm=1.871, loss_scale=8, train_wall=11, gb_free=2.8, wall=173627 2021-06-20 18:52:44 | INFO | train_inner | epoch 006: 105 / 3002 loss=2.538, ppl=5.81, wps=5840.7, ups=0.09, wpb=64828, bsz=128, num_updates=15027, lr=9.98878e-05, gnorm=1.842, loss_scale=8, train_wall=11, gb_free=2.8, wall=173638 2021-06-20 18:52:55 | INFO | train_inner | epoch 006: 106 / 3002 loss=2.366, ppl=5.15, wps=5815.6, ups=0.09, wpb=64849, bsz=128, num_updates=15028, lr=9.98878e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=173649 2021-06-20 18:53:06 | INFO | train_inner | epoch 006: 107 / 3002 loss=2.457, ppl=5.49, wps=5912.6, ups=0.09, wpb=64855, bsz=128, num_updates=15029, lr=9.98878e-05, gnorm=1.908, loss_scale=8, train_wall=10, gb_free=2.8, wall=173660 2021-06-20 18:53:17 | INFO | train_inner | epoch 006: 108 / 3002 loss=2.356, ppl=5.12, wps=5687, ups=0.09, wpb=64784, bsz=128, num_updates=15030, lr=9.98878e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=173672 2021-06-20 18:53:29 | INFO | train_inner | epoch 006: 109 / 3002 loss=2.496, ppl=5.64, wps=5718.4, ups=0.09, wpb=64804, bsz=128, num_updates=15031, lr=9.98877e-05, gnorm=2.028, loss_scale=8, train_wall=11, gb_free=2.8, wall=173683 2021-06-20 18:53:40 | INFO | train_inner | epoch 006: 110 / 3002 loss=2.635, ppl=6.21, wps=5897.7, ups=0.09, wpb=64832, bsz=128, num_updates=15032, lr=9.98877e-05, gnorm=1.867, loss_scale=8, train_wall=11, gb_free=2.8, wall=173694 2021-06-20 18:53:51 | INFO | train_inner | epoch 006: 111 / 3002 loss=2.511, ppl=5.7, wps=5820.9, ups=0.09, wpb=64801, bsz=128, num_updates=15033, lr=9.98877e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=173705 2021-06-20 18:54:02 | INFO | train_inner | epoch 006: 112 / 3002 loss=2.467, ppl=5.53, wps=5924.9, ups=0.09, wpb=64871, bsz=128, num_updates=15034, lr=9.98877e-05, gnorm=1.909, loss_scale=8, train_wall=10, gb_free=2.8, wall=173716 2021-06-20 18:54:13 | INFO | train_inner | epoch 006: 113 / 3002 loss=2.289, ppl=4.89, wps=5810.1, ups=0.09, wpb=64826, bsz=128, num_updates=15035, lr=9.98877e-05, gnorm=1.818, loss_scale=8, train_wall=11, gb_free=2.8, wall=173727 2021-06-20 18:54:24 | INFO | train_inner | epoch 006: 114 / 3002 loss=2.405, ppl=5.3, wps=5808.4, ups=0.09, wpb=64878, bsz=128, num_updates=15036, lr=9.98877e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=173738 2021-06-20 18:54:35 | INFO | train_inner | epoch 006: 115 / 3002 loss=2.577, ppl=5.97, wps=5769.5, ups=0.09, wpb=64834, bsz=128, num_updates=15037, lr=9.98877e-05, gnorm=2.061, loss_scale=8, train_wall=11, gb_free=2.8, wall=173749 2021-06-20 18:54:46 | INFO | train_inner | epoch 006: 116 / 3002 loss=2.441, ppl=5.43, wps=5819.9, ups=0.09, wpb=64861, bsz=128, num_updates=15038, lr=9.98877e-05, gnorm=1.954, loss_scale=8, train_wall=11, gb_free=2.8, wall=173761 2021-06-20 18:54:57 | INFO | train_inner | epoch 006: 117 / 3002 loss=2.571, ppl=5.94, wps=5863.9, ups=0.09, wpb=64808, bsz=128, num_updates=15039, lr=9.98877e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=173772 2021-06-20 18:55:09 | INFO | train_inner | epoch 006: 118 / 3002 loss=2.62, ppl=6.15, wps=5788.7, ups=0.09, wpb=64835, bsz=128, num_updates=15040, lr=9.98877e-05, gnorm=2.025, loss_scale=8, train_wall=11, gb_free=2.8, wall=173783 2021-06-20 18:55:20 | INFO | train_inner | epoch 006: 119 / 3002 loss=2.368, ppl=5.16, wps=5904.5, ups=0.09, wpb=64851, bsz=128, num_updates=15041, lr=9.98877e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=173794 2021-06-20 18:55:31 | INFO | train_inner | epoch 006: 120 / 3002 loss=2.552, ppl=5.86, wps=5824.6, ups=0.09, wpb=64814, bsz=128, num_updates=15042, lr=9.98877e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=173805 2021-06-20 18:55:42 | INFO | train_inner | epoch 006: 121 / 3002 loss=2.417, ppl=5.34, wps=5799.6, ups=0.09, wpb=64806, bsz=128, num_updates=15043, lr=9.98876e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=173816 2021-06-20 18:55:53 | INFO | train_inner | epoch 006: 122 / 3002 loss=2.529, ppl=5.77, wps=5839.4, ups=0.09, wpb=64822, bsz=128, num_updates=15044, lr=9.98876e-05, gnorm=1.887, loss_scale=8, train_wall=11, gb_free=2.8, wall=173827 2021-06-20 18:56:04 | INFO | train_inner | epoch 006: 123 / 3002 loss=2.404, ppl=5.29, wps=5838.2, ups=0.09, wpb=64763, bsz=128, num_updates=15045, lr=9.98876e-05, gnorm=2.285, loss_scale=8, train_wall=11, gb_free=2.8, wall=173838 2021-06-20 18:56:15 | INFO | train_inner | epoch 006: 124 / 3002 loss=2.397, ppl=5.27, wps=5717.6, ups=0.09, wpb=64852, bsz=128, num_updates=15046, lr=9.98876e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=173850 2021-06-20 18:56:26 | INFO | train_inner | epoch 006: 125 / 3002 loss=2.496, ppl=5.64, wps=5857.4, ups=0.09, wpb=64897, bsz=128, num_updates=15047, lr=9.98876e-05, gnorm=1.95, loss_scale=8, train_wall=11, gb_free=2.8, wall=173861 2021-06-20 18:56:38 | INFO | train_inner | epoch 006: 126 / 3002 loss=2.514, ppl=5.71, wps=5758.2, ups=0.09, wpb=64810, bsz=128, num_updates=15048, lr=9.98876e-05, gnorm=1.996, loss_scale=8, train_wall=11, gb_free=2.8, wall=173872 2021-06-20 18:56:49 | INFO | train_inner | epoch 006: 127 / 3002 loss=2.413, ppl=5.33, wps=5805.8, ups=0.09, wpb=64769, bsz=128, num_updates=15049, lr=9.98876e-05, gnorm=1.991, loss_scale=16, train_wall=11, gb_free=2.8, wall=173883 2021-06-20 18:57:00 | INFO | train_inner | epoch 006: 128 / 3002 loss=2.54, ppl=5.82, wps=5645.8, ups=0.09, wpb=64770, bsz=128, num_updates=15050, lr=9.98876e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=173895 2021-06-20 18:57:12 | INFO | train_inner | epoch 006: 129 / 3002 loss=2.534, ppl=5.79, wps=5784.2, ups=0.09, wpb=64782, bsz=128, num_updates=15051, lr=9.98876e-05, gnorm=1.98, loss_scale=16, train_wall=11, gb_free=2.8, wall=173906 2021-06-20 18:57:23 | INFO | train_inner | epoch 006: 130 / 3002 loss=2.401, ppl=5.28, wps=5897.3, ups=0.09, wpb=64910, bsz=128, num_updates=15052, lr=9.98876e-05, gnorm=1.954, loss_scale=16, train_wall=11, gb_free=2.8, wall=173917 2021-06-20 18:57:34 | INFO | train_inner | epoch 006: 131 / 3002 loss=2.549, ppl=5.85, wps=5741.2, ups=0.09, wpb=64845, bsz=128, num_updates=15053, lr=9.98876e-05, gnorm=1.945, loss_scale=16, train_wall=11, gb_free=2.8, wall=173928 2021-06-20 18:57:45 | INFO | train_inner | epoch 006: 132 / 3002 loss=2.619, ppl=6.14, wps=5753.8, ups=0.09, wpb=64858, bsz=128, num_updates=15054, lr=9.98876e-05, gnorm=2.087, loss_scale=16, train_wall=11, gb_free=2.8, wall=173939 2021-06-20 18:57:56 | INFO | train_inner | epoch 006: 133 / 3002 loss=2.432, ppl=5.4, wps=5887.3, ups=0.09, wpb=64857, bsz=128, num_updates=15055, lr=9.98876e-05, gnorm=1.905, loss_scale=16, train_wall=11, gb_free=2.8, wall=173950 2021-06-20 18:58:07 | INFO | train_inner | epoch 006: 134 / 3002 loss=2.54, ppl=5.81, wps=5869.1, ups=0.09, wpb=64803, bsz=128, num_updates=15056, lr=9.98875e-05, gnorm=1.937, loss_scale=16, train_wall=11, gb_free=2.8, wall=173962 2021-06-20 18:58:18 | INFO | train_inner | epoch 006: 135 / 3002 loss=2.5, ppl=5.66, wps=5838.4, ups=0.09, wpb=64874, bsz=128, num_updates=15057, lr=9.98875e-05, gnorm=1.877, loss_scale=16, train_wall=11, gb_free=2.8, wall=173973 2021-06-20 18:58:30 | INFO | train_inner | epoch 006: 136 / 3002 loss=2.512, ppl=5.7, wps=5756.1, ups=0.09, wpb=64768, bsz=128, num_updates=15058, lr=9.98875e-05, gnorm=1.958, loss_scale=16, train_wall=11, gb_free=2.8, wall=173984 2021-06-20 18:58:41 | INFO | train_inner | epoch 006: 137 / 3002 loss=2.574, ppl=5.95, wps=5835.8, ups=0.09, wpb=64766, bsz=128, num_updates=15059, lr=9.98875e-05, gnorm=1.997, loss_scale=16, train_wall=11, gb_free=2.8, wall=173995 2021-06-20 18:58:52 | INFO | train_inner | epoch 006: 138 / 3002 loss=2.526, ppl=5.76, wps=5876.9, ups=0.09, wpb=64858, bsz=128, num_updates=15060, lr=9.98875e-05, gnorm=1.868, loss_scale=16, train_wall=11, gb_free=2.8, wall=174006 2021-06-20 18:59:03 | INFO | train_inner | epoch 006: 139 / 3002 loss=2.466, ppl=5.52, wps=5853.7, ups=0.09, wpb=64831, bsz=128, num_updates=15061, lr=9.98875e-05, gnorm=1.925, loss_scale=16, train_wall=11, gb_free=2.8, wall=174017 2021-06-20 18:59:14 | INFO | train_inner | epoch 006: 140 / 3002 loss=2.593, ppl=6.03, wps=5829.8, ups=0.09, wpb=64793, bsz=128, num_updates=15062, lr=9.98875e-05, gnorm=1.957, loss_scale=16, train_wall=11, gb_free=2.8, wall=174028 2021-06-20 18:59:25 | INFO | train_inner | epoch 006: 141 / 3002 loss=2.687, ppl=6.44, wps=5798.2, ups=0.09, wpb=64853, bsz=128, num_updates=15063, lr=9.98875e-05, gnorm=1.937, loss_scale=16, train_wall=11, gb_free=2.8, wall=174039 2021-06-20 18:59:36 | INFO | train_inner | epoch 006: 142 / 3002 loss=2.601, ppl=6.07, wps=5848.2, ups=0.09, wpb=64849, bsz=128, num_updates=15064, lr=9.98875e-05, gnorm=1.973, loss_scale=16, train_wall=11, gb_free=2.8, wall=174050 2021-06-20 18:59:47 | INFO | train_inner | epoch 006: 143 / 3002 loss=2.532, ppl=5.79, wps=5788.7, ups=0.09, wpb=64857, bsz=128, num_updates=15065, lr=9.98875e-05, gnorm=1.91, loss_scale=16, train_wall=11, gb_free=2.8, wall=174062 2021-06-20 18:59:58 | INFO | train_inner | epoch 006: 144 / 3002 loss=2.502, ppl=5.66, wps=5821.8, ups=0.09, wpb=64876, bsz=128, num_updates=15066, lr=9.98875e-05, gnorm=1.907, loss_scale=16, train_wall=11, gb_free=2.8, wall=174073 2021-06-20 19:00:10 | INFO | train_inner | epoch 006: 145 / 3002 loss=2.522, ppl=5.74, wps=5877.2, ups=0.09, wpb=64916, bsz=128, num_updates=15067, lr=9.98875e-05, gnorm=1.985, loss_scale=16, train_wall=11, gb_free=2.8, wall=174084 2021-06-20 19:00:21 | INFO | train_inner | epoch 006: 146 / 3002 loss=2.497, ppl=5.65, wps=5729.9, ups=0.09, wpb=64749, bsz=128, num_updates=15068, lr=9.98874e-05, gnorm=2.046, loss_scale=16, train_wall=11, gb_free=2.8, wall=174095 2021-06-20 19:00:32 | INFO | train_inner | epoch 006: 147 / 3002 loss=2.415, ppl=5.33, wps=5827.5, ups=0.09, wpb=64765, bsz=128, num_updates=15069, lr=9.98874e-05, gnorm=1.923, loss_scale=16, train_wall=11, gb_free=2.8, wall=174106 2021-06-20 19:00:43 | INFO | train_inner | epoch 006: 148 / 3002 loss=2.612, ppl=6.11, wps=5773.9, ups=0.09, wpb=64809, bsz=128, num_updates=15070, lr=9.98874e-05, gnorm=2.273, loss_scale=16, train_wall=11, gb_free=2.8, wall=174117 2021-06-20 19:00:54 | INFO | train_inner | epoch 006: 149 / 3002 loss=2.622, ppl=6.16, wps=5741.8, ups=0.09, wpb=64779, bsz=128, num_updates=15071, lr=9.98874e-05, gnorm=1.975, loss_scale=16, train_wall=11, gb_free=2.8, wall=174129 2021-06-20 19:01:06 | INFO | train_inner | epoch 006: 150 / 3002 loss=2.493, ppl=5.63, wps=5805.9, ups=0.09, wpb=64775, bsz=128, num_updates=15072, lr=9.98874e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=174140 2021-06-20 19:01:17 | INFO | train_inner | epoch 006: 151 / 3002 loss=2.509, ppl=5.69, wps=5870.5, ups=0.09, wpb=64805, bsz=128, num_updates=15073, lr=9.98874e-05, gnorm=1.952, loss_scale=16, train_wall=11, gb_free=2.8, wall=174151 2021-06-20 19:01:28 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-20 19:01:39 | INFO | train_inner | epoch 006: 153 / 3002 loss=2.497, ppl=5.65, wps=2882.5, ups=0.04, wpb=64824, bsz=128, num_updates=15074, lr=9.98874e-05, gnorm=1.979, loss_scale=8, train_wall=22, gb_free=2.8, wall=174173 2021-06-20 19:01:50 | INFO | train_inner | epoch 006: 154 / 3002 loss=2.439, ppl=5.42, wps=5710.6, ups=0.09, wpb=64799, bsz=128, num_updates=15075, lr=9.98874e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=174185 2021-06-20 19:02:02 | INFO | train_inner | epoch 006: 155 / 3002 loss=2.607, ppl=6.09, wps=5727.8, ups=0.09, wpb=64818, bsz=128, num_updates=15076, lr=9.98874e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=174196 2021-06-20 19:02:13 | INFO | train_inner | epoch 006: 156 / 3002 loss=2.472, ppl=5.55, wps=5717, ups=0.09, wpb=64832, bsz=128, num_updates=15077, lr=9.98874e-05, gnorm=1.945, loss_scale=8, train_wall=11, gb_free=2.8, wall=174207 2021-06-20 19:02:24 | INFO | train_inner | epoch 006: 157 / 3002 loss=2.456, ppl=5.49, wps=5727.4, ups=0.09, wpb=64800, bsz=128, num_updates=15078, lr=9.98874e-05, gnorm=1.858, loss_scale=8, train_wall=11, gb_free=2.8, wall=174219 2021-06-20 19:02:36 | INFO | train_inner | epoch 006: 158 / 3002 loss=2.298, ppl=4.92, wps=5788.9, ups=0.09, wpb=64754, bsz=128, num_updates=15079, lr=9.98874e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=174230 2021-06-20 19:02:47 | INFO | train_inner | epoch 006: 159 / 3002 loss=2.551, ppl=5.86, wps=5872.6, ups=0.09, wpb=64898, bsz=128, num_updates=15080, lr=9.98874e-05, gnorm=2.021, loss_scale=8, train_wall=11, gb_free=2.8, wall=174241 2021-06-20 19:02:58 | INFO | train_inner | epoch 006: 160 / 3002 loss=2.627, ppl=6.18, wps=5861.1, ups=0.09, wpb=64824, bsz=128, num_updates=15081, lr=9.98873e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=174252 2021-06-20 19:03:09 | INFO | train_inner | epoch 006: 161 / 3002 loss=2.44, ppl=5.43, wps=5830.1, ups=0.09, wpb=64835, bsz=128, num_updates=15082, lr=9.98873e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=174263 2021-06-20 19:03:20 | INFO | train_inner | epoch 006: 162 / 3002 loss=2.38, ppl=5.2, wps=5779.1, ups=0.09, wpb=64859, bsz=128, num_updates=15083, lr=9.98873e-05, gnorm=1.814, loss_scale=8, train_wall=11, gb_free=2.8, wall=174274 2021-06-20 19:03:31 | INFO | train_inner | epoch 006: 163 / 3002 loss=2.516, ppl=5.72, wps=5792.2, ups=0.09, wpb=64787, bsz=128, num_updates=15084, lr=9.98873e-05, gnorm=1.886, loss_scale=8, train_wall=11, gb_free=2.8, wall=174286 2021-06-20 19:03:43 | INFO | train_inner | epoch 006: 164 / 3002 loss=2.478, ppl=5.57, wps=5755.5, ups=0.09, wpb=64831, bsz=128, num_updates=15085, lr=9.98873e-05, gnorm=1.909, loss_scale=8, train_wall=11, gb_free=2.8, wall=174297 2021-06-20 19:03:54 | INFO | train_inner | epoch 006: 165 / 3002 loss=2.549, ppl=5.85, wps=5741.7, ups=0.09, wpb=64730, bsz=128, num_updates=15086, lr=9.98873e-05, gnorm=2.267, loss_scale=8, train_wall=11, gb_free=2.8, wall=174308 2021-06-20 19:04:05 | INFO | train_inner | epoch 006: 166 / 3002 loss=2.47, ppl=5.54, wps=5734, ups=0.09, wpb=64810, bsz=128, num_updates=15087, lr=9.98873e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=174319 2021-06-20 19:04:16 | INFO | train_inner | epoch 006: 167 / 3002 loss=2.558, ppl=5.89, wps=5788.6, ups=0.09, wpb=64782, bsz=128, num_updates=15088, lr=9.98873e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=174331 2021-06-20 19:04:27 | INFO | train_inner | epoch 006: 168 / 3002 loss=2.568, ppl=5.93, wps=5900.4, ups=0.09, wpb=64815, bsz=128, num_updates=15089, lr=9.98873e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=174342 2021-06-20 19:04:39 | INFO | train_inner | epoch 006: 169 / 3002 loss=2.321, ppl=5, wps=5764, ups=0.09, wpb=64843, bsz=128, num_updates=15090, lr=9.98873e-05, gnorm=1.894, loss_scale=8, train_wall=11, gb_free=2.8, wall=174353 2021-06-20 19:04:50 | INFO | train_inner | epoch 006: 170 / 3002 loss=2.439, ppl=5.42, wps=5804.6, ups=0.09, wpb=64919, bsz=128, num_updates=15091, lr=9.98873e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=174364 2021-06-20 19:05:01 | INFO | train_inner | epoch 006: 171 / 3002 loss=2.51, ppl=5.7, wps=5739.8, ups=0.09, wpb=64785, bsz=128, num_updates=15092, lr=9.98873e-05, gnorm=2.087, loss_scale=8, train_wall=11, gb_free=2.8, wall=174375 2021-06-20 19:05:12 | INFO | train_inner | epoch 006: 172 / 3002 loss=2.411, ppl=5.32, wps=5810.2, ups=0.09, wpb=64737, bsz=128, num_updates=15093, lr=9.98872e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=174386 2021-06-20 19:05:23 | INFO | train_inner | epoch 006: 173 / 3002 loss=2.52, ppl=5.73, wps=5826.4, ups=0.09, wpb=64778, bsz=128, num_updates=15094, lr=9.98872e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=174398 2021-06-20 19:05:35 | INFO | train_inner | epoch 006: 174 / 3002 loss=2.62, ppl=6.15, wps=5779.4, ups=0.09, wpb=64895, bsz=128, num_updates=15095, lr=9.98872e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=174409 2021-06-20 19:05:46 | INFO | train_inner | epoch 006: 175 / 3002 loss=2.476, ppl=5.56, wps=5851.8, ups=0.09, wpb=64836, bsz=128, num_updates=15096, lr=9.98872e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=174420 2021-06-20 19:05:57 | INFO | train_inner | epoch 006: 176 / 3002 loss=2.585, ppl=6, wps=5748.5, ups=0.09, wpb=64807, bsz=128, num_updates=15097, lr=9.98872e-05, gnorm=1.998, loss_scale=8, train_wall=11, gb_free=2.8, wall=174431 2021-06-20 19:06:08 | INFO | train_inner | epoch 006: 177 / 3002 loss=2.415, ppl=5.33, wps=5821.5, ups=0.09, wpb=64862, bsz=128, num_updates=15098, lr=9.98872e-05, gnorm=2.086, loss_scale=8, train_wall=11, gb_free=2.8, wall=174442 2021-06-20 19:06:19 | INFO | train_inner | epoch 006: 178 / 3002 loss=2.421, ppl=5.36, wps=5725.4, ups=0.09, wpb=64854, bsz=128, num_updates=15099, lr=9.98872e-05, gnorm=1.902, loss_scale=8, train_wall=11, gb_free=2.8, wall=174454 2021-06-20 19:06:30 | INFO | train_inner | epoch 006: 179 / 3002 loss=2.509, ppl=5.69, wps=5878.6, ups=0.09, wpb=64867, bsz=128, num_updates=15100, lr=9.98872e-05, gnorm=1.922, loss_scale=8, train_wall=11, gb_free=2.8, wall=174465 2021-06-20 19:06:41 | INFO | train_inner | epoch 006: 180 / 3002 loss=2.432, ppl=5.4, wps=5900.5, ups=0.09, wpb=64775, bsz=128, num_updates=15101, lr=9.98872e-05, gnorm=2.147, loss_scale=8, train_wall=11, gb_free=2.8, wall=174476 2021-06-20 19:06:52 | INFO | train_inner | epoch 006: 181 / 3002 loss=2.403, ppl=5.29, wps=5822.8, ups=0.09, wpb=64792, bsz=128, num_updates=15102, lr=9.98872e-05, gnorm=1.998, loss_scale=8, train_wall=11, gb_free=2.8, wall=174487 2021-06-20 19:07:04 | INFO | train_inner | epoch 006: 182 / 3002 loss=2.469, ppl=5.54, wps=5820, ups=0.09, wpb=64850, bsz=128, num_updates=15103, lr=9.98872e-05, gnorm=1.894, loss_scale=8, train_wall=11, gb_free=2.8, wall=174498 2021-06-20 19:07:15 | INFO | train_inner | epoch 006: 183 / 3002 loss=2.48, ppl=5.58, wps=5767.9, ups=0.09, wpb=64878, bsz=128, num_updates=15104, lr=9.98872e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=174509 2021-06-20 19:07:26 | INFO | train_inner | epoch 006: 184 / 3002 loss=2.401, ppl=5.28, wps=5741.7, ups=0.09, wpb=64849, bsz=128, num_updates=15105, lr=9.98872e-05, gnorm=1.921, loss_scale=8, train_wall=11, gb_free=2.8, wall=174520 2021-06-20 19:07:37 | INFO | train_inner | epoch 006: 185 / 3002 loss=2.41, ppl=5.32, wps=5807.7, ups=0.09, wpb=64849, bsz=128, num_updates=15106, lr=9.98871e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=174532 2021-06-20 19:07:49 | INFO | train_inner | epoch 006: 186 / 3002 loss=2.481, ppl=5.58, wps=5779.5, ups=0.09, wpb=64788, bsz=128, num_updates=15107, lr=9.98871e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=174543 2021-06-20 19:08:00 | INFO | train_inner | epoch 006: 187 / 3002 loss=2.591, ppl=6.03, wps=5687.1, ups=0.09, wpb=64846, bsz=128, num_updates=15108, lr=9.98871e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=174554 2021-06-20 19:08:11 | INFO | train_inner | epoch 006: 188 / 3002 loss=2.499, ppl=5.65, wps=5750, ups=0.09, wpb=64801, bsz=128, num_updates=15109, lr=9.98871e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=174566 2021-06-20 19:08:22 | INFO | train_inner | epoch 006: 189 / 3002 loss=2.528, ppl=5.77, wps=5780.8, ups=0.09, wpb=64916, bsz=128, num_updates=15110, lr=9.98871e-05, gnorm=2.025, loss_scale=8, train_wall=11, gb_free=2.8, wall=174577 2021-06-20 19:08:34 | INFO | train_inner | epoch 006: 190 / 3002 loss=2.506, ppl=5.68, wps=5767.4, ups=0.09, wpb=64747, bsz=128, num_updates=15111, lr=9.98871e-05, gnorm=2.354, loss_scale=8, train_wall=11, gb_free=2.8, wall=174588 2021-06-20 19:08:45 | INFO | train_inner | epoch 006: 191 / 3002 loss=2.527, ppl=5.77, wps=5838.6, ups=0.09, wpb=64872, bsz=128, num_updates=15112, lr=9.98871e-05, gnorm=1.888, loss_scale=8, train_wall=11, gb_free=2.8, wall=174599 2021-06-20 19:08:56 | INFO | train_inner | epoch 006: 192 / 3002 loss=2.466, ppl=5.52, wps=5771.8, ups=0.09, wpb=64878, bsz=128, num_updates=15113, lr=9.98871e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=174610 2021-06-20 19:09:07 | INFO | train_inner | epoch 006: 193 / 3002 loss=2.39, ppl=5.24, wps=5702.4, ups=0.09, wpb=64774, bsz=128, num_updates=15114, lr=9.98871e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=174622 2021-06-20 19:09:18 | INFO | train_inner | epoch 006: 194 / 3002 loss=2.563, ppl=5.91, wps=5869.5, ups=0.09, wpb=64778, bsz=128, num_updates=15115, lr=9.98871e-05, gnorm=2.153, loss_scale=8, train_wall=11, gb_free=2.8, wall=174633 2021-06-20 19:09:30 | INFO | train_inner | epoch 006: 195 / 3002 loss=2.399, ppl=5.27, wps=5817.9, ups=0.09, wpb=64880, bsz=128, num_updates=15116, lr=9.98871e-05, gnorm=2.018, loss_scale=8, train_wall=11, gb_free=2.8, wall=174644 2021-06-20 19:09:41 | INFO | train_inner | epoch 006: 196 / 3002 loss=2.382, ppl=5.21, wps=5889.7, ups=0.09, wpb=64766, bsz=128, num_updates=15117, lr=9.98871e-05, gnorm=1.891, loss_scale=8, train_wall=11, gb_free=2.8, wall=174655 2021-06-20 19:09:52 | INFO | train_inner | epoch 006: 197 / 3002 loss=2.535, ppl=5.8, wps=5773, ups=0.09, wpb=64795, bsz=128, num_updates=15118, lr=9.9887e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=174666 2021-06-20 19:10:03 | INFO | train_inner | epoch 006: 198 / 3002 loss=2.477, ppl=5.57, wps=5775.7, ups=0.09, wpb=64819, bsz=128, num_updates=15119, lr=9.9887e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=174677 2021-06-20 19:10:14 | INFO | train_inner | epoch 006: 199 / 3002 loss=2.578, ppl=5.97, wps=5748.6, ups=0.09, wpb=64763, bsz=128, num_updates=15120, lr=9.9887e-05, gnorm=2.016, loss_scale=8, train_wall=11, gb_free=2.8, wall=174689 2021-06-20 19:10:26 | INFO | train_inner | epoch 006: 200 / 3002 loss=2.427, ppl=5.38, wps=5645.5, ups=0.09, wpb=64770, bsz=128, num_updates=15121, lr=9.9887e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=174700 2021-06-20 19:10:37 | INFO | train_inner | epoch 006: 201 / 3002 loss=2.467, ppl=5.53, wps=5715.1, ups=0.09, wpb=64885, bsz=128, num_updates=15122, lr=9.9887e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=174711 2021-06-20 19:10:48 | INFO | train_inner | epoch 006: 202 / 3002 loss=2.636, ppl=6.22, wps=5795.9, ups=0.09, wpb=64770, bsz=128, num_updates=15123, lr=9.9887e-05, gnorm=2.042, loss_scale=8, train_wall=11, gb_free=2.8, wall=174723 2021-06-20 19:11:00 | INFO | train_inner | epoch 006: 203 / 3002 loss=2.499, ppl=5.65, wps=5600.4, ups=0.09, wpb=64777, bsz=128, num_updates=15124, lr=9.9887e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=174734 2021-06-20 19:11:11 | INFO | train_inner | epoch 006: 204 / 3002 loss=2.499, ppl=5.65, wps=5915, ups=0.09, wpb=64863, bsz=128, num_updates=15125, lr=9.9887e-05, gnorm=1.958, loss_scale=8, train_wall=10, gb_free=2.8, wall=174745 2021-06-20 19:11:22 | INFO | train_inner | epoch 006: 205 / 3002 loss=2.53, ppl=5.78, wps=5848.2, ups=0.09, wpb=64825, bsz=128, num_updates=15126, lr=9.9887e-05, gnorm=1.989, loss_scale=8, train_wall=11, gb_free=2.8, wall=174756 2021-06-20 19:11:33 | INFO | train_inner | epoch 006: 206 / 3002 loss=2.494, ppl=5.63, wps=5757.1, ups=0.09, wpb=64825, bsz=128, num_updates=15127, lr=9.9887e-05, gnorm=2.054, loss_scale=8, train_wall=11, gb_free=2.8, wall=174767 2021-06-20 19:11:44 | INFO | train_inner | epoch 006: 207 / 3002 loss=2.593, ppl=6.03, wps=5790.7, ups=0.09, wpb=64843, bsz=128, num_updates=15128, lr=9.9887e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=174779 2021-06-20 19:11:56 | INFO | train_inner | epoch 006: 208 / 3002 loss=2.586, ppl=6, wps=5716.3, ups=0.09, wpb=64891, bsz=128, num_updates=15129, lr=9.9887e-05, gnorm=1.941, loss_scale=8, train_wall=11, gb_free=2.8, wall=174790 2021-06-20 19:12:07 | INFO | train_inner | epoch 006: 209 / 3002 loss=2.433, ppl=5.4, wps=5799.5, ups=0.09, wpb=64862, bsz=128, num_updates=15130, lr=9.9887e-05, gnorm=1.86, loss_scale=8, train_wall=11, gb_free=2.8, wall=174801 2021-06-20 19:12:18 | INFO | train_inner | epoch 006: 210 / 3002 loss=2.402, ppl=5.29, wps=5829.9, ups=0.09, wpb=64853, bsz=128, num_updates=15131, lr=9.98869e-05, gnorm=1.891, loss_scale=8, train_wall=11, gb_free=2.8, wall=174812 2021-06-20 19:12:29 | INFO | train_inner | epoch 006: 211 / 3002 loss=2.439, ppl=5.42, wps=5760.9, ups=0.09, wpb=64792, bsz=128, num_updates=15132, lr=9.98869e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=174824 2021-06-20 19:12:40 | INFO | train_inner | epoch 006: 212 / 3002 loss=2.552, ppl=5.87, wps=5815.6, ups=0.09, wpb=64733, bsz=128, num_updates=15133, lr=9.98869e-05, gnorm=1.905, loss_scale=8, train_wall=11, gb_free=2.8, wall=174835 2021-06-20 19:12:52 | INFO | train_inner | epoch 006: 213 / 3002 loss=2.412, ppl=5.32, wps=5815.3, ups=0.09, wpb=64784, bsz=128, num_updates=15134, lr=9.98869e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=174846 2021-06-20 19:13:03 | INFO | train_inner | epoch 006: 214 / 3002 loss=2.636, ppl=6.22, wps=5781.7, ups=0.09, wpb=64713, bsz=128, num_updates=15135, lr=9.98869e-05, gnorm=2.182, loss_scale=8, train_wall=11, gb_free=2.8, wall=174857 2021-06-20 19:13:14 | INFO | train_inner | epoch 006: 215 / 3002 loss=2.507, ppl=5.69, wps=5655.4, ups=0.09, wpb=64835, bsz=128, num_updates=15136, lr=9.98869e-05, gnorm=2.005, loss_scale=8, train_wall=11, gb_free=2.8, wall=174869 2021-06-20 19:13:25 | INFO | train_inner | epoch 006: 216 / 3002 loss=2.444, ppl=5.44, wps=5750.8, ups=0.09, wpb=64801, bsz=128, num_updates=15137, lr=9.98869e-05, gnorm=2.019, loss_scale=8, train_wall=11, gb_free=2.8, wall=174880 2021-06-20 19:13:37 | INFO | train_inner | epoch 006: 217 / 3002 loss=2.388, ppl=5.23, wps=5776, ups=0.09, wpb=64854, bsz=128, num_updates=15138, lr=9.98869e-05, gnorm=1.876, loss_scale=8, train_wall=11, gb_free=2.8, wall=174891 2021-06-20 19:13:48 | INFO | train_inner | epoch 006: 218 / 3002 loss=2.62, ppl=6.15, wps=5781.1, ups=0.09, wpb=64843, bsz=128, num_updates=15139, lr=9.98869e-05, gnorm=1.905, loss_scale=8, train_wall=11, gb_free=2.8, wall=174902 2021-06-20 19:13:59 | INFO | train_inner | epoch 006: 219 / 3002 loss=2.402, ppl=5.28, wps=5685.6, ups=0.09, wpb=64748, bsz=128, num_updates=15140, lr=9.98869e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=174914 2021-06-20 19:14:11 | INFO | train_inner | epoch 006: 220 / 3002 loss=2.582, ppl=5.99, wps=5780.9, ups=0.09, wpb=64790, bsz=128, num_updates=15141, lr=9.98869e-05, gnorm=2, loss_scale=8, train_wall=11, gb_free=2.8, wall=174925 2021-06-20 19:14:21 | INFO | train_inner | epoch 006: 221 / 3002 loss=2.447, ppl=5.45, wps=5897.1, ups=0.09, wpb=64834, bsz=128, num_updates=15142, lr=9.98869e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=174936 2021-06-20 19:14:32 | INFO | train_inner | epoch 006: 222 / 3002 loss=2.466, ppl=5.52, wps=5923.1, ups=0.09, wpb=64832, bsz=128, num_updates=15143, lr=9.98868e-05, gnorm=1.967, loss_scale=8, train_wall=10, gb_free=2.8, wall=174947 2021-06-20 19:14:44 | INFO | train_inner | epoch 006: 223 / 3002 loss=2.5, ppl=5.66, wps=5838.1, ups=0.09, wpb=64817, bsz=128, num_updates=15144, lr=9.98868e-05, gnorm=2.019, loss_scale=8, train_wall=11, gb_free=2.8, wall=174958 2021-06-20 19:14:55 | INFO | train_inner | epoch 006: 224 / 3002 loss=2.478, ppl=5.57, wps=5801.9, ups=0.09, wpb=64753, bsz=128, num_updates=15145, lr=9.98868e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=174969 2021-06-20 19:15:06 | INFO | train_inner | epoch 006: 225 / 3002 loss=2.546, ppl=5.84, wps=5764.7, ups=0.09, wpb=64826, bsz=128, num_updates=15146, lr=9.98868e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=174980 2021-06-20 19:15:17 | INFO | train_inner | epoch 006: 226 / 3002 loss=2.479, ppl=5.58, wps=5853.4, ups=0.09, wpb=64782, bsz=128, num_updates=15147, lr=9.98868e-05, gnorm=1.843, loss_scale=8, train_wall=11, gb_free=2.8, wall=174991 2021-06-20 19:15:28 | INFO | train_inner | epoch 006: 227 / 3002 loss=2.526, ppl=5.76, wps=5789.8, ups=0.09, wpb=64763, bsz=128, num_updates=15148, lr=9.98868e-05, gnorm=1.899, loss_scale=8, train_wall=11, gb_free=2.8, wall=175003 2021-06-20 19:15:39 | INFO | train_inner | epoch 006: 228 / 3002 loss=2.434, ppl=5.4, wps=5820.6, ups=0.09, wpb=64853, bsz=128, num_updates=15149, lr=9.98868e-05, gnorm=1.891, loss_scale=8, train_wall=11, gb_free=2.8, wall=175014 2021-06-20 19:15:51 | INFO | train_inner | epoch 006: 229 / 3002 loss=2.4, ppl=5.28, wps=5796.9, ups=0.09, wpb=64747, bsz=128, num_updates=15150, lr=9.98868e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=175025 2021-06-20 19:16:02 | INFO | train_inner | epoch 006: 230 / 3002 loss=2.591, ppl=6.02, wps=5886.9, ups=0.09, wpb=64900, bsz=128, num_updates=15151, lr=9.98868e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=175036 2021-06-20 19:16:13 | INFO | train_inner | epoch 006: 231 / 3002 loss=2.418, ppl=5.34, wps=5779.8, ups=0.09, wpb=64847, bsz=128, num_updates=15152, lr=9.98868e-05, gnorm=1.972, loss_scale=8, train_wall=11, gb_free=2.8, wall=175047 2021-06-20 19:16:24 | INFO | train_inner | epoch 006: 232 / 3002 loss=2.422, ppl=5.36, wps=5706.4, ups=0.09, wpb=64801, bsz=128, num_updates=15153, lr=9.98868e-05, gnorm=2.073, loss_scale=8, train_wall=11, gb_free=2.8, wall=175058 2021-06-20 19:16:35 | INFO | train_inner | epoch 006: 233 / 3002 loss=2.554, ppl=5.87, wps=5806.4, ups=0.09, wpb=64821, bsz=128, num_updates=15154, lr=9.98868e-05, gnorm=2.173, loss_scale=8, train_wall=11, gb_free=2.8, wall=175070 2021-06-20 19:16:46 | INFO | train_inner | epoch 006: 234 / 3002 loss=2.519, ppl=5.73, wps=5831.7, ups=0.09, wpb=64891, bsz=128, num_updates=15155, lr=9.98868e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=175081 2021-06-20 19:16:58 | INFO | train_inner | epoch 006: 235 / 3002 loss=2.372, ppl=5.18, wps=5669.8, ups=0.09, wpb=64833, bsz=128, num_updates=15156, lr=9.98867e-05, gnorm=1.908, loss_scale=8, train_wall=11, gb_free=2.8, wall=175092 2021-06-20 19:17:09 | INFO | train_inner | epoch 006: 236 / 3002 loss=2.495, ppl=5.64, wps=5767.9, ups=0.09, wpb=64823, bsz=128, num_updates=15157, lr=9.98867e-05, gnorm=1.996, loss_scale=8, train_wall=11, gb_free=2.8, wall=175103 2021-06-20 19:17:20 | INFO | train_inner | epoch 006: 237 / 3002 loss=2.498, ppl=5.65, wps=5798.9, ups=0.09, wpb=64859, bsz=128, num_updates=15158, lr=9.98867e-05, gnorm=1.92, loss_scale=8, train_wall=11, gb_free=2.8, wall=175115 2021-06-20 19:17:31 | INFO | train_inner | epoch 006: 238 / 3002 loss=2.535, ppl=5.8, wps=5775.1, ups=0.09, wpb=64740, bsz=128, num_updates=15159, lr=9.98867e-05, gnorm=1.898, loss_scale=8, train_wall=11, gb_free=2.8, wall=175126 2021-06-20 19:17:43 | INFO | train_inner | epoch 006: 239 / 3002 loss=2.694, ppl=6.47, wps=5771.1, ups=0.09, wpb=64852, bsz=128, num_updates=15160, lr=9.98867e-05, gnorm=2.044, loss_scale=8, train_wall=11, gb_free=2.8, wall=175137 2021-06-20 19:17:54 | INFO | train_inner | epoch 006: 240 / 3002 loss=2.44, ppl=5.43, wps=5732, ups=0.09, wpb=64802, bsz=128, num_updates=15161, lr=9.98867e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=175148 2021-06-20 19:18:05 | INFO | train_inner | epoch 006: 241 / 3002 loss=2.458, ppl=5.49, wps=5739.4, ups=0.09, wpb=64838, bsz=128, num_updates=15162, lr=9.98867e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=175160 2021-06-20 19:18:16 | INFO | train_inner | epoch 006: 242 / 3002 loss=2.431, ppl=5.39, wps=5886.7, ups=0.09, wpb=64861, bsz=128, num_updates=15163, lr=9.98867e-05, gnorm=2.425, loss_scale=8, train_wall=11, gb_free=2.8, wall=175171 2021-06-20 19:18:27 | INFO | train_inner | epoch 006: 243 / 3002 loss=2.42, ppl=5.35, wps=5825.4, ups=0.09, wpb=64798, bsz=128, num_updates=15164, lr=9.98867e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=175182 2021-06-20 19:18:39 | INFO | train_inner | epoch 006: 244 / 3002 loss=2.334, ppl=5.04, wps=5681.6, ups=0.09, wpb=64852, bsz=128, num_updates=15165, lr=9.98867e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=175193 2021-06-20 19:18:50 | INFO | train_inner | epoch 006: 245 / 3002 loss=2.413, ppl=5.33, wps=5673.2, ups=0.09, wpb=64741, bsz=128, num_updates=15166, lr=9.98867e-05, gnorm=2.897, loss_scale=8, train_wall=11, gb_free=2.8, wall=175205 2021-06-20 19:19:02 | INFO | train_inner | epoch 006: 246 / 3002 loss=2.513, ppl=5.71, wps=5686.7, ups=0.09, wpb=64813, bsz=128, num_updates=15167, lr=9.98867e-05, gnorm=1.945, loss_scale=8, train_wall=11, gb_free=2.8, wall=175216 2021-06-20 19:19:13 | INFO | train_inner | epoch 006: 247 / 3002 loss=2.605, ppl=6.08, wps=5770.8, ups=0.09, wpb=64866, bsz=128, num_updates=15168, lr=9.98866e-05, gnorm=1.989, loss_scale=8, train_wall=11, gb_free=2.8, wall=175227 2021-06-20 19:19:24 | INFO | train_inner | epoch 006: 248 / 3002 loss=2.43, ppl=5.39, wps=5886.5, ups=0.09, wpb=64899, bsz=128, num_updates=15169, lr=9.98866e-05, gnorm=1.874, loss_scale=8, train_wall=11, gb_free=2.8, wall=175238 2021-06-20 19:19:35 | INFO | train_inner | epoch 006: 249 / 3002 loss=2.572, ppl=5.94, wps=5894.7, ups=0.09, wpb=64852, bsz=128, num_updates=15170, lr=9.98866e-05, gnorm=2.817, loss_scale=8, train_wall=11, gb_free=2.8, wall=175249 2021-06-20 19:19:46 | INFO | train_inner | epoch 006: 250 / 3002 loss=2.38, ppl=5.2, wps=5824.1, ups=0.09, wpb=64899, bsz=128, num_updates=15171, lr=9.98866e-05, gnorm=1.953, loss_scale=8, train_wall=11, gb_free=2.8, wall=175260 2021-06-20 19:19:57 | INFO | train_inner | epoch 006: 251 / 3002 loss=2.332, ppl=5.03, wps=5774, ups=0.09, wpb=64896, bsz=128, num_updates=15172, lr=9.98866e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=175272 2021-06-20 19:20:08 | INFO | train_inner | epoch 006: 252 / 3002 loss=2.454, ppl=5.48, wps=5805.2, ups=0.09, wpb=64766, bsz=128, num_updates=15173, lr=9.98866e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=175283 2021-06-20 19:20:20 | INFO | train_inner | epoch 006: 253 / 3002 loss=2.355, ppl=5.12, wps=5792, ups=0.09, wpb=64843, bsz=128, num_updates=15174, lr=9.98866e-05, gnorm=1.903, loss_scale=8, train_wall=11, gb_free=2.8, wall=175294 2021-06-20 19:20:31 | INFO | train_inner | epoch 006: 254 / 3002 loss=2.377, ppl=5.2, wps=5847.9, ups=0.09, wpb=64828, bsz=128, num_updates=15175, lr=9.98866e-05, gnorm=2.082, loss_scale=8, train_wall=11, gb_free=2.8, wall=175305 2021-06-20 19:20:42 | INFO | train_inner | epoch 006: 255 / 3002 loss=2.386, ppl=5.23, wps=5802.3, ups=0.09, wpb=64732, bsz=128, num_updates=15176, lr=9.98866e-05, gnorm=2.05, loss_scale=8, train_wall=11, gb_free=2.8, wall=175316 2021-06-20 19:20:53 | INFO | train_inner | epoch 006: 256 / 3002 loss=2.523, ppl=5.75, wps=5813.2, ups=0.09, wpb=64824, bsz=128, num_updates=15177, lr=9.98866e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=175327 2021-06-20 19:21:04 | INFO | train_inner | epoch 006: 257 / 3002 loss=2.386, ppl=5.23, wps=5727.8, ups=0.09, wpb=64858, bsz=128, num_updates=15178, lr=9.98866e-05, gnorm=2.007, loss_scale=8, train_wall=11, gb_free=2.8, wall=175339 2021-06-20 19:21:16 | INFO | train_inner | epoch 006: 258 / 3002 loss=2.528, ppl=5.77, wps=5784.7, ups=0.09, wpb=64865, bsz=128, num_updates=15179, lr=9.98866e-05, gnorm=2.068, loss_scale=8, train_wall=11, gb_free=2.8, wall=175350 2021-06-20 19:21:27 | INFO | train_inner | epoch 006: 259 / 3002 loss=2.48, ppl=5.58, wps=5848.6, ups=0.09, wpb=64807, bsz=128, num_updates=15180, lr=9.98866e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=175361 2021-06-20 19:21:38 | INFO | train_inner | epoch 006: 260 / 3002 loss=2.324, ppl=5.01, wps=5826.9, ups=0.09, wpb=64782, bsz=128, num_updates=15181, lr=9.98865e-05, gnorm=1.92, loss_scale=8, train_wall=11, gb_free=2.8, wall=175372 2021-06-20 19:21:49 | INFO | train_inner | epoch 006: 261 / 3002 loss=2.336, ppl=5.05, wps=5824.4, ups=0.09, wpb=64954, bsz=128, num_updates=15182, lr=9.98865e-05, gnorm=1.977, loss_scale=8, train_wall=11, gb_free=2.8, wall=175383 2021-06-20 19:22:00 | INFO | train_inner | epoch 006: 262 / 3002 loss=2.658, ppl=6.31, wps=5660, ups=0.09, wpb=64775, bsz=128, num_updates=15183, lr=9.98865e-05, gnorm=2.022, loss_scale=8, train_wall=11, gb_free=2.8, wall=175395 2021-06-20 19:22:12 | INFO | train_inner | epoch 006: 263 / 3002 loss=2.488, ppl=5.61, wps=5733.2, ups=0.09, wpb=64871, bsz=128, num_updates=15184, lr=9.98865e-05, gnorm=1.976, loss_scale=8, train_wall=11, gb_free=2.8, wall=175406 2021-06-20 19:22:23 | INFO | train_inner | epoch 006: 264 / 3002 loss=2.3, ppl=4.93, wps=5775.3, ups=0.09, wpb=64842, bsz=128, num_updates=15185, lr=9.98865e-05, gnorm=2.028, loss_scale=8, train_wall=11, gb_free=2.8, wall=175417 2021-06-20 19:22:34 | INFO | train_inner | epoch 006: 265 / 3002 loss=2.37, ppl=5.17, wps=5820.5, ups=0.09, wpb=64926, bsz=128, num_updates=15186, lr=9.98865e-05, gnorm=2.028, loss_scale=8, train_wall=11, gb_free=2.8, wall=175428 2021-06-20 19:22:45 | INFO | train_inner | epoch 006: 266 / 3002 loss=2.6, ppl=6.06, wps=5771, ups=0.09, wpb=64855, bsz=128, num_updates=15187, lr=9.98865e-05, gnorm=2.04, loss_scale=8, train_wall=11, gb_free=2.8, wall=175440 2021-06-20 19:22:56 | INFO | train_inner | epoch 006: 267 / 3002 loss=2.518, ppl=5.73, wps=5992.7, ups=0.09, wpb=64930, bsz=128, num_updates=15188, lr=9.98865e-05, gnorm=1.929, loss_scale=8, train_wall=10, gb_free=2.8, wall=175451 2021-06-20 19:23:07 | INFO | train_inner | epoch 006: 268 / 3002 loss=2.528, ppl=5.77, wps=5858.1, ups=0.09, wpb=64795, bsz=128, num_updates=15189, lr=9.98865e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=175462 2021-06-20 19:23:18 | INFO | train_inner | epoch 006: 269 / 3002 loss=2.476, ppl=5.57, wps=5875, ups=0.09, wpb=64901, bsz=128, num_updates=15190, lr=9.98865e-05, gnorm=2.04, loss_scale=8, train_wall=11, gb_free=2.8, wall=175473 2021-06-20 19:23:29 | INFO | train_inner | epoch 006: 270 / 3002 loss=2.556, ppl=5.88, wps=5790.4, ups=0.09, wpb=64850, bsz=128, num_updates=15191, lr=9.98865e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=175484 2021-06-20 19:23:40 | INFO | train_inner | epoch 006: 271 / 3002 loss=2.578, ppl=5.97, wps=5940.5, ups=0.09, wpb=64904, bsz=128, num_updates=15192, lr=9.98865e-05, gnorm=1.977, loss_scale=8, train_wall=10, gb_free=2.8, wall=175495 2021-06-20 19:23:52 | INFO | train_inner | epoch 006: 272 / 3002 loss=2.503, ppl=5.67, wps=5811.8, ups=0.09, wpb=64833, bsz=128, num_updates=15193, lr=9.98864e-05, gnorm=2.008, loss_scale=8, train_wall=11, gb_free=2.8, wall=175506 2021-06-20 19:24:03 | INFO | train_inner | epoch 006: 273 / 3002 loss=2.549, ppl=5.85, wps=5694.7, ups=0.09, wpb=64823, bsz=128, num_updates=15194, lr=9.98864e-05, gnorm=2.039, loss_scale=8, train_wall=11, gb_free=2.8, wall=175517 2021-06-20 19:24:14 | INFO | train_inner | epoch 006: 274 / 3002 loss=2.556, ppl=5.88, wps=5822.7, ups=0.09, wpb=64763, bsz=128, num_updates=15195, lr=9.98864e-05, gnorm=1.968, loss_scale=8, train_wall=11, gb_free=2.8, wall=175528 2021-06-20 19:24:25 | INFO | train_inner | epoch 006: 275 / 3002 loss=2.575, ppl=5.96, wps=5914.4, ups=0.09, wpb=64824, bsz=128, num_updates=15196, lr=9.98864e-05, gnorm=1.936, loss_scale=8, train_wall=10, gb_free=2.8, wall=175539 2021-06-20 19:24:36 | INFO | train_inner | epoch 006: 276 / 3002 loss=2.5, ppl=5.66, wps=5860.2, ups=0.09, wpb=64768, bsz=128, num_updates=15197, lr=9.98864e-05, gnorm=1.945, loss_scale=8, train_wall=11, gb_free=2.8, wall=175550 2021-06-20 19:24:47 | INFO | train_inner | epoch 006: 277 / 3002 loss=2.484, ppl=5.59, wps=5861.1, ups=0.09, wpb=64886, bsz=128, num_updates=15198, lr=9.98864e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=175562 2021-06-20 19:24:58 | INFO | train_inner | epoch 006: 278 / 3002 loss=2.426, ppl=5.38, wps=5784.6, ups=0.09, wpb=64964, bsz=128, num_updates=15199, lr=9.98864e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=175573 2021-06-20 19:25:10 | INFO | train_inner | epoch 006: 279 / 3002 loss=2.466, ppl=5.52, wps=5714.1, ups=0.09, wpb=64778, bsz=128, num_updates=15200, lr=9.98864e-05, gnorm=1.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=175584 2021-06-20 19:25:21 | INFO | train_inner | epoch 006: 280 / 3002 loss=2.481, ppl=5.58, wps=5692.7, ups=0.09, wpb=64872, bsz=128, num_updates=15201, lr=9.98864e-05, gnorm=1.977, loss_scale=16, train_wall=11, gb_free=2.8, wall=175595 2021-06-20 19:25:32 | INFO | train_inner | epoch 006: 281 / 3002 loss=2.399, ppl=5.27, wps=5799.4, ups=0.09, wpb=64860, bsz=128, num_updates=15202, lr=9.98864e-05, gnorm=2.028, loss_scale=16, train_wall=11, gb_free=2.8, wall=175607 2021-06-20 19:25:44 | INFO | train_inner | epoch 006: 282 / 3002 loss=2.4, ppl=5.28, wps=5751.2, ups=0.09, wpb=64818, bsz=128, num_updates=15203, lr=9.98864e-05, gnorm=2.022, loss_scale=16, train_wall=11, gb_free=2.8, wall=175618 2021-06-20 19:25:55 | INFO | train_inner | epoch 006: 283 / 3002 loss=2.485, ppl=5.6, wps=5799, ups=0.09, wpb=64796, bsz=128, num_updates=15204, lr=9.98864e-05, gnorm=2.014, loss_scale=16, train_wall=11, gb_free=2.8, wall=175629 2021-06-20 19:26:06 | INFO | train_inner | epoch 006: 284 / 3002 loss=2.371, ppl=5.17, wps=5735.8, ups=0.09, wpb=64783, bsz=128, num_updates=15205, lr=9.98864e-05, gnorm=1.902, loss_scale=16, train_wall=11, gb_free=2.8, wall=175640 2021-06-20 19:26:17 | INFO | train_inner | epoch 006: 285 / 3002 loss=2.487, ppl=5.61, wps=5756.7, ups=0.09, wpb=64788, bsz=128, num_updates=15206, lr=9.98863e-05, gnorm=1.971, loss_scale=16, train_wall=11, gb_free=2.8, wall=175652 2021-06-20 19:26:29 | INFO | train_inner | epoch 006: 286 / 3002 loss=2.669, ppl=6.36, wps=5692.7, ups=0.09, wpb=64789, bsz=128, num_updates=15207, lr=9.98863e-05, gnorm=1.999, loss_scale=16, train_wall=11, gb_free=2.8, wall=175663 2021-06-20 19:26:40 | INFO | train_inner | epoch 006: 287 / 3002 loss=2.419, ppl=5.35, wps=5866.8, ups=0.09, wpb=64772, bsz=128, num_updates=15208, lr=9.98863e-05, gnorm=1.85, loss_scale=16, train_wall=11, gb_free=2.8, wall=175674 2021-06-20 19:26:51 | INFO | train_inner | epoch 006: 288 / 3002 loss=2.514, ppl=5.71, wps=5712.7, ups=0.09, wpb=64846, bsz=128, num_updates=15209, lr=9.98863e-05, gnorm=1.922, loss_scale=16, train_wall=11, gb_free=2.8, wall=175685 2021-06-20 19:27:02 | INFO | train_inner | epoch 006: 289 / 3002 loss=2.509, ppl=5.69, wps=5930.1, ups=0.09, wpb=64797, bsz=128, num_updates=15210, lr=9.98863e-05, gnorm=1.964, loss_scale=16, train_wall=10, gb_free=2.8, wall=175696 2021-06-20 19:27:13 | INFO | train_inner | epoch 006: 290 / 3002 loss=2.426, ppl=5.37, wps=5807.1, ups=0.09, wpb=64946, bsz=128, num_updates=15211, lr=9.98863e-05, gnorm=1.934, loss_scale=16, train_wall=11, gb_free=2.8, wall=175708 2021-06-20 19:27:24 | INFO | train_inner | epoch 006: 291 / 3002 loss=2.436, ppl=5.41, wps=5961.6, ups=0.09, wpb=64948, bsz=128, num_updates=15212, lr=9.98863e-05, gnorm=1.908, loss_scale=16, train_wall=10, gb_free=2.8, wall=175718 2021-06-20 19:27:35 | INFO | train_inner | epoch 006: 292 / 3002 loss=2.417, ppl=5.34, wps=5789.9, ups=0.09, wpb=64804, bsz=128, num_updates=15213, lr=9.98863e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=175730 2021-06-20 19:27:46 | INFO | train_inner | epoch 006: 293 / 3002 loss=2.581, ppl=5.98, wps=5910.1, ups=0.09, wpb=64772, bsz=128, num_updates=15214, lr=9.98863e-05, gnorm=1.966, loss_scale=16, train_wall=10, gb_free=2.8, wall=175741 2021-06-20 19:27:57 | INFO | train_inner | epoch 006: 294 / 3002 loss=2.544, ppl=5.83, wps=5837, ups=0.09, wpb=64830, bsz=128, num_updates=15215, lr=9.98863e-05, gnorm=1.961, loss_scale=16, train_wall=11, gb_free=2.8, wall=175752 2021-06-20 19:28:08 | INFO | train_inner | epoch 006: 295 / 3002 loss=2.617, ppl=6.13, wps=5832.3, ups=0.09, wpb=64797, bsz=128, num_updates=15216, lr=9.98863e-05, gnorm=2.016, loss_scale=16, train_wall=11, gb_free=2.8, wall=175763 2021-06-20 19:28:20 | INFO | train_inner | epoch 006: 296 / 3002 loss=2.446, ppl=5.45, wps=5829.8, ups=0.09, wpb=64792, bsz=128, num_updates=15217, lr=9.98863e-05, gnorm=1.883, loss_scale=16, train_wall=11, gb_free=2.8, wall=175774 2021-06-20 19:28:31 | INFO | train_inner | epoch 006: 297 / 3002 loss=2.379, ppl=5.2, wps=5849.7, ups=0.09, wpb=64897, bsz=128, num_updates=15218, lr=9.98862e-05, gnorm=1.861, loss_scale=16, train_wall=11, gb_free=2.8, wall=175785 2021-06-20 19:28:42 | INFO | train_inner | epoch 006: 298 / 3002 loss=2.459, ppl=5.5, wps=5940.5, ups=0.09, wpb=64816, bsz=128, num_updates=15219, lr=9.98862e-05, gnorm=1.904, loss_scale=16, train_wall=10, gb_free=2.8, wall=175796 2021-06-20 19:28:53 | INFO | train_inner | epoch 006: 299 / 3002 loss=2.43, ppl=5.39, wps=5840.6, ups=0.09, wpb=64858, bsz=128, num_updates=15220, lr=9.98862e-05, gnorm=1.899, loss_scale=16, train_wall=11, gb_free=2.8, wall=175807 2021-06-20 19:29:04 | INFO | train_inner | epoch 006: 300 / 3002 loss=2.581, ppl=5.98, wps=5713.6, ups=0.09, wpb=64828, bsz=128, num_updates=15221, lr=9.98862e-05, gnorm=1.95, loss_scale=16, train_wall=11, gb_free=2.8, wall=175818 2021-06-20 19:29:15 | INFO | train_inner | epoch 006: 301 / 3002 loss=2.503, ppl=5.67, wps=5797.4, ups=0.09, wpb=64771, bsz=128, num_updates=15222, lr=9.98862e-05, gnorm=1.846, loss_scale=16, train_wall=11, gb_free=2.8, wall=175830 2021-06-20 19:29:26 | INFO | train_inner | epoch 006: 302 / 3002 loss=2.486, ppl=5.6, wps=5745.1, ups=0.09, wpb=64833, bsz=128, num_updates=15223, lr=9.98862e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=175841 2021-06-20 19:29:38 | INFO | train_inner | epoch 006: 303 / 3002 loss=2.352, ppl=5.11, wps=5862.9, ups=0.09, wpb=64886, bsz=128, num_updates=15224, lr=9.98862e-05, gnorm=1.878, loss_scale=16, train_wall=11, gb_free=2.8, wall=175852 2021-06-20 19:29:49 | INFO | train_inner | epoch 006: 304 / 3002 loss=2.489, ppl=5.61, wps=5891, ups=0.09, wpb=64862, bsz=128, num_updates=15225, lr=9.98862e-05, gnorm=2.274, loss_scale=16, train_wall=11, gb_free=2.8, wall=175863 2021-06-20 19:30:00 | INFO | train_inner | epoch 006: 305 / 3002 loss=2.489, ppl=5.61, wps=5795.9, ups=0.09, wpb=64795, bsz=128, num_updates=15226, lr=9.98862e-05, gnorm=1.967, loss_scale=16, train_wall=11, gb_free=2.8, wall=175874 2021-06-20 19:30:11 | INFO | train_inner | epoch 006: 306 / 3002 loss=2.557, ppl=5.88, wps=5726, ups=0.09, wpb=64824, bsz=128, num_updates=15227, lr=9.98862e-05, gnorm=2.051, loss_scale=16, train_wall=11, gb_free=2.8, wall=175885 2021-06-20 19:30:22 | INFO | train_inner | epoch 006: 307 / 3002 loss=2.426, ppl=5.37, wps=5739.5, ups=0.09, wpb=64755, bsz=128, num_updates=15228, lr=9.98862e-05, gnorm=1.916, loss_scale=16, train_wall=11, gb_free=2.8, wall=175897 2021-06-20 19:30:33 | INFO | train_inner | epoch 006: 308 / 3002 loss=2.371, ppl=5.17, wps=5888.3, ups=0.09, wpb=64824, bsz=128, num_updates=15229, lr=9.98862e-05, gnorm=1.911, loss_scale=16, train_wall=10, gb_free=2.8, wall=175908 2021-06-20 19:30:44 | INFO | train_inner | epoch 006: 309 / 3002 loss=2.612, ppl=6.11, wps=5836.6, ups=0.09, wpb=64771, bsz=128, num_updates=15230, lr=9.98862e-05, gnorm=1.828, loss_scale=16, train_wall=11, gb_free=2.8, wall=175919 2021-06-20 19:30:56 | INFO | train_inner | epoch 006: 310 / 3002 loss=2.515, ppl=5.71, wps=5878, ups=0.09, wpb=64895, bsz=128, num_updates=15231, lr=9.98861e-05, gnorm=1.934, loss_scale=16, train_wall=11, gb_free=2.8, wall=175930 2021-06-20 19:31:06 | INFO | train_inner | epoch 006: 311 / 3002 loss=2.335, ppl=5.05, wps=5903.5, ups=0.09, wpb=64799, bsz=128, num_updates=15232, lr=9.98861e-05, gnorm=1.916, loss_scale=16, train_wall=11, gb_free=2.8, wall=175941 2021-06-20 19:31:18 | INFO | train_inner | epoch 006: 312 / 3002 loss=2.49, ppl=5.62, wps=5769.9, ups=0.09, wpb=64794, bsz=128, num_updates=15233, lr=9.98861e-05, gnorm=1.94, loss_scale=16, train_wall=11, gb_free=2.8, wall=175952 2021-06-20 19:31:29 | INFO | train_inner | epoch 006: 313 / 3002 loss=2.296, ppl=4.91, wps=5850.7, ups=0.09, wpb=64822, bsz=128, num_updates=15234, lr=9.98861e-05, gnorm=1.995, loss_scale=16, train_wall=11, gb_free=2.8, wall=175963 2021-06-20 19:31:40 | INFO | train_inner | epoch 006: 314 / 3002 loss=2.402, ppl=5.28, wps=5862.7, ups=0.09, wpb=64828, bsz=128, num_updates=15235, lr=9.98861e-05, gnorm=1.953, loss_scale=16, train_wall=11, gb_free=2.8, wall=175974 2021-06-20 19:31:51 | INFO | train_inner | epoch 006: 315 / 3002 loss=2.568, ppl=5.93, wps=5842.7, ups=0.09, wpb=64714, bsz=128, num_updates=15236, lr=9.98861e-05, gnorm=1.977, loss_scale=16, train_wall=11, gb_free=2.8, wall=175985 2021-06-20 19:32:02 | INFO | train_inner | epoch 006: 316 / 3002 loss=2.517, ppl=5.73, wps=5846.7, ups=0.09, wpb=64820, bsz=128, num_updates=15237, lr=9.98861e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=175996 2021-06-20 19:32:13 | INFO | train_inner | epoch 006: 317 / 3002 loss=2.474, ppl=5.55, wps=5829.8, ups=0.09, wpb=64837, bsz=128, num_updates=15238, lr=9.98861e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=176007 2021-06-20 19:32:24 | INFO | train_inner | epoch 006: 318 / 3002 loss=2.471, ppl=5.54, wps=5765.3, ups=0.09, wpb=64786, bsz=128, num_updates=15239, lr=9.98861e-05, gnorm=2.026, loss_scale=16, train_wall=11, gb_free=2.8, wall=176019 2021-06-20 19:32:36 | INFO | train_inner | epoch 006: 319 / 3002 loss=2.464, ppl=5.52, wps=5740.7, ups=0.09, wpb=64876, bsz=128, num_updates=15240, lr=9.98861e-05, gnorm=1.933, loss_scale=16, train_wall=11, gb_free=2.8, wall=176030 2021-06-20 19:32:47 | INFO | train_inner | epoch 006: 320 / 3002 loss=2.364, ppl=5.15, wps=5793, ups=0.09, wpb=64855, bsz=128, num_updates=15241, lr=9.98861e-05, gnorm=1.969, loss_scale=16, train_wall=11, gb_free=2.8, wall=176041 2021-06-20 19:32:58 | INFO | train_inner | epoch 006: 321 / 3002 loss=2.525, ppl=5.76, wps=5804.8, ups=0.09, wpb=64822, bsz=128, num_updates=15242, lr=9.98861e-05, gnorm=2.058, loss_scale=16, train_wall=11, gb_free=2.8, wall=176052 2021-06-20 19:33:09 | INFO | train_inner | epoch 006: 322 / 3002 loss=2.487, ppl=5.61, wps=5780.5, ups=0.09, wpb=64803, bsz=128, num_updates=15243, lr=9.9886e-05, gnorm=2.009, loss_scale=16, train_wall=11, gb_free=2.8, wall=176064 2021-06-20 19:33:20 | INFO | train_inner | epoch 006: 323 / 3002 loss=2.532, ppl=5.79, wps=5818.5, ups=0.09, wpb=64903, bsz=128, num_updates=15244, lr=9.9886e-05, gnorm=2.016, loss_scale=16, train_wall=11, gb_free=2.8, wall=176075 2021-06-20 19:33:32 | INFO | train_inner | epoch 006: 324 / 3002 loss=2.502, ppl=5.66, wps=5747.5, ups=0.09, wpb=64783, bsz=128, num_updates=15245, lr=9.9886e-05, gnorm=1.999, loss_scale=16, train_wall=11, gb_free=2.8, wall=176086 2021-06-20 19:33:43 | INFO | train_inner | epoch 006: 325 / 3002 loss=2.454, ppl=5.48, wps=5971, ups=0.09, wpb=64819, bsz=128, num_updates=15246, lr=9.9886e-05, gnorm=1.983, loss_scale=16, train_wall=10, gb_free=2.8, wall=176097 2021-06-20 19:33:54 | INFO | train_inner | epoch 006: 326 / 3002 loss=2.406, ppl=5.3, wps=5732, ups=0.09, wpb=64870, bsz=128, num_updates=15247, lr=9.9886e-05, gnorm=2.057, loss_scale=16, train_wall=11, gb_free=2.8, wall=176108 2021-06-20 19:34:05 | INFO | train_inner | epoch 006: 327 / 3002 loss=2.47, ppl=5.54, wps=5813.9, ups=0.09, wpb=64825, bsz=128, num_updates=15248, lr=9.9886e-05, gnorm=2.002, loss_scale=16, train_wall=11, gb_free=2.8, wall=176119 2021-06-20 19:34:16 | INFO | train_inner | epoch 006: 328 / 3002 loss=2.453, ppl=5.48, wps=5638.3, ups=0.09, wpb=64820, bsz=128, num_updates=15249, lr=9.9886e-05, gnorm=1.907, loss_scale=16, train_wall=11, gb_free=2.8, wall=176131 2021-06-20 19:34:27 | INFO | train_inner | epoch 006: 329 / 3002 loss=2.709, ppl=6.54, wps=5892.5, ups=0.09, wpb=64812, bsz=128, num_updates=15250, lr=9.9886e-05, gnorm=2.029, loss_scale=16, train_wall=11, gb_free=2.8, wall=176142 2021-06-20 19:34:39 | INFO | train_inner | epoch 006: 330 / 3002 loss=2.518, ppl=5.73, wps=5706.7, ups=0.09, wpb=64801, bsz=128, num_updates=15251, lr=9.9886e-05, gnorm=1.921, loss_scale=16, train_wall=11, gb_free=2.8, wall=176153 2021-06-20 19:34:50 | INFO | train_inner | epoch 006: 331 / 3002 loss=2.469, ppl=5.54, wps=5782.7, ups=0.09, wpb=64833, bsz=128, num_updates=15252, lr=9.9886e-05, gnorm=1.905, loss_scale=16, train_wall=11, gb_free=2.8, wall=176164 2021-06-20 19:35:01 | INFO | train_inner | epoch 006: 332 / 3002 loss=2.561, ppl=5.9, wps=5893.6, ups=0.09, wpb=64907, bsz=128, num_updates=15253, lr=9.9886e-05, gnorm=1.922, loss_scale=16, train_wall=11, gb_free=2.8, wall=176175 2021-06-20 19:35:12 | INFO | train_inner | epoch 006: 333 / 3002 loss=2.657, ppl=6.31, wps=5878.9, ups=0.09, wpb=64801, bsz=128, num_updates=15254, lr=9.9886e-05, gnorm=1.954, loss_scale=16, train_wall=11, gb_free=2.8, wall=176186 2021-06-20 19:35:23 | INFO | train_inner | epoch 006: 334 / 3002 loss=2.476, ppl=5.56, wps=5926.1, ups=0.09, wpb=64927, bsz=128, num_updates=15255, lr=9.9886e-05, gnorm=1.992, loss_scale=16, train_wall=10, gb_free=2.8, wall=176197 2021-06-20 19:35:34 | INFO | train_inner | epoch 006: 335 / 3002 loss=2.508, ppl=5.69, wps=5827.4, ups=0.09, wpb=64890, bsz=128, num_updates=15256, lr=9.98859e-05, gnorm=1.934, loss_scale=16, train_wall=11, gb_free=2.8, wall=176209 2021-06-20 19:35:45 | INFO | train_inner | epoch 006: 336 / 3002 loss=2.353, ppl=5.11, wps=5863.8, ups=0.09, wpb=64882, bsz=128, num_updates=15257, lr=9.98859e-05, gnorm=1.956, loss_scale=16, train_wall=11, gb_free=2.8, wall=176220 2021-06-20 19:35:56 | INFO | train_inner | epoch 006: 337 / 3002 loss=2.244, ppl=4.74, wps=5865.2, ups=0.09, wpb=64856, bsz=128, num_updates=15258, lr=9.98859e-05, gnorm=1.915, loss_scale=16, train_wall=11, gb_free=2.8, wall=176231 2021-06-20 19:36:07 | INFO | train_inner | epoch 006: 338 / 3002 loss=2.643, ppl=6.25, wps=5792.3, ups=0.09, wpb=64811, bsz=128, num_updates=15259, lr=9.98859e-05, gnorm=2.028, loss_scale=16, train_wall=11, gb_free=2.8, wall=176242 2021-06-20 19:36:19 | INFO | train_inner | epoch 006: 339 / 3002 loss=2.265, ppl=4.81, wps=5793, ups=0.09, wpb=64854, bsz=128, num_updates=15260, lr=9.98859e-05, gnorm=1.973, loss_scale=16, train_wall=11, gb_free=2.8, wall=176253 2021-06-20 19:36:30 | INFO | train_inner | epoch 006: 340 / 3002 loss=2.648, ppl=6.27, wps=5817.6, ups=0.09, wpb=64834, bsz=128, num_updates=15261, lr=9.98859e-05, gnorm=1.979, loss_scale=16, train_wall=11, gb_free=2.8, wall=176264 2021-06-20 19:36:41 | INFO | train_inner | epoch 006: 341 / 3002 loss=2.558, ppl=5.89, wps=5865.1, ups=0.09, wpb=64876, bsz=128, num_updates=15262, lr=9.98859e-05, gnorm=2.481, loss_scale=16, train_wall=11, gb_free=2.8, wall=176275 2021-06-20 19:36:52 | INFO | train_inner | epoch 006: 342 / 3002 loss=2.465, ppl=5.52, wps=5702.3, ups=0.09, wpb=64869, bsz=128, num_updates=15263, lr=9.98859e-05, gnorm=1.913, loss_scale=16, train_wall=11, gb_free=2.8, wall=176287 2021-06-20 19:37:03 | INFO | train_inner | epoch 006: 343 / 3002 loss=2.538, ppl=5.81, wps=5890.6, ups=0.09, wpb=64928, bsz=128, num_updates=15264, lr=9.98859e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=176298 2021-06-20 19:37:14 | INFO | train_inner | epoch 006: 344 / 3002 loss=2.503, ppl=5.67, wps=5824, ups=0.09, wpb=64775, bsz=128, num_updates=15265, lr=9.98859e-05, gnorm=1.917, loss_scale=16, train_wall=11, gb_free=2.8, wall=176309 2021-06-20 19:37:26 | INFO | train_inner | epoch 006: 345 / 3002 loss=2.569, ppl=5.94, wps=5825.6, ups=0.09, wpb=64800, bsz=128, num_updates=15266, lr=9.98859e-05, gnorm=1.957, loss_scale=16, train_wall=11, gb_free=2.8, wall=176320 2021-06-20 19:37:37 | INFO | train_inner | epoch 006: 346 / 3002 loss=2.381, ppl=5.21, wps=5717.4, ups=0.09, wpb=64819, bsz=128, num_updates=15267, lr=9.98859e-05, gnorm=1.916, loss_scale=16, train_wall=11, gb_free=2.8, wall=176331 2021-06-20 19:37:48 | INFO | train_inner | epoch 006: 347 / 3002 loss=2.489, ppl=5.61, wps=5912, ups=0.09, wpb=64865, bsz=128, num_updates=15268, lr=9.98858e-05, gnorm=1.948, loss_scale=16, train_wall=10, gb_free=2.8, wall=176342 2021-06-20 19:37:59 | INFO | train_inner | epoch 006: 348 / 3002 loss=2.511, ppl=5.7, wps=5783.7, ups=0.09, wpb=64801, bsz=128, num_updates=15269, lr=9.98858e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=176353 2021-06-20 19:38:10 | INFO | train_inner | epoch 006: 349 / 3002 loss=2.491, ppl=5.62, wps=5818.5, ups=0.09, wpb=64876, bsz=128, num_updates=15270, lr=9.98858e-05, gnorm=1.883, loss_scale=16, train_wall=11, gb_free=2.8, wall=176365 2021-06-20 19:38:21 | INFO | train_inner | epoch 006: 350 / 3002 loss=2.434, ppl=5.4, wps=5764.5, ups=0.09, wpb=64714, bsz=128, num_updates=15271, lr=9.98858e-05, gnorm=2.102, loss_scale=16, train_wall=11, gb_free=2.8, wall=176376 2021-06-20 19:38:33 | INFO | train_inner | epoch 006: 351 / 3002 loss=2.303, ppl=4.93, wps=5733.6, ups=0.09, wpb=64796, bsz=128, num_updates=15272, lr=9.98858e-05, gnorm=1.942, loss_scale=16, train_wall=11, gb_free=2.8, wall=176387 2021-06-20 19:38:44 | INFO | train_inner | epoch 006: 352 / 3002 loss=2.391, ppl=5.25, wps=5768.4, ups=0.09, wpb=64816, bsz=128, num_updates=15273, lr=9.98858e-05, gnorm=1.866, loss_scale=16, train_wall=11, gb_free=2.8, wall=176398 2021-06-20 19:38:55 | INFO | train_inner | epoch 006: 353 / 3002 loss=2.483, ppl=5.59, wps=5889.2, ups=0.09, wpb=64856, bsz=128, num_updates=15274, lr=9.98858e-05, gnorm=2.051, loss_scale=16, train_wall=11, gb_free=2.8, wall=176409 2021-06-20 19:39:06 | INFO | train_inner | epoch 006: 354 / 3002 loss=2.335, ppl=5.05, wps=5763.7, ups=0.09, wpb=64849, bsz=128, num_updates=15275, lr=9.98858e-05, gnorm=1.957, loss_scale=16, train_wall=11, gb_free=2.8, wall=176421 2021-06-20 19:39:17 | INFO | train_inner | epoch 006: 355 / 3002 loss=2.447, ppl=5.45, wps=5776.3, ups=0.09, wpb=64764, bsz=128, num_updates=15276, lr=9.98858e-05, gnorm=1.86, loss_scale=16, train_wall=11, gb_free=2.8, wall=176432 2021-06-20 19:39:29 | INFO | train_inner | epoch 006: 356 / 3002 loss=2.565, ppl=5.92, wps=5815.9, ups=0.09, wpb=64801, bsz=128, num_updates=15277, lr=9.98858e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=176443 2021-06-20 19:39:40 | INFO | train_inner | epoch 006: 357 / 3002 loss=2.457, ppl=5.49, wps=5762, ups=0.09, wpb=64843, bsz=128, num_updates=15278, lr=9.98858e-05, gnorm=1.888, loss_scale=16, train_wall=11, gb_free=2.8, wall=176454 2021-06-20 19:39:51 | INFO | train_inner | epoch 006: 358 / 3002 loss=2.614, ppl=6.12, wps=5774.1, ups=0.09, wpb=64794, bsz=128, num_updates=15279, lr=9.98858e-05, gnorm=1.901, loss_scale=16, train_wall=11, gb_free=2.8, wall=176465 2021-06-20 19:40:02 | INFO | train_inner | epoch 006: 359 / 3002 loss=2.443, ppl=5.44, wps=5803.9, ups=0.09, wpb=64877, bsz=128, num_updates=15280, lr=9.98858e-05, gnorm=1.9, loss_scale=16, train_wall=11, gb_free=2.8, wall=176477 2021-06-20 19:40:14 | INFO | train_inner | epoch 006: 360 / 3002 loss=2.332, ppl=5.04, wps=5725.4, ups=0.09, wpb=64850, bsz=128, num_updates=15281, lr=9.98857e-05, gnorm=1.877, loss_scale=16, train_wall=11, gb_free=2.8, wall=176488 2021-06-20 19:40:25 | INFO | train_inner | epoch 006: 361 / 3002 loss=2.517, ppl=5.72, wps=5827, ups=0.09, wpb=64856, bsz=128, num_updates=15282, lr=9.98857e-05, gnorm=3.886, loss_scale=16, train_wall=11, gb_free=2.8, wall=176499 2021-06-20 19:40:36 | INFO | train_inner | epoch 006: 362 / 3002 loss=2.459, ppl=5.5, wps=5699.3, ups=0.09, wpb=64781, bsz=128, num_updates=15283, lr=9.98857e-05, gnorm=1.986, loss_scale=16, train_wall=11, gb_free=2.8, wall=176510 2021-06-20 19:40:47 | INFO | train_inner | epoch 006: 363 / 3002 loss=2.49, ppl=5.62, wps=5700.3, ups=0.09, wpb=64848, bsz=128, num_updates=15284, lr=9.98857e-05, gnorm=1.858, loss_scale=16, train_wall=11, gb_free=2.8, wall=176522 2021-06-20 19:40:59 | INFO | train_inner | epoch 006: 364 / 3002 loss=2.637, ppl=6.22, wps=5794.7, ups=0.09, wpb=64749, bsz=128, num_updates=15285, lr=9.98857e-05, gnorm=1.929, loss_scale=16, train_wall=11, gb_free=2.8, wall=176533 2021-06-20 19:41:10 | INFO | train_inner | epoch 006: 365 / 3002 loss=2.456, ppl=5.49, wps=5791.7, ups=0.09, wpb=64900, bsz=128, num_updates=15286, lr=9.98857e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=176544 2021-06-20 19:41:21 | INFO | train_inner | epoch 006: 366 / 3002 loss=2.52, ppl=5.74, wps=5856.7, ups=0.09, wpb=64879, bsz=128, num_updates=15287, lr=9.98857e-05, gnorm=1.951, loss_scale=16, train_wall=11, gb_free=2.8, wall=176555 2021-06-20 19:41:32 | INFO | train_inner | epoch 006: 367 / 3002 loss=2.511, ppl=5.7, wps=5787.5, ups=0.09, wpb=64870, bsz=128, num_updates=15288, lr=9.98857e-05, gnorm=1.888, loss_scale=16, train_wall=11, gb_free=2.8, wall=176566 2021-06-20 19:41:43 | INFO | train_inner | epoch 006: 368 / 3002 loss=2.399, ppl=5.28, wps=5852.1, ups=0.09, wpb=64903, bsz=128, num_updates=15289, lr=9.98857e-05, gnorm=1.88, loss_scale=16, train_wall=11, gb_free=2.8, wall=176578 2021-06-20 19:41:54 | INFO | train_inner | epoch 006: 369 / 3002 loss=2.482, ppl=5.59, wps=5770, ups=0.09, wpb=64834, bsz=128, num_updates=15290, lr=9.98857e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=176589 2021-06-20 19:42:06 | INFO | train_inner | epoch 006: 370 / 3002 loss=2.554, ppl=5.87, wps=5854.5, ups=0.09, wpb=64827, bsz=128, num_updates=15291, lr=9.98857e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=176600 2021-06-20 19:42:17 | INFO | train_inner | epoch 006: 371 / 3002 loss=2.555, ppl=5.88, wps=5817.7, ups=0.09, wpb=64732, bsz=128, num_updates=15292, lr=9.98857e-05, gnorm=2.049, loss_scale=16, train_wall=11, gb_free=2.8, wall=176611 2021-06-20 19:42:28 | INFO | train_inner | epoch 006: 372 / 3002 loss=2.398, ppl=5.27, wps=5844, ups=0.09, wpb=64797, bsz=128, num_updates=15293, lr=9.98856e-05, gnorm=1.917, loss_scale=16, train_wall=11, gb_free=2.8, wall=176622 2021-06-20 19:42:39 | INFO | train_inner | epoch 006: 373 / 3002 loss=2.574, ppl=5.96, wps=5728.3, ups=0.09, wpb=64830, bsz=128, num_updates=15294, lr=9.98856e-05, gnorm=1.918, loss_scale=16, train_wall=11, gb_free=2.8, wall=176633 2021-06-20 19:42:50 | INFO | train_inner | epoch 006: 374 / 3002 loss=2.441, ppl=5.43, wps=5852.9, ups=0.09, wpb=64862, bsz=128, num_updates=15295, lr=9.98856e-05, gnorm=1.897, loss_scale=16, train_wall=11, gb_free=2.8, wall=176644 2021-06-20 19:43:01 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-20 19:43:12 | INFO | train_inner | epoch 006: 376 / 3002 loss=2.52, ppl=5.74, wps=2921.6, ups=0.05, wpb=64788, bsz=128, num_updates=15296, lr=9.98856e-05, gnorm=2.037, loss_scale=8, train_wall=21, gb_free=2.8, wall=176667 2021-06-20 19:43:23 | INFO | train_inner | epoch 006: 377 / 3002 loss=2.471, ppl=5.54, wps=5839.9, ups=0.09, wpb=64871, bsz=128, num_updates=15297, lr=9.98856e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=176678 2021-06-20 19:43:34 | INFO | train_inner | epoch 006: 378 / 3002 loss=2.449, ppl=5.46, wps=5899, ups=0.09, wpb=64881, bsz=128, num_updates=15298, lr=9.98856e-05, gnorm=1.985, loss_scale=8, train_wall=11, gb_free=2.8, wall=176689 2021-06-20 19:43:45 | INFO | train_inner | epoch 006: 379 / 3002 loss=2.389, ppl=5.24, wps=5882.3, ups=0.09, wpb=64834, bsz=128, num_updates=15299, lr=9.98856e-05, gnorm=1.891, loss_scale=8, train_wall=11, gb_free=2.8, wall=176700 2021-06-20 19:43:56 | INFO | train_inner | epoch 006: 380 / 3002 loss=2.513, ppl=5.71, wps=5900, ups=0.09, wpb=64802, bsz=128, num_updates=15300, lr=9.98856e-05, gnorm=1.88, loss_scale=8, train_wall=11, gb_free=2.8, wall=176711 2021-06-20 19:44:07 | INFO | train_inner | epoch 006: 381 / 3002 loss=2.47, ppl=5.54, wps=5878.1, ups=0.09, wpb=64869, bsz=128, num_updates=15301, lr=9.98856e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=176722 2021-06-20 19:44:19 | INFO | train_inner | epoch 006: 382 / 3002 loss=2.411, ppl=5.32, wps=5799.6, ups=0.09, wpb=64852, bsz=128, num_updates=15302, lr=9.98856e-05, gnorm=1.953, loss_scale=8, train_wall=11, gb_free=2.8, wall=176733 2021-06-20 19:44:30 | INFO | train_inner | epoch 006: 383 / 3002 loss=2.562, ppl=5.9, wps=5733.6, ups=0.09, wpb=64883, bsz=128, num_updates=15303, lr=9.98856e-05, gnorm=1.956, loss_scale=8, train_wall=11, gb_free=2.8, wall=176744 2021-06-20 19:44:41 | INFO | train_inner | epoch 006: 384 / 3002 loss=2.385, ppl=5.22, wps=5772.4, ups=0.09, wpb=64745, bsz=128, num_updates=15304, lr=9.98856e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=176756 2021-06-20 19:44:52 | INFO | train_inner | epoch 006: 385 / 3002 loss=2.489, ppl=5.61, wps=5768.1, ups=0.09, wpb=64812, bsz=128, num_updates=15305, lr=9.98856e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=176767 2021-06-20 19:45:04 | INFO | train_inner | epoch 006: 386 / 3002 loss=2.352, ppl=5.1, wps=5662.2, ups=0.09, wpb=64865, bsz=128, num_updates=15306, lr=9.98855e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=176778 2021-06-20 19:45:15 | INFO | train_inner | epoch 006: 387 / 3002 loss=2.426, ppl=5.37, wps=5801.2, ups=0.09, wpb=64832, bsz=128, num_updates=15307, lr=9.98855e-05, gnorm=1.883, loss_scale=8, train_wall=11, gb_free=2.8, wall=176789 2021-06-20 19:45:26 | INFO | train_inner | epoch 006: 388 / 3002 loss=2.444, ppl=5.44, wps=5741.1, ups=0.09, wpb=64817, bsz=128, num_updates=15308, lr=9.98855e-05, gnorm=2.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=176801 2021-06-20 19:45:37 | INFO | train_inner | epoch 006: 389 / 3002 loss=2.423, ppl=5.36, wps=5844.8, ups=0.09, wpb=64830, bsz=128, num_updates=15309, lr=9.98855e-05, gnorm=1.952, loss_scale=8, train_wall=11, gb_free=2.8, wall=176812 2021-06-20 19:45:49 | INFO | train_inner | epoch 006: 390 / 3002 loss=2.496, ppl=5.64, wps=5787.4, ups=0.09, wpb=64789, bsz=128, num_updates=15310, lr=9.98855e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=176823 2021-06-20 19:46:00 | INFO | train_inner | epoch 006: 391 / 3002 loss=2.548, ppl=5.85, wps=5932, ups=0.09, wpb=64773, bsz=128, num_updates=15311, lr=9.98855e-05, gnorm=1.947, loss_scale=8, train_wall=10, gb_free=2.8, wall=176834 2021-06-20 19:46:10 | INFO | train_inner | epoch 006: 392 / 3002 loss=2.454, ppl=5.48, wps=6008.8, ups=0.09, wpb=64809, bsz=128, num_updates=15312, lr=9.98855e-05, gnorm=1.905, loss_scale=8, train_wall=10, gb_free=2.8, wall=176845 2021-06-20 19:46:21 | INFO | train_inner | epoch 006: 393 / 3002 loss=2.59, ppl=6.02, wps=5838.6, ups=0.09, wpb=64802, bsz=128, num_updates=15313, lr=9.98855e-05, gnorm=1.984, loss_scale=8, train_wall=11, gb_free=2.8, wall=176856 2021-06-20 19:46:32 | INFO | train_inner | epoch 006: 394 / 3002 loss=2.375, ppl=5.19, wps=5977.7, ups=0.09, wpb=64813, bsz=128, num_updates=15314, lr=9.98855e-05, gnorm=2.019, loss_scale=8, train_wall=10, gb_free=2.8, wall=176867 2021-06-20 19:46:43 | INFO | train_inner | epoch 006: 395 / 3002 loss=2.632, ppl=6.2, wps=5944.4, ups=0.09, wpb=64836, bsz=128, num_updates=15315, lr=9.98855e-05, gnorm=1.993, loss_scale=8, train_wall=10, gb_free=2.8, wall=176878 2021-06-20 19:46:54 | INFO | train_inner | epoch 006: 396 / 3002 loss=2.504, ppl=5.67, wps=5964.7, ups=0.09, wpb=64840, bsz=128, num_updates=15316, lr=9.98855e-05, gnorm=1.903, loss_scale=8, train_wall=10, gb_free=2.8, wall=176888 2021-06-20 19:47:05 | INFO | train_inner | epoch 006: 397 / 3002 loss=2.606, ppl=6.09, wps=5940.4, ups=0.09, wpb=64759, bsz=128, num_updates=15317, lr=9.98855e-05, gnorm=2.06, loss_scale=8, train_wall=10, gb_free=2.8, wall=176899 2021-06-20 19:47:16 | INFO | train_inner | epoch 006: 398 / 3002 loss=2.491, ppl=5.62, wps=5844.1, ups=0.09, wpb=64811, bsz=128, num_updates=15318, lr=9.98854e-05, gnorm=1.868, loss_scale=8, train_wall=11, gb_free=2.8, wall=176910 2021-06-20 19:47:27 | INFO | train_inner | epoch 006: 399 / 3002 loss=2.425, ppl=5.37, wps=5840, ups=0.09, wpb=64836, bsz=128, num_updates=15319, lr=9.98854e-05, gnorm=1.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=176921 2021-06-20 19:47:38 | INFO | train_inner | epoch 006: 400 / 3002 loss=2.541, ppl=5.82, wps=5770.9, ups=0.09, wpb=64767, bsz=128, num_updates=15320, lr=9.98854e-05, gnorm=1.976, loss_scale=8, train_wall=11, gb_free=2.8, wall=176933 2021-06-20 19:47:50 | INFO | train_inner | epoch 006: 401 / 3002 loss=2.487, ppl=5.61, wps=5779.2, ups=0.09, wpb=64805, bsz=128, num_updates=15321, lr=9.98854e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=176944 2021-06-20 19:48:01 | INFO | train_inner | epoch 006: 402 / 3002 loss=2.491, ppl=5.62, wps=5839.8, ups=0.09, wpb=64807, bsz=128, num_updates=15322, lr=9.98854e-05, gnorm=2.016, loss_scale=8, train_wall=11, gb_free=2.8, wall=176955 2021-06-20 19:48:12 | INFO | train_inner | epoch 006: 403 / 3002 loss=2.43, ppl=5.39, wps=5931.8, ups=0.09, wpb=64800, bsz=128, num_updates=15323, lr=9.98854e-05, gnorm=1.885, loss_scale=8, train_wall=10, gb_free=2.8, wall=176966 2021-06-20 19:48:23 | INFO | train_inner | epoch 006: 404 / 3002 loss=2.51, ppl=5.7, wps=5833.4, ups=0.09, wpb=64798, bsz=128, num_updates=15324, lr=9.98854e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=176977 2021-06-20 19:48:34 | INFO | train_inner | epoch 006: 405 / 3002 loss=2.504, ppl=5.67, wps=5801.2, ups=0.09, wpb=64811, bsz=128, num_updates=15325, lr=9.98854e-05, gnorm=2.042, loss_scale=8, train_wall=11, gb_free=2.8, wall=176988 2021-06-20 19:48:45 | INFO | train_inner | epoch 006: 406 / 3002 loss=2.611, ppl=6.11, wps=5964.1, ups=0.09, wpb=64822, bsz=128, num_updates=15326, lr=9.98854e-05, gnorm=1.978, loss_scale=8, train_wall=10, gb_free=2.8, wall=176999 2021-06-20 19:48:56 | INFO | train_inner | epoch 006: 407 / 3002 loss=2.429, ppl=5.39, wps=5856.5, ups=0.09, wpb=64828, bsz=128, num_updates=15327, lr=9.98854e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=177010 2021-06-20 19:49:07 | INFO | train_inner | epoch 006: 408 / 3002 loss=2.437, ppl=5.42, wps=5894.5, ups=0.09, wpb=64861, bsz=128, num_updates=15328, lr=9.98854e-05, gnorm=1.952, loss_scale=8, train_wall=11, gb_free=2.8, wall=177021 2021-06-20 19:49:18 | INFO | train_inner | epoch 006: 409 / 3002 loss=2.603, ppl=6.08, wps=5757.5, ups=0.09, wpb=64874, bsz=128, num_updates=15329, lr=9.98854e-05, gnorm=1.892, loss_scale=8, train_wall=11, gb_free=2.8, wall=177032 2021-06-20 19:49:29 | INFO | train_inner | epoch 006: 410 / 3002 loss=2.382, ppl=5.21, wps=5917.4, ups=0.09, wpb=64857, bsz=128, num_updates=15330, lr=9.98854e-05, gnorm=1.967, loss_scale=8, train_wall=10, gb_free=2.8, wall=177043 2021-06-20 19:49:40 | INFO | train_inner | epoch 006: 411 / 3002 loss=2.4, ppl=5.28, wps=5925, ups=0.09, wpb=64858, bsz=128, num_updates=15331, lr=9.98853e-05, gnorm=1.852, loss_scale=8, train_wall=11, gb_free=2.8, wall=177054 2021-06-20 19:49:51 | INFO | train_inner | epoch 006: 412 / 3002 loss=2.399, ppl=5.28, wps=5767.3, ups=0.09, wpb=64793, bsz=128, num_updates=15332, lr=9.98853e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=177066 2021-06-20 19:50:02 | INFO | train_inner | epoch 006: 413 / 3002 loss=2.4, ppl=5.28, wps=5793.3, ups=0.09, wpb=64844, bsz=128, num_updates=15333, lr=9.98853e-05, gnorm=2.009, loss_scale=8, train_wall=11, gb_free=2.8, wall=177077 2021-06-20 19:50:13 | INFO | train_inner | epoch 006: 414 / 3002 loss=2.582, ppl=5.99, wps=5867.7, ups=0.09, wpb=64831, bsz=128, num_updates=15334, lr=9.98853e-05, gnorm=1.985, loss_scale=8, train_wall=11, gb_free=2.8, wall=177088 2021-06-20 19:50:25 | INFO | train_inner | epoch 006: 415 / 3002 loss=2.534, ppl=5.79, wps=5822.4, ups=0.09, wpb=64880, bsz=128, num_updates=15335, lr=9.98853e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=177099 2021-06-20 19:50:36 | INFO | train_inner | epoch 006: 416 / 3002 loss=2.48, ppl=5.58, wps=5908.3, ups=0.09, wpb=64752, bsz=128, num_updates=15336, lr=9.98853e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=177110 2021-06-20 19:50:47 | INFO | train_inner | epoch 006: 417 / 3002 loss=2.499, ppl=5.65, wps=5838.8, ups=0.09, wpb=64802, bsz=128, num_updates=15337, lr=9.98853e-05, gnorm=2.471, loss_scale=8, train_wall=11, gb_free=2.8, wall=177121 2021-06-20 19:50:58 | INFO | train_inner | epoch 006: 418 / 3002 loss=2.487, ppl=5.61, wps=5923.8, ups=0.09, wpb=64872, bsz=128, num_updates=15338, lr=9.98853e-05, gnorm=1.988, loss_scale=8, train_wall=10, gb_free=2.8, wall=177132 2021-06-20 19:51:09 | INFO | train_inner | epoch 006: 419 / 3002 loss=2.367, ppl=5.16, wps=5868.4, ups=0.09, wpb=64814, bsz=128, num_updates=15339, lr=9.98853e-05, gnorm=1.884, loss_scale=8, train_wall=11, gb_free=2.8, wall=177143 2021-06-20 19:51:20 | INFO | train_inner | epoch 006: 420 / 3002 loss=2.488, ppl=5.61, wps=5794.9, ups=0.09, wpb=64836, bsz=128, num_updates=15340, lr=9.98853e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=177154 2021-06-20 19:51:31 | INFO | train_inner | epoch 006: 421 / 3002 loss=2.429, ppl=5.39, wps=5798.3, ups=0.09, wpb=64805, bsz=128, num_updates=15341, lr=9.98853e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=177165 2021-06-20 19:51:42 | INFO | train_inner | epoch 006: 422 / 3002 loss=2.437, ppl=5.41, wps=5874.1, ups=0.09, wpb=64781, bsz=128, num_updates=15342, lr=9.98853e-05, gnorm=1.872, loss_scale=8, train_wall=11, gb_free=2.8, wall=177176 2021-06-20 19:51:53 | INFO | train_inner | epoch 006: 423 / 3002 loss=2.545, ppl=5.84, wps=5910.9, ups=0.09, wpb=64814, bsz=128, num_updates=15343, lr=9.98852e-05, gnorm=2.548, loss_scale=8, train_wall=11, gb_free=2.8, wall=177187 2021-06-20 19:52:04 | INFO | train_inner | epoch 006: 424 / 3002 loss=2.469, ppl=5.54, wps=5901.7, ups=0.09, wpb=64779, bsz=128, num_updates=15344, lr=9.98852e-05, gnorm=1.911, loss_scale=8, train_wall=10, gb_free=2.8, wall=177198 2021-06-20 19:52:15 | INFO | train_inner | epoch 006: 425 / 3002 loss=2.595, ppl=6.04, wps=5817, ups=0.09, wpb=64699, bsz=128, num_updates=15345, lr=9.98852e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=177209 2021-06-20 19:52:26 | INFO | train_inner | epoch 006: 426 / 3002 loss=2.331, ppl=5.03, wps=5919.1, ups=0.09, wpb=64832, bsz=128, num_updates=15346, lr=9.98852e-05, gnorm=1.968, loss_scale=8, train_wall=10, gb_free=2.8, wall=177220 2021-06-20 19:52:37 | INFO | train_inner | epoch 006: 427 / 3002 loss=2.472, ppl=5.55, wps=5871.1, ups=0.09, wpb=64862, bsz=128, num_updates=15347, lr=9.98852e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=177231 2021-06-20 19:52:48 | INFO | train_inner | epoch 006: 428 / 3002 loss=2.495, ppl=5.64, wps=5819.5, ups=0.09, wpb=64788, bsz=128, num_updates=15348, lr=9.98852e-05, gnorm=1.887, loss_scale=8, train_wall=11, gb_free=2.8, wall=177243 2021-06-20 19:52:59 | INFO | train_inner | epoch 006: 429 / 3002 loss=2.547, ppl=5.84, wps=5809.5, ups=0.09, wpb=64811, bsz=128, num_updates=15349, lr=9.98852e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=177254 2021-06-20 19:53:10 | INFO | train_inner | epoch 006: 430 / 3002 loss=2.509, ppl=5.69, wps=5931.3, ups=0.09, wpb=64825, bsz=128, num_updates=15350, lr=9.98852e-05, gnorm=2.419, loss_scale=8, train_wall=10, gb_free=2.8, wall=177265 2021-06-20 19:53:21 | INFO | train_inner | epoch 006: 431 / 3002 loss=2.408, ppl=5.31, wps=5862.5, ups=0.09, wpb=64817, bsz=128, num_updates=15351, lr=9.98852e-05, gnorm=1.868, loss_scale=8, train_wall=11, gb_free=2.8, wall=177276 2021-06-20 19:53:33 | INFO | train_inner | epoch 006: 432 / 3002 loss=2.416, ppl=5.34, wps=5842.5, ups=0.09, wpb=64850, bsz=128, num_updates=15352, lr=9.98852e-05, gnorm=1.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=177287 2021-06-20 19:53:44 | INFO | train_inner | epoch 006: 433 / 3002 loss=2.45, ppl=5.46, wps=5680.4, ups=0.09, wpb=64855, bsz=128, num_updates=15353, lr=9.98852e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=177298 2021-06-20 19:53:55 | INFO | train_inner | epoch 006: 434 / 3002 loss=2.489, ppl=5.62, wps=5731.9, ups=0.09, wpb=64840, bsz=128, num_updates=15354, lr=9.98852e-05, gnorm=1.956, loss_scale=8, train_wall=11, gb_free=2.8, wall=177310 2021-06-20 19:54:06 | INFO | train_inner | epoch 006: 435 / 3002 loss=2.362, ppl=5.14, wps=5827.5, ups=0.09, wpb=64801, bsz=128, num_updates=15355, lr=9.98852e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=177321 2021-06-20 19:54:17 | INFO | train_inner | epoch 006: 436 / 3002 loss=2.543, ppl=5.83, wps=5834.7, ups=0.09, wpb=64876, bsz=128, num_updates=15356, lr=9.98851e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=177332 2021-06-20 19:54:29 | INFO | train_inner | epoch 006: 437 / 3002 loss=2.406, ppl=5.3, wps=5880, ups=0.09, wpb=64924, bsz=128, num_updates=15357, lr=9.98851e-05, gnorm=2.261, loss_scale=8, train_wall=11, gb_free=2.8, wall=177343 2021-06-20 19:54:40 | INFO | train_inner | epoch 006: 438 / 3002 loss=2.438, ppl=5.42, wps=5882, ups=0.09, wpb=64855, bsz=128, num_updates=15358, lr=9.98851e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=177354 2021-06-20 19:54:51 | INFO | train_inner | epoch 006: 439 / 3002 loss=2.415, ppl=5.33, wps=5821.5, ups=0.09, wpb=64799, bsz=128, num_updates=15359, lr=9.98851e-05, gnorm=2.075, loss_scale=8, train_wall=11, gb_free=2.8, wall=177365 2021-06-20 19:55:02 | INFO | train_inner | epoch 006: 440 / 3002 loss=2.486, ppl=5.6, wps=5812.4, ups=0.09, wpb=64822, bsz=128, num_updates=15360, lr=9.98851e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=177376 2021-06-20 19:55:13 | INFO | train_inner | epoch 006: 441 / 3002 loss=2.378, ppl=5.2, wps=5858.1, ups=0.09, wpb=64800, bsz=128, num_updates=15361, lr=9.98851e-05, gnorm=1.902, loss_scale=8, train_wall=11, gb_free=2.8, wall=177387 2021-06-20 19:55:24 | INFO | train_inner | epoch 006: 442 / 3002 loss=2.524, ppl=5.75, wps=5808.5, ups=0.09, wpb=64796, bsz=128, num_updates=15362, lr=9.98851e-05, gnorm=1.859, loss_scale=8, train_wall=11, gb_free=2.8, wall=177398 2021-06-20 19:55:35 | INFO | train_inner | epoch 006: 443 / 3002 loss=2.586, ppl=6, wps=5826.6, ups=0.09, wpb=64819, bsz=128, num_updates=15363, lr=9.98851e-05, gnorm=1.918, loss_scale=8, train_wall=11, gb_free=2.8, wall=177410 2021-06-20 19:55:46 | INFO | train_inner | epoch 006: 444 / 3002 loss=2.396, ppl=5.26, wps=5881.8, ups=0.09, wpb=64781, bsz=128, num_updates=15364, lr=9.98851e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=177421 2021-06-20 19:55:57 | INFO | train_inner | epoch 006: 445 / 3002 loss=2.464, ppl=5.52, wps=5880.4, ups=0.09, wpb=64821, bsz=128, num_updates=15365, lr=9.98851e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=177432 2021-06-20 19:56:08 | INFO | train_inner | epoch 006: 446 / 3002 loss=2.446, ppl=5.45, wps=5943.3, ups=0.09, wpb=64812, bsz=128, num_updates=15366, lr=9.98851e-05, gnorm=1.962, loss_scale=8, train_wall=10, gb_free=2.8, wall=177442 2021-06-20 19:56:19 | INFO | train_inner | epoch 006: 447 / 3002 loss=2.48, ppl=5.58, wps=5881.3, ups=0.09, wpb=64805, bsz=128, num_updates=15367, lr=9.98851e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=177453 2021-06-20 19:56:30 | INFO | train_inner | epoch 006: 448 / 3002 loss=2.529, ppl=5.77, wps=5798.5, ups=0.09, wpb=64850, bsz=128, num_updates=15368, lr=9.9885e-05, gnorm=1.868, loss_scale=8, train_wall=11, gb_free=2.8, wall=177465 2021-06-20 19:56:42 | INFO | train_inner | epoch 006: 449 / 3002 loss=2.533, ppl=5.79, wps=5754.4, ups=0.09, wpb=64797, bsz=128, num_updates=15369, lr=9.9885e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=177476 2021-06-20 19:56:53 | INFO | train_inner | epoch 006: 450 / 3002 loss=2.507, ppl=5.68, wps=5836.6, ups=0.09, wpb=64835, bsz=128, num_updates=15370, lr=9.9885e-05, gnorm=1.908, loss_scale=8, train_wall=11, gb_free=2.8, wall=177487 2021-06-20 19:57:04 | INFO | train_inner | epoch 006: 451 / 3002 loss=2.471, ppl=5.54, wps=5920.2, ups=0.09, wpb=64867, bsz=128, num_updates=15371, lr=9.9885e-05, gnorm=1.964, loss_scale=8, train_wall=10, gb_free=2.8, wall=177498 2021-06-20 19:57:15 | INFO | train_inner | epoch 006: 452 / 3002 loss=2.45, ppl=5.46, wps=5782.2, ups=0.09, wpb=64791, bsz=128, num_updates=15372, lr=9.9885e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=177509 2021-06-20 19:57:26 | INFO | train_inner | epoch 006: 453 / 3002 loss=2.462, ppl=5.51, wps=5862.6, ups=0.09, wpb=64848, bsz=128, num_updates=15373, lr=9.9885e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=177520 2021-06-20 19:57:37 | INFO | train_inner | epoch 006: 454 / 3002 loss=2.387, ppl=5.23, wps=5795.8, ups=0.09, wpb=64790, bsz=128, num_updates=15374, lr=9.9885e-05, gnorm=1.898, loss_scale=8, train_wall=11, gb_free=2.8, wall=177531 2021-06-20 19:57:48 | INFO | train_inner | epoch 006: 455 / 3002 loss=2.301, ppl=4.93, wps=5853, ups=0.09, wpb=64903, bsz=128, num_updates=15375, lr=9.9885e-05, gnorm=1.914, loss_scale=8, train_wall=11, gb_free=2.8, wall=177543 2021-06-20 19:57:59 | INFO | train_inner | epoch 006: 456 / 3002 loss=2.453, ppl=5.48, wps=5898.4, ups=0.09, wpb=64849, bsz=128, num_updates=15376, lr=9.9885e-05, gnorm=1.963, loss_scale=8, train_wall=11, gb_free=2.8, wall=177554 2021-06-20 19:58:10 | INFO | train_inner | epoch 006: 457 / 3002 loss=2.581, ppl=5.98, wps=5886.6, ups=0.09, wpb=64808, bsz=128, num_updates=15377, lr=9.9885e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=177565 2021-06-20 19:58:21 | INFO | train_inner | epoch 006: 458 / 3002 loss=2.448, ppl=5.46, wps=5807.3, ups=0.09, wpb=64840, bsz=128, num_updates=15378, lr=9.9885e-05, gnorm=2.061, loss_scale=8, train_wall=11, gb_free=2.8, wall=177576 2021-06-20 19:58:32 | INFO | train_inner | epoch 006: 459 / 3002 loss=2.436, ppl=5.41, wps=5826.6, ups=0.09, wpb=64842, bsz=128, num_updates=15379, lr=9.9885e-05, gnorm=2.081, loss_scale=8, train_wall=11, gb_free=2.8, wall=177587 2021-06-20 19:58:43 | INFO | train_inner | epoch 006: 460 / 3002 loss=2.495, ppl=5.64, wps=6020.1, ups=0.09, wpb=64837, bsz=128, num_updates=15380, lr=9.9885e-05, gnorm=1.973, loss_scale=8, train_wall=10, gb_free=2.8, wall=177598 2021-06-20 19:58:54 | INFO | train_inner | epoch 006: 461 / 3002 loss=2.468, ppl=5.53, wps=5906.6, ups=0.09, wpb=64792, bsz=128, num_updates=15381, lr=9.98849e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=177609 2021-06-20 19:59:05 | INFO | train_inner | epoch 006: 462 / 3002 loss=2.493, ppl=5.63, wps=5842.5, ups=0.09, wpb=64809, bsz=128, num_updates=15382, lr=9.98849e-05, gnorm=1.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=177620 2021-06-20 19:59:16 | INFO | train_inner | epoch 006: 463 / 3002 loss=2.418, ppl=5.35, wps=5849.2, ups=0.09, wpb=64800, bsz=128, num_updates=15383, lr=9.98849e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=177631 2021-06-20 19:59:28 | INFO | train_inner | epoch 006: 464 / 3002 loss=2.626, ppl=6.17, wps=5759.8, ups=0.09, wpb=64778, bsz=128, num_updates=15384, lr=9.98849e-05, gnorm=2.077, loss_scale=8, train_wall=11, gb_free=2.8, wall=177642 2021-06-20 19:59:39 | INFO | train_inner | epoch 006: 465 / 3002 loss=2.392, ppl=5.25, wps=5786.7, ups=0.09, wpb=64836, bsz=128, num_updates=15385, lr=9.98849e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=177653 2021-06-20 19:59:50 | INFO | train_inner | epoch 006: 466 / 3002 loss=2.536, ppl=5.8, wps=5746, ups=0.09, wpb=64780, bsz=128, num_updates=15386, lr=9.98849e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=177664 2021-06-20 20:00:01 | INFO | train_inner | epoch 006: 467 / 3002 loss=2.45, ppl=5.46, wps=5738.2, ups=0.09, wpb=64821, bsz=128, num_updates=15387, lr=9.98849e-05, gnorm=1.907, loss_scale=8, train_wall=11, gb_free=2.8, wall=177676 2021-06-20 20:00:12 | INFO | train_inner | epoch 006: 468 / 3002 loss=2.517, ppl=5.72, wps=5923.5, ups=0.09, wpb=64826, bsz=128, num_updates=15388, lr=9.98849e-05, gnorm=1.978, loss_scale=8, train_wall=10, gb_free=2.8, wall=177687 2021-06-20 20:00:23 | INFO | train_inner | epoch 006: 469 / 3002 loss=2.559, ppl=5.89, wps=5904.9, ups=0.09, wpb=64878, bsz=128, num_updates=15389, lr=9.98849e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=177698 2021-06-20 20:00:34 | INFO | train_inner | epoch 006: 470 / 3002 loss=2.27, ppl=4.82, wps=5880.9, ups=0.09, wpb=64868, bsz=128, num_updates=15390, lr=9.98849e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=177709 2021-06-20 20:00:45 | INFO | train_inner | epoch 006: 471 / 3002 loss=2.424, ppl=5.37, wps=5915.1, ups=0.09, wpb=64898, bsz=128, num_updates=15391, lr=9.98849e-05, gnorm=1.982, loss_scale=8, train_wall=10, gb_free=2.8, wall=177720 2021-06-20 20:00:56 | INFO | train_inner | epoch 006: 472 / 3002 loss=2.343, ppl=5.07, wps=5867.3, ups=0.09, wpb=64793, bsz=128, num_updates=15392, lr=9.98849e-05, gnorm=2.004, loss_scale=8, train_wall=11, gb_free=2.8, wall=177731 2021-06-20 20:01:08 | INFO | train_inner | epoch 006: 473 / 3002 loss=2.538, ppl=5.81, wps=5799.3, ups=0.09, wpb=64835, bsz=128, num_updates=15393, lr=9.98848e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=177742 2021-06-20 20:01:19 | INFO | train_inner | epoch 006: 474 / 3002 loss=2.543, ppl=5.83, wps=5799.5, ups=0.09, wpb=64815, bsz=128, num_updates=15394, lr=9.98848e-05, gnorm=1.896, loss_scale=8, train_wall=11, gb_free=2.8, wall=177753 2021-06-20 20:01:30 | INFO | train_inner | epoch 006: 475 / 3002 loss=2.364, ppl=5.15, wps=5830.7, ups=0.09, wpb=64865, bsz=128, num_updates=15395, lr=9.98848e-05, gnorm=1.886, loss_scale=8, train_wall=11, gb_free=2.8, wall=177764 2021-06-20 20:01:41 | INFO | train_inner | epoch 006: 476 / 3002 loss=2.453, ppl=5.48, wps=5855.9, ups=0.09, wpb=64876, bsz=128, num_updates=15396, lr=9.98848e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=177775 2021-06-20 20:01:52 | INFO | train_inner | epoch 006: 477 / 3002 loss=2.734, ppl=6.65, wps=5953.4, ups=0.09, wpb=64857, bsz=128, num_updates=15397, lr=9.98848e-05, gnorm=1.967, loss_scale=8, train_wall=10, gb_free=2.8, wall=177786 2021-06-20 20:02:03 | INFO | train_inner | epoch 006: 478 / 3002 loss=2.478, ppl=5.57, wps=5844.2, ups=0.09, wpb=64748, bsz=128, num_updates=15398, lr=9.98848e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=177797 2021-06-20 20:02:14 | INFO | train_inner | epoch 006: 479 / 3002 loss=2.563, ppl=5.91, wps=5773.5, ups=0.09, wpb=64775, bsz=128, num_updates=15399, lr=9.98848e-05, gnorm=1.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=177808 2021-06-20 20:02:25 | INFO | train_inner | epoch 006: 480 / 3002 loss=2.391, ppl=5.25, wps=5982.7, ups=0.09, wpb=64779, bsz=128, num_updates=15400, lr=9.98848e-05, gnorm=1.967, loss_scale=8, train_wall=10, gb_free=2.8, wall=177819 2021-06-20 20:02:36 | INFO | train_inner | epoch 006: 481 / 3002 loss=2.434, ppl=5.4, wps=6004.3, ups=0.09, wpb=64860, bsz=128, num_updates=15401, lr=9.98848e-05, gnorm=1.992, loss_scale=8, train_wall=10, gb_free=2.8, wall=177830 2021-06-20 20:02:47 | INFO | train_inner | epoch 006: 482 / 3002 loss=2.543, ppl=5.83, wps=5874.7, ups=0.09, wpb=64848, bsz=128, num_updates=15402, lr=9.98848e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=177841 2021-06-20 20:02:58 | INFO | train_inner | epoch 006: 483 / 3002 loss=2.382, ppl=5.21, wps=5987.2, ups=0.09, wpb=64833, bsz=128, num_updates=15403, lr=9.98848e-05, gnorm=1.959, loss_scale=8, train_wall=10, gb_free=2.8, wall=177852 2021-06-20 20:03:09 | INFO | train_inner | epoch 006: 484 / 3002 loss=2.504, ppl=5.67, wps=5955.8, ups=0.09, wpb=64957, bsz=128, num_updates=15404, lr=9.98848e-05, gnorm=1.976, loss_scale=8, train_wall=10, gb_free=2.8, wall=177863 2021-06-20 20:03:20 | INFO | train_inner | epoch 006: 485 / 3002 loss=2.416, ppl=5.34, wps=5807.4, ups=0.09, wpb=64839, bsz=128, num_updates=15405, lr=9.98848e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=177874 2021-06-20 20:03:31 | INFO | train_inner | epoch 006: 486 / 3002 loss=2.468, ppl=5.53, wps=5892.9, ups=0.09, wpb=64768, bsz=128, num_updates=15406, lr=9.98847e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=177885 2021-06-20 20:03:42 | INFO | train_inner | epoch 006: 487 / 3002 loss=2.39, ppl=5.24, wps=5908.8, ups=0.09, wpb=64847, bsz=128, num_updates=15407, lr=9.98847e-05, gnorm=1.835, loss_scale=8, train_wall=11, gb_free=2.8, wall=177896 2021-06-20 20:03:53 | INFO | train_inner | epoch 006: 488 / 3002 loss=2.42, ppl=5.35, wps=5832.4, ups=0.09, wpb=64821, bsz=128, num_updates=15408, lr=9.98847e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=177907 2021-06-20 20:04:04 | INFO | train_inner | epoch 006: 489 / 3002 loss=2.544, ppl=5.83, wps=5881.7, ups=0.09, wpb=64826, bsz=128, num_updates=15409, lr=9.98847e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=177918 2021-06-20 20:04:15 | INFO | train_inner | epoch 006: 490 / 3002 loss=2.591, ppl=6.02, wps=5733.5, ups=0.09, wpb=64776, bsz=128, num_updates=15410, lr=9.98847e-05, gnorm=1.902, loss_scale=8, train_wall=11, gb_free=2.8, wall=177929 2021-06-20 20:04:26 | INFO | train_inner | epoch 006: 491 / 3002 loss=2.354, ppl=5.11, wps=5910.6, ups=0.09, wpb=64862, bsz=128, num_updates=15411, lr=9.98847e-05, gnorm=1.878, loss_scale=8, train_wall=10, gb_free=2.8, wall=177940 2021-06-20 20:04:37 | INFO | train_inner | epoch 006: 492 / 3002 loss=2.375, ppl=5.19, wps=5931.7, ups=0.09, wpb=64869, bsz=128, num_updates=15412, lr=9.98847e-05, gnorm=1.928, loss_scale=8, train_wall=10, gb_free=2.8, wall=177951 2021-06-20 20:04:48 | INFO | train_inner | epoch 006: 493 / 3002 loss=2.394, ppl=5.26, wps=5858.1, ups=0.09, wpb=64793, bsz=128, num_updates=15413, lr=9.98847e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=177962 2021-06-20 20:04:59 | INFO | train_inner | epoch 006: 494 / 3002 loss=2.356, ppl=5.12, wps=5947, ups=0.09, wpb=64943, bsz=128, num_updates=15414, lr=9.98847e-05, gnorm=2.078, loss_scale=8, train_wall=10, gb_free=2.8, wall=177973 2021-06-20 20:05:10 | INFO | train_inner | epoch 006: 495 / 3002 loss=2.623, ppl=6.16, wps=5794.9, ups=0.09, wpb=64832, bsz=128, num_updates=15415, lr=9.98847e-05, gnorm=2.105, loss_scale=8, train_wall=11, gb_free=2.8, wall=177985 2021-06-20 20:05:21 | INFO | train_inner | epoch 006: 496 / 3002 loss=2.466, ppl=5.53, wps=5741.8, ups=0.09, wpb=64831, bsz=128, num_updates=15416, lr=9.98847e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=177996 2021-06-20 20:05:32 | INFO | train_inner | epoch 006: 497 / 3002 loss=2.519, ppl=5.73, wps=5887.5, ups=0.09, wpb=64819, bsz=128, num_updates=15417, lr=9.98847e-05, gnorm=2.008, loss_scale=8, train_wall=11, gb_free=2.8, wall=178007 2021-06-20 20:05:44 | INFO | train_inner | epoch 006: 498 / 3002 loss=2.361, ppl=5.14, wps=5853.9, ups=0.09, wpb=64885, bsz=128, num_updates=15418, lr=9.98846e-05, gnorm=2.024, loss_scale=8, train_wall=11, gb_free=2.8, wall=178018 2021-06-20 20:05:55 | INFO | train_inner | epoch 006: 499 / 3002 loss=2.496, ppl=5.64, wps=5792.4, ups=0.09, wpb=64717, bsz=128, num_updates=15419, lr=9.98846e-05, gnorm=1.89, loss_scale=8, train_wall=11, gb_free=2.8, wall=178029 2021-06-20 20:06:06 | INFO | train_inner | epoch 006: 500 / 3002 loss=2.499, ppl=5.65, wps=5854.8, ups=0.09, wpb=64797, bsz=128, num_updates=15420, lr=9.98846e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=178040 2021-06-20 20:06:17 | INFO | train_inner | epoch 006: 501 / 3002 loss=2.429, ppl=5.39, wps=5793.2, ups=0.09, wpb=64831, bsz=128, num_updates=15421, lr=9.98846e-05, gnorm=1.845, loss_scale=8, train_wall=11, gb_free=2.8, wall=178051 2021-06-20 20:06:28 | INFO | train_inner | epoch 006: 502 / 3002 loss=2.553, ppl=5.87, wps=5879.1, ups=0.09, wpb=64884, bsz=128, num_updates=15422, lr=9.98846e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=178062 2021-06-20 20:06:39 | INFO | train_inner | epoch 006: 503 / 3002 loss=2.43, ppl=5.39, wps=5860.3, ups=0.09, wpb=64871, bsz=128, num_updates=15423, lr=9.98846e-05, gnorm=1.924, loss_scale=16, train_wall=11, gb_free=2.8, wall=178073 2021-06-20 20:06:50 | INFO | train_inner | epoch 006: 504 / 3002 loss=2.504, ppl=5.67, wps=5962.4, ups=0.09, wpb=64782, bsz=128, num_updates=15424, lr=9.98846e-05, gnorm=1.915, loss_scale=16, train_wall=10, gb_free=2.8, wall=178084 2021-06-20 20:07:01 | INFO | train_inner | epoch 006: 505 / 3002 loss=2.469, ppl=5.54, wps=5821, ups=0.09, wpb=64817, bsz=128, num_updates=15425, lr=9.98846e-05, gnorm=2.013, loss_scale=16, train_wall=11, gb_free=2.8, wall=178095 2021-06-20 20:07:12 | INFO | train_inner | epoch 006: 506 / 3002 loss=2.573, ppl=5.95, wps=5927.3, ups=0.09, wpb=64835, bsz=128, num_updates=15426, lr=9.98846e-05, gnorm=1.949, loss_scale=16, train_wall=10, gb_free=2.8, wall=178106 2021-06-20 20:07:23 | INFO | train_inner | epoch 006: 507 / 3002 loss=2.296, ppl=4.91, wps=5837.4, ups=0.09, wpb=64859, bsz=128, num_updates=15427, lr=9.98846e-05, gnorm=2.002, loss_scale=16, train_wall=11, gb_free=2.8, wall=178118 2021-06-20 20:07:34 | INFO | train_inner | epoch 006: 508 / 3002 loss=2.498, ppl=5.65, wps=5915.7, ups=0.09, wpb=64776, bsz=128, num_updates=15428, lr=9.98846e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=178128 2021-06-20 20:07:45 | INFO | train_inner | epoch 006: 509 / 3002 loss=2.556, ppl=5.88, wps=5885.5, ups=0.09, wpb=64908, bsz=128, num_updates=15429, lr=9.98846e-05, gnorm=1.919, loss_scale=16, train_wall=11, gb_free=2.8, wall=178139 2021-06-20 20:07:56 | INFO | train_inner | epoch 006: 510 / 3002 loss=2.411, ppl=5.32, wps=5853.6, ups=0.09, wpb=64747, bsz=128, num_updates=15430, lr=9.98846e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=178151 2021-06-20 20:08:07 | INFO | train_inner | epoch 006: 511 / 3002 loss=2.519, ppl=5.73, wps=5924.4, ups=0.09, wpb=64794, bsz=128, num_updates=15431, lr=9.98845e-05, gnorm=1.944, loss_scale=16, train_wall=10, gb_free=2.8, wall=178161 2021-06-20 20:08:18 | INFO | train_inner | epoch 006: 512 / 3002 loss=2.292, ppl=4.9, wps=5908.2, ups=0.09, wpb=64832, bsz=128, num_updates=15432, lr=9.98845e-05, gnorm=1.851, loss_scale=16, train_wall=11, gb_free=2.8, wall=178172 2021-06-20 20:08:29 | INFO | train_inner | epoch 006: 513 / 3002 loss=2.582, ppl=5.99, wps=5878.8, ups=0.09, wpb=64821, bsz=128, num_updates=15433, lr=9.98845e-05, gnorm=2.118, loss_scale=16, train_wall=11, gb_free=2.8, wall=178183 2021-06-20 20:08:40 | INFO | train_inner | epoch 006: 514 / 3002 loss=2.48, ppl=5.58, wps=5765.9, ups=0.09, wpb=64817, bsz=128, num_updates=15434, lr=9.98845e-05, gnorm=1.924, loss_scale=16, train_wall=11, gb_free=2.8, wall=178195 2021-06-20 20:08:52 | INFO | train_inner | epoch 006: 515 / 3002 loss=2.518, ppl=5.73, wps=5786, ups=0.09, wpb=64841, bsz=128, num_updates=15435, lr=9.98845e-05, gnorm=1.978, loss_scale=16, train_wall=11, gb_free=2.8, wall=178206 2021-06-20 20:09:02 | INFO | train_inner | epoch 006: 516 / 3002 loss=2.454, ppl=5.48, wps=5947.6, ups=0.09, wpb=64784, bsz=128, num_updates=15436, lr=9.98845e-05, gnorm=1.887, loss_scale=16, train_wall=10, gb_free=2.8, wall=178217 2021-06-20 20:09:14 | INFO | train_inner | epoch 006: 517 / 3002 loss=2.375, ppl=5.19, wps=5861, ups=0.09, wpb=64819, bsz=128, num_updates=15437, lr=9.98845e-05, gnorm=1.851, loss_scale=16, train_wall=11, gb_free=2.8, wall=178228 2021-06-20 20:09:25 | INFO | train_inner | epoch 006: 518 / 3002 loss=2.402, ppl=5.29, wps=5890.7, ups=0.09, wpb=64824, bsz=128, num_updates=15438, lr=9.98845e-05, gnorm=1.892, loss_scale=16, train_wall=11, gb_free=2.8, wall=178239 2021-06-20 20:09:36 | INFO | train_inner | epoch 006: 519 / 3002 loss=2.549, ppl=5.85, wps=5845.5, ups=0.09, wpb=64835, bsz=128, num_updates=15439, lr=9.98845e-05, gnorm=1.994, loss_scale=16, train_wall=11, gb_free=2.8, wall=178250 2021-06-20 20:09:47 | INFO | train_inner | epoch 006: 520 / 3002 loss=2.509, ppl=5.69, wps=5944.3, ups=0.09, wpb=64802, bsz=128, num_updates=15440, lr=9.98845e-05, gnorm=1.871, loss_scale=16, train_wall=10, gb_free=2.8, wall=178261 2021-06-20 20:09:58 | INFO | train_inner | epoch 006: 521 / 3002 loss=2.576, ppl=5.96, wps=5831.8, ups=0.09, wpb=64839, bsz=128, num_updates=15441, lr=9.98845e-05, gnorm=1.972, loss_scale=16, train_wall=11, gb_free=2.8, wall=178272 2021-06-20 20:10:09 | INFO | train_inner | epoch 006: 522 / 3002 loss=2.438, ppl=5.42, wps=5886.8, ups=0.09, wpb=64869, bsz=128, num_updates=15442, lr=9.98845e-05, gnorm=1.896, loss_scale=16, train_wall=11, gb_free=2.8, wall=178283 2021-06-20 20:10:20 | INFO | train_inner | epoch 006: 523 / 3002 loss=2.368, ppl=5.16, wps=5894, ups=0.09, wpb=64897, bsz=128, num_updates=15443, lr=9.98844e-05, gnorm=1.91, loss_scale=16, train_wall=11, gb_free=2.8, wall=178294 2021-06-20 20:10:31 | INFO | train_inner | epoch 006: 524 / 3002 loss=2.541, ppl=5.82, wps=5882.1, ups=0.09, wpb=64915, bsz=128, num_updates=15444, lr=9.98844e-05, gnorm=1.908, loss_scale=16, train_wall=11, gb_free=2.8, wall=178305 2021-06-20 20:10:41 | INFO | train_inner | epoch 006: 525 / 3002 loss=2.502, ppl=5.66, wps=6086.9, ups=0.09, wpb=64855, bsz=128, num_updates=15445, lr=9.98844e-05, gnorm=1.926, loss_scale=16, train_wall=10, gb_free=2.8, wall=178316 2021-06-20 20:10:52 | INFO | train_inner | epoch 006: 526 / 3002 loss=2.448, ppl=5.45, wps=5915.9, ups=0.09, wpb=64764, bsz=128, num_updates=15446, lr=9.98844e-05, gnorm=1.983, loss_scale=16, train_wall=11, gb_free=2.8, wall=178327 2021-06-20 20:11:04 | INFO | train_inner | epoch 006: 527 / 3002 loss=2.411, ppl=5.32, wps=5698.6, ups=0.09, wpb=64788, bsz=128, num_updates=15447, lr=9.98844e-05, gnorm=2.018, loss_scale=16, train_wall=11, gb_free=2.8, wall=178338 2021-06-20 20:11:14 | INFO | train_inner | epoch 006: 528 / 3002 loss=2.427, ppl=5.38, wps=6032.4, ups=0.09, wpb=64858, bsz=128, num_updates=15448, lr=9.98844e-05, gnorm=2.01, loss_scale=16, train_wall=10, gb_free=2.8, wall=178349 2021-06-20 20:11:25 | INFO | train_inner | epoch 006: 529 / 3002 loss=2.508, ppl=5.69, wps=5927.7, ups=0.09, wpb=64895, bsz=128, num_updates=15449, lr=9.98844e-05, gnorm=1.969, loss_scale=16, train_wall=10, gb_free=2.8, wall=178360 2021-06-20 20:11:37 | INFO | train_inner | epoch 006: 530 / 3002 loss=2.502, ppl=5.67, wps=5810.3, ups=0.09, wpb=64868, bsz=128, num_updates=15450, lr=9.98844e-05, gnorm=1.94, loss_scale=16, train_wall=11, gb_free=2.8, wall=178371 2021-06-20 20:11:48 | INFO | train_inner | epoch 006: 531 / 3002 loss=2.376, ppl=5.19, wps=5866.1, ups=0.09, wpb=64748, bsz=128, num_updates=15451, lr=9.98844e-05, gnorm=1.811, loss_scale=16, train_wall=11, gb_free=2.8, wall=178382 2021-06-20 20:11:59 | INFO | train_inner | epoch 006: 532 / 3002 loss=2.318, ppl=4.98, wps=5783.3, ups=0.09, wpb=64935, bsz=128, num_updates=15452, lr=9.98844e-05, gnorm=2.098, loss_scale=16, train_wall=11, gb_free=2.8, wall=178393 2021-06-20 20:12:10 | INFO | train_inner | epoch 006: 533 / 3002 loss=2.518, ppl=5.73, wps=5772.2, ups=0.09, wpb=64900, bsz=128, num_updates=15453, lr=9.98844e-05, gnorm=1.98, loss_scale=16, train_wall=11, gb_free=2.8, wall=178404 2021-06-20 20:12:21 | INFO | train_inner | epoch 006: 534 / 3002 loss=2.423, ppl=5.36, wps=5777.7, ups=0.09, wpb=64822, bsz=128, num_updates=15454, lr=9.98844e-05, gnorm=1.905, loss_scale=16, train_wall=11, gb_free=2.8, wall=178416 2021-06-20 20:12:33 | INFO | train_inner | epoch 006: 535 / 3002 loss=2.537, ppl=5.8, wps=5735.9, ups=0.09, wpb=64801, bsz=128, num_updates=15455, lr=9.98844e-05, gnorm=1.944, loss_scale=16, train_wall=11, gb_free=2.8, wall=178427 2021-06-20 20:12:44 | INFO | train_inner | epoch 006: 536 / 3002 loss=2.403, ppl=5.29, wps=5902.7, ups=0.09, wpb=64799, bsz=128, num_updates=15456, lr=9.98843e-05, gnorm=1.986, loss_scale=16, train_wall=10, gb_free=2.8, wall=178438 2021-06-20 20:12:55 | INFO | train_inner | epoch 006: 537 / 3002 loss=2.552, ppl=5.87, wps=5811.9, ups=0.09, wpb=64786, bsz=128, num_updates=15457, lr=9.98843e-05, gnorm=2.038, loss_scale=16, train_wall=11, gb_free=2.8, wall=178449 2021-06-20 20:13:06 | INFO | train_inner | epoch 006: 538 / 3002 loss=2.416, ppl=5.34, wps=5812.4, ups=0.09, wpb=64850, bsz=128, num_updates=15458, lr=9.98843e-05, gnorm=1.87, loss_scale=16, train_wall=11, gb_free=2.8, wall=178460 2021-06-20 20:13:17 | INFO | train_inner | epoch 006: 539 / 3002 loss=2.568, ppl=5.93, wps=5798.1, ups=0.09, wpb=64756, bsz=128, num_updates=15459, lr=9.98843e-05, gnorm=1.903, loss_scale=16, train_wall=11, gb_free=2.8, wall=178471 2021-06-20 20:13:28 | INFO | train_inner | epoch 006: 540 / 3002 loss=2.477, ppl=5.57, wps=5807.6, ups=0.09, wpb=64776, bsz=128, num_updates=15460, lr=9.98843e-05, gnorm=1.993, loss_scale=16, train_wall=11, gb_free=2.8, wall=178483 2021-06-20 20:13:39 | INFO | train_inner | epoch 006: 541 / 3002 loss=2.527, ppl=5.76, wps=5822.1, ups=0.09, wpb=64831, bsz=128, num_updates=15461, lr=9.98843e-05, gnorm=1.969, loss_scale=16, train_wall=11, gb_free=2.8, wall=178494 2021-06-20 20:13:51 | INFO | train_inner | epoch 006: 542 / 3002 loss=2.572, ppl=5.95, wps=5789.5, ups=0.09, wpb=64886, bsz=128, num_updates=15462, lr=9.98843e-05, gnorm=1.973, loss_scale=16, train_wall=11, gb_free=2.8, wall=178505 2021-06-20 20:14:02 | INFO | train_inner | epoch 006: 543 / 3002 loss=2.529, ppl=5.77, wps=5876.5, ups=0.09, wpb=64884, bsz=128, num_updates=15463, lr=9.98843e-05, gnorm=1.897, loss_scale=16, train_wall=11, gb_free=2.8, wall=178516 2021-06-20 20:14:13 | INFO | train_inner | epoch 006: 544 / 3002 loss=2.64, ppl=6.23, wps=5863.3, ups=0.09, wpb=64814, bsz=128, num_updates=15464, lr=9.98843e-05, gnorm=1.988, loss_scale=16, train_wall=11, gb_free=2.8, wall=178527 2021-06-20 20:14:24 | INFO | train_inner | epoch 006: 545 / 3002 loss=2.409, ppl=5.31, wps=5863, ups=0.09, wpb=64799, bsz=128, num_updates=15465, lr=9.98843e-05, gnorm=1.945, loss_scale=16, train_wall=11, gb_free=2.8, wall=178538 2021-06-20 20:14:35 | INFO | train_inner | epoch 006: 546 / 3002 loss=2.621, ppl=6.15, wps=5810.9, ups=0.09, wpb=64895, bsz=128, num_updates=15466, lr=9.98843e-05, gnorm=2.021, loss_scale=16, train_wall=11, gb_free=2.8, wall=178549 2021-06-20 20:14:46 | INFO | train_inner | epoch 006: 547 / 3002 loss=2.534, ppl=5.79, wps=5928.7, ups=0.09, wpb=64834, bsz=128, num_updates=15467, lr=9.98843e-05, gnorm=2.271, loss_scale=16, train_wall=10, gb_free=2.8, wall=178560 2021-06-20 20:14:57 | INFO | train_inner | epoch 006: 548 / 3002 loss=2.516, ppl=5.72, wps=5798.6, ups=0.09, wpb=64828, bsz=128, num_updates=15468, lr=9.98842e-05, gnorm=1.907, loss_scale=16, train_wall=11, gb_free=2.8, wall=178571 2021-06-20 20:15:08 | INFO | train_inner | epoch 006: 549 / 3002 loss=2.631, ppl=6.19, wps=5808.3, ups=0.09, wpb=64870, bsz=128, num_updates=15469, lr=9.98842e-05, gnorm=1.868, loss_scale=16, train_wall=11, gb_free=2.8, wall=178582 2021-06-20 20:15:19 | INFO | train_inner | epoch 006: 550 / 3002 loss=2.504, ppl=5.67, wps=5838.1, ups=0.09, wpb=64851, bsz=128, num_updates=15470, lr=9.98842e-05, gnorm=2.002, loss_scale=16, train_wall=11, gb_free=2.8, wall=178594 2021-06-20 20:15:30 | INFO | train_inner | epoch 006: 551 / 3002 loss=2.472, ppl=5.55, wps=5863.1, ups=0.09, wpb=64856, bsz=128, num_updates=15471, lr=9.98842e-05, gnorm=1.992, loss_scale=16, train_wall=11, gb_free=2.8, wall=178605 2021-06-20 20:15:41 | INFO | train_inner | epoch 006: 552 / 3002 loss=2.399, ppl=5.27, wps=5837.2, ups=0.09, wpb=64867, bsz=128, num_updates=15472, lr=9.98842e-05, gnorm=1.955, loss_scale=16, train_wall=11, gb_free=2.8, wall=178616 2021-06-20 20:15:52 | INFO | train_inner | epoch 006: 553 / 3002 loss=2.328, ppl=5.02, wps=5893.5, ups=0.09, wpb=64908, bsz=128, num_updates=15473, lr=9.98842e-05, gnorm=1.879, loss_scale=16, train_wall=11, gb_free=2.8, wall=178627 2021-06-20 20:16:03 | INFO | train_inner | epoch 006: 554 / 3002 loss=2.319, ppl=4.99, wps=5867, ups=0.09, wpb=64833, bsz=128, num_updates=15474, lr=9.98842e-05, gnorm=1.922, loss_scale=16, train_wall=11, gb_free=2.8, wall=178638 2021-06-20 20:16:15 | INFO | train_inner | epoch 006: 555 / 3002 loss=2.332, ppl=5.04, wps=5785.9, ups=0.09, wpb=64834, bsz=128, num_updates=15475, lr=9.98842e-05, gnorm=1.894, loss_scale=16, train_wall=11, gb_free=2.8, wall=178649 2021-06-20 20:16:26 | INFO | train_inner | epoch 006: 556 / 3002 loss=2.678, ppl=6.4, wps=5816.9, ups=0.09, wpb=64854, bsz=128, num_updates=15476, lr=9.98842e-05, gnorm=2.01, loss_scale=16, train_wall=11, gb_free=2.8, wall=178660 2021-06-20 20:16:37 | INFO | train_inner | epoch 006: 557 / 3002 loss=2.401, ppl=5.28, wps=5812.4, ups=0.09, wpb=64713, bsz=128, num_updates=15477, lr=9.98842e-05, gnorm=1.977, loss_scale=16, train_wall=11, gb_free=2.8, wall=178671 2021-06-20 20:16:48 | INFO | train_inner | epoch 006: 558 / 3002 loss=2.364, ppl=5.15, wps=5896, ups=0.09, wpb=64773, bsz=128, num_updates=15478, lr=9.98842e-05, gnorm=1.942, loss_scale=16, train_wall=11, gb_free=2.8, wall=178682 2021-06-20 20:16:59 | INFO | train_inner | epoch 006: 559 / 3002 loss=2.613, ppl=6.12, wps=5918, ups=0.09, wpb=64873, bsz=128, num_updates=15479, lr=9.98842e-05, gnorm=1.994, loss_scale=16, train_wall=11, gb_free=2.8, wall=178693 2021-06-20 20:17:10 | INFO | train_inner | epoch 006: 560 / 3002 loss=2.439, ppl=5.42, wps=5918, ups=0.09, wpb=64798, bsz=128, num_updates=15480, lr=9.98842e-05, gnorm=1.949, loss_scale=16, train_wall=11, gb_free=2.8, wall=178704 2021-06-20 20:17:21 | INFO | train_inner | epoch 006: 561 / 3002 loss=2.394, ppl=5.26, wps=5931.7, ups=0.09, wpb=64846, bsz=128, num_updates=15481, lr=9.98841e-05, gnorm=1.902, loss_scale=16, train_wall=10, gb_free=2.8, wall=178715 2021-06-20 20:17:32 | INFO | train_inner | epoch 006: 562 / 3002 loss=2.462, ppl=5.51, wps=5818.6, ups=0.09, wpb=64909, bsz=128, num_updates=15482, lr=9.98841e-05, gnorm=1.971, loss_scale=16, train_wall=11, gb_free=2.8, wall=178726 2021-06-20 20:17:43 | INFO | train_inner | epoch 006: 563 / 3002 loss=2.388, ppl=5.23, wps=5760.5, ups=0.09, wpb=64923, bsz=128, num_updates=15483, lr=9.98841e-05, gnorm=1.961, loss_scale=16, train_wall=11, gb_free=2.8, wall=178738 2021-06-20 20:17:54 | INFO | train_inner | epoch 006: 564 / 3002 loss=2.4, ppl=5.28, wps=5761.9, ups=0.09, wpb=64743, bsz=128, num_updates=15484, lr=9.98841e-05, gnorm=1.939, loss_scale=16, train_wall=11, gb_free=2.8, wall=178749 2021-06-20 20:18:05 | INFO | train_inner | epoch 006: 565 / 3002 loss=2.374, ppl=5.18, wps=5880.7, ups=0.09, wpb=64764, bsz=128, num_updates=15485, lr=9.98841e-05, gnorm=2.058, loss_scale=16, train_wall=11, gb_free=2.8, wall=178760 2021-06-20 20:18:17 | INFO | train_inner | epoch 006: 566 / 3002 loss=2.55, ppl=5.86, wps=5753, ups=0.09, wpb=64834, bsz=128, num_updates=15486, lr=9.98841e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=178771 2021-06-20 20:18:28 | INFO | train_inner | epoch 006: 567 / 3002 loss=2.538, ppl=5.81, wps=5767.2, ups=0.09, wpb=64768, bsz=128, num_updates=15487, lr=9.98841e-05, gnorm=2.025, loss_scale=16, train_wall=11, gb_free=2.8, wall=178782 2021-06-20 20:18:39 | INFO | train_inner | epoch 006: 568 / 3002 loss=2.564, ppl=5.91, wps=5871.8, ups=0.09, wpb=64817, bsz=128, num_updates=15488, lr=9.98841e-05, gnorm=1.962, loss_scale=16, train_wall=11, gb_free=2.8, wall=178793 2021-06-20 20:18:50 | INFO | train_inner | epoch 006: 569 / 3002 loss=2.567, ppl=5.92, wps=5865.4, ups=0.09, wpb=64819, bsz=128, num_updates=15489, lr=9.98841e-05, gnorm=1.942, loss_scale=16, train_wall=11, gb_free=2.8, wall=178804 2021-06-20 20:19:01 | INFO | train_inner | epoch 006: 570 / 3002 loss=2.291, ppl=4.89, wps=5895.9, ups=0.09, wpb=64893, bsz=128, num_updates=15490, lr=9.98841e-05, gnorm=1.88, loss_scale=16, train_wall=11, gb_free=2.8, wall=178815 2021-06-20 20:19:12 | INFO | train_inner | epoch 006: 571 / 3002 loss=2.432, ppl=5.4, wps=5840.9, ups=0.09, wpb=64815, bsz=128, num_updates=15491, lr=9.98841e-05, gnorm=1.879, loss_scale=16, train_wall=11, gb_free=2.8, wall=178827 2021-06-20 20:19:23 | INFO | train_inner | epoch 006: 572 / 3002 loss=2.528, ppl=5.77, wps=5861.1, ups=0.09, wpb=64774, bsz=128, num_updates=15492, lr=9.98841e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=178838 2021-06-20 20:19:34 | INFO | train_inner | epoch 006: 573 / 3002 loss=2.627, ppl=6.18, wps=5949.2, ups=0.09, wpb=64820, bsz=128, num_updates=15493, lr=9.9884e-05, gnorm=2.023, loss_scale=16, train_wall=10, gb_free=2.8, wall=178848 2021-06-20 20:19:45 | INFO | train_inner | epoch 006: 574 / 3002 loss=2.332, ppl=5.03, wps=5828.4, ups=0.09, wpb=64881, bsz=128, num_updates=15494, lr=9.9884e-05, gnorm=1.838, loss_scale=16, train_wall=11, gb_free=2.8, wall=178860 2021-06-20 20:19:56 | INFO | train_inner | epoch 006: 575 / 3002 loss=2.642, ppl=6.24, wps=5792.7, ups=0.09, wpb=64744, bsz=128, num_updates=15495, lr=9.9884e-05, gnorm=2.051, loss_scale=16, train_wall=11, gb_free=2.8, wall=178871 2021-06-20 20:20:07 | INFO | train_inner | epoch 006: 576 / 3002 loss=2.388, ppl=5.23, wps=5881.5, ups=0.09, wpb=64795, bsz=128, num_updates=15496, lr=9.9884e-05, gnorm=1.978, loss_scale=16, train_wall=11, gb_free=2.8, wall=178882 2021-06-20 20:20:19 | INFO | train_inner | epoch 006: 577 / 3002 loss=2.442, ppl=5.44, wps=5803.5, ups=0.09, wpb=64840, bsz=128, num_updates=15497, lr=9.9884e-05, gnorm=2.012, loss_scale=16, train_wall=11, gb_free=2.8, wall=178893 2021-06-20 20:20:30 | INFO | train_inner | epoch 006: 578 / 3002 loss=2.563, ppl=5.91, wps=5849.9, ups=0.09, wpb=64882, bsz=128, num_updates=15498, lr=9.9884e-05, gnorm=2.032, loss_scale=16, train_wall=11, gb_free=2.8, wall=178904 2021-06-20 20:20:41 | INFO | train_inner | epoch 006: 579 / 3002 loss=2.455, ppl=5.48, wps=5811.7, ups=0.09, wpb=64799, bsz=128, num_updates=15499, lr=9.9884e-05, gnorm=2.038, loss_scale=16, train_wall=11, gb_free=2.8, wall=178915 2021-06-20 20:20:52 | INFO | train_inner | epoch 006: 580 / 3002 loss=2.346, ppl=5.08, wps=5719.4, ups=0.09, wpb=64800, bsz=128, num_updates=15500, lr=9.9884e-05, gnorm=2.018, loss_scale=16, train_wall=11, gb_free=2.8, wall=178927 2021-06-20 20:21:03 | INFO | train_inner | epoch 006: 581 / 3002 loss=2.501, ppl=5.66, wps=5801.5, ups=0.09, wpb=64779, bsz=128, num_updates=15501, lr=9.9884e-05, gnorm=1.86, loss_scale=16, train_wall=11, gb_free=2.8, wall=178938 2021-06-20 20:21:15 | INFO | train_inner | epoch 006: 582 / 3002 loss=2.436, ppl=5.41, wps=5752.4, ups=0.09, wpb=64851, bsz=128, num_updates=15502, lr=9.9884e-05, gnorm=1.874, loss_scale=16, train_wall=11, gb_free=2.8, wall=178949 2021-06-20 20:21:26 | INFO | train_inner | epoch 006: 583 / 3002 loss=2.481, ppl=5.58, wps=5806.9, ups=0.09, wpb=64826, bsz=128, num_updates=15503, lr=9.9884e-05, gnorm=1.881, loss_scale=16, train_wall=11, gb_free=2.8, wall=178960 2021-06-20 20:21:37 | INFO | train_inner | epoch 006: 584 / 3002 loss=2.386, ppl=5.23, wps=5943.7, ups=0.09, wpb=64814, bsz=128, num_updates=15504, lr=9.9884e-05, gnorm=2.017, loss_scale=16, train_wall=10, gb_free=2.8, wall=178971 2021-06-20 20:21:48 | INFO | train_inner | epoch 006: 585 / 3002 loss=2.434, ppl=5.4, wps=5749.4, ups=0.09, wpb=64828, bsz=128, num_updates=15505, lr=9.9884e-05, gnorm=1.859, loss_scale=16, train_wall=11, gb_free=2.8, wall=178982 2021-06-20 20:21:59 | INFO | train_inner | epoch 006: 586 / 3002 loss=2.518, ppl=5.73, wps=5791.4, ups=0.09, wpb=64737, bsz=128, num_updates=15506, lr=9.98839e-05, gnorm=1.969, loss_scale=16, train_wall=11, gb_free=2.8, wall=178994 2021-06-20 20:22:10 | INFO | train_inner | epoch 006: 587 / 3002 loss=2.301, ppl=4.93, wps=5916.2, ups=0.09, wpb=64845, bsz=128, num_updates=15507, lr=9.98839e-05, gnorm=2.037, loss_scale=16, train_wall=11, gb_free=2.8, wall=179004 2021-06-20 20:22:21 | INFO | train_inner | epoch 006: 588 / 3002 loss=2.412, ppl=5.32, wps=5815.3, ups=0.09, wpb=64770, bsz=128, num_updates=15508, lr=9.98839e-05, gnorm=2.017, loss_scale=16, train_wall=11, gb_free=2.8, wall=179016 2021-06-20 20:22:32 | INFO | train_inner | epoch 006: 589 / 3002 loss=2.514, ppl=5.71, wps=5786.4, ups=0.09, wpb=64768, bsz=128, num_updates=15509, lr=9.98839e-05, gnorm=2.08, loss_scale=16, train_wall=11, gb_free=2.8, wall=179027 2021-06-20 20:22:43 | INFO | train_inner | epoch 006: 590 / 3002 loss=2.448, ppl=5.46, wps=5983.7, ups=0.09, wpb=64905, bsz=128, num_updates=15510, lr=9.98839e-05, gnorm=2.041, loss_scale=16, train_wall=10, gb_free=2.8, wall=179038 2021-06-20 20:22:54 | INFO | train_inner | epoch 006: 591 / 3002 loss=2.589, ppl=6.02, wps=5807.3, ups=0.09, wpb=64755, bsz=128, num_updates=15511, lr=9.98839e-05, gnorm=2.007, loss_scale=16, train_wall=11, gb_free=2.8, wall=179049 2021-06-20 20:23:05 | INFO | train_inner | epoch 006: 592 / 3002 loss=2.379, ppl=5.2, wps=5925.8, ups=0.09, wpb=64826, bsz=128, num_updates=15512, lr=9.98839e-05, gnorm=1.942, loss_scale=16, train_wall=10, gb_free=2.8, wall=179060 2021-06-20 20:23:16 | INFO | train_inner | epoch 006: 593 / 3002 loss=2.41, ppl=5.32, wps=5853.2, ups=0.09, wpb=64789, bsz=128, num_updates=15513, lr=9.98839e-05, gnorm=1.847, loss_scale=16, train_wall=11, gb_free=2.8, wall=179071 2021-06-20 20:23:28 | INFO | train_inner | epoch 006: 594 / 3002 loss=2.481, ppl=5.58, wps=5830.7, ups=0.09, wpb=64809, bsz=128, num_updates=15514, lr=9.98839e-05, gnorm=2.236, loss_scale=16, train_wall=11, gb_free=2.8, wall=179082 2021-06-20 20:23:39 | INFO | train_inner | epoch 006: 595 / 3002 loss=2.526, ppl=5.76, wps=5826.5, ups=0.09, wpb=64829, bsz=128, num_updates=15515, lr=9.98839e-05, gnorm=2.011, loss_scale=16, train_wall=11, gb_free=2.8, wall=179093 2021-06-20 20:23:50 | INFO | train_inner | epoch 006: 596 / 3002 loss=2.383, ppl=5.22, wps=5973.7, ups=0.09, wpb=64883, bsz=128, num_updates=15516, lr=9.98839e-05, gnorm=1.905, loss_scale=16, train_wall=10, gb_free=2.8, wall=179104 2021-06-20 20:24:01 | INFO | train_inner | epoch 006: 597 / 3002 loss=2.484, ppl=5.59, wps=5872, ups=0.09, wpb=64883, bsz=128, num_updates=15517, lr=9.98839e-05, gnorm=1.9, loss_scale=16, train_wall=11, gb_free=2.8, wall=179115 2021-06-20 20:24:12 | INFO | train_inner | epoch 006: 598 / 3002 loss=2.49, ppl=5.62, wps=5793.2, ups=0.09, wpb=64824, bsz=128, num_updates=15518, lr=9.98838e-05, gnorm=1.859, loss_scale=16, train_wall=11, gb_free=2.8, wall=179126 2021-06-20 20:24:23 | INFO | train_inner | epoch 006: 599 / 3002 loss=2.438, ppl=5.42, wps=5869.3, ups=0.09, wpb=64791, bsz=128, num_updates=15519, lr=9.98838e-05, gnorm=1.996, loss_scale=16, train_wall=11, gb_free=2.8, wall=179137 2021-06-20 20:24:34 | INFO | train_inner | epoch 006: 600 / 3002 loss=2.552, ppl=5.87, wps=5903.3, ups=0.09, wpb=64781, bsz=128, num_updates=15520, lr=9.98838e-05, gnorm=1.957, loss_scale=16, train_wall=11, gb_free=2.8, wall=179148 2021-06-20 20:24:45 | INFO | train_inner | epoch 006: 601 / 3002 loss=2.413, ppl=5.33, wps=5781.3, ups=0.09, wpb=64768, bsz=128, num_updates=15521, lr=9.98838e-05, gnorm=1.953, loss_scale=16, train_wall=11, gb_free=2.8, wall=179159 2021-06-20 20:24:56 | INFO | train_inner | epoch 006: 602 / 3002 loss=2.469, ppl=5.54, wps=5858.5, ups=0.09, wpb=64872, bsz=128, num_updates=15522, lr=9.98838e-05, gnorm=1.889, loss_scale=16, train_wall=11, gb_free=2.8, wall=179170 2021-06-20 20:25:07 | INFO | train_inner | epoch 006: 603 / 3002 loss=2.526, ppl=5.76, wps=5807, ups=0.09, wpb=64742, bsz=128, num_updates=15523, lr=9.98838e-05, gnorm=3.059, loss_scale=16, train_wall=11, gb_free=2.8, wall=179182 2021-06-20 20:25:18 | INFO | train_inner | epoch 006: 604 / 3002 loss=2.564, ppl=5.91, wps=5809.2, ups=0.09, wpb=64816, bsz=128, num_updates=15524, lr=9.98838e-05, gnorm=1.985, loss_scale=16, train_wall=11, gb_free=2.8, wall=179193 2021-06-20 20:25:30 | INFO | train_inner | epoch 006: 605 / 3002 loss=2.558, ppl=5.89, wps=5836.8, ups=0.09, wpb=64840, bsz=128, num_updates=15525, lr=9.98838e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=179204 2021-06-20 20:25:41 | INFO | train_inner | epoch 006: 606 / 3002 loss=2.605, ppl=6.08, wps=5795.6, ups=0.09, wpb=64878, bsz=128, num_updates=15526, lr=9.98838e-05, gnorm=1.953, loss_scale=16, train_wall=11, gb_free=2.8, wall=179215 2021-06-20 20:25:52 | INFO | train_inner | epoch 006: 607 / 3002 loss=2.345, ppl=5.08, wps=5736.3, ups=0.09, wpb=64837, bsz=128, num_updates=15527, lr=9.98838e-05, gnorm=1.948, loss_scale=16, train_wall=11, gb_free=2.8, wall=179226 2021-06-20 20:26:03 | INFO | train_inner | epoch 006: 608 / 3002 loss=2.397, ppl=5.27, wps=5846.7, ups=0.09, wpb=64826, bsz=128, num_updates=15528, lr=9.98838e-05, gnorm=1.934, loss_scale=16, train_wall=11, gb_free=2.8, wall=179237 2021-06-20 20:26:14 | INFO | train_inner | epoch 006: 609 / 3002 loss=2.47, ppl=5.54, wps=5900.4, ups=0.09, wpb=64855, bsz=128, num_updates=15529, lr=9.98838e-05, gnorm=1.883, loss_scale=16, train_wall=11, gb_free=2.8, wall=179248 2021-06-20 20:26:25 | INFO | train_inner | epoch 006: 610 / 3002 loss=2.449, ppl=5.46, wps=5805.2, ups=0.09, wpb=64815, bsz=128, num_updates=15530, lr=9.98838e-05, gnorm=2.062, loss_scale=16, train_wall=11, gb_free=2.8, wall=179260 2021-06-20 20:26:36 | INFO | train_inner | epoch 006: 611 / 3002 loss=2.409, ppl=5.31, wps=5833.3, ups=0.09, wpb=64884, bsz=128, num_updates=15531, lr=9.98837e-05, gnorm=1.875, loss_scale=16, train_wall=11, gb_free=2.8, wall=179271 2021-06-20 20:26:48 | INFO | train_inner | epoch 006: 612 / 3002 loss=2.454, ppl=5.48, wps=5806.8, ups=0.09, wpb=64964, bsz=128, num_updates=15532, lr=9.98837e-05, gnorm=1.913, loss_scale=16, train_wall=11, gb_free=2.8, wall=179282 2021-06-20 20:26:59 | INFO | train_inner | epoch 006: 613 / 3002 loss=2.383, ppl=5.22, wps=5842.1, ups=0.09, wpb=64788, bsz=128, num_updates=15533, lr=9.98837e-05, gnorm=1.831, loss_scale=16, train_wall=11, gb_free=2.8, wall=179293 2021-06-20 20:27:10 | INFO | train_inner | epoch 006: 614 / 3002 loss=2.356, ppl=5.12, wps=5712.6, ups=0.09, wpb=64773, bsz=128, num_updates=15534, lr=9.98837e-05, gnorm=2.068, loss_scale=16, train_wall=11, gb_free=2.8, wall=179304 2021-06-20 20:27:21 | INFO | train_inner | epoch 006: 615 / 3002 loss=2.696, ppl=6.48, wps=5856.5, ups=0.09, wpb=64809, bsz=128, num_updates=15535, lr=9.98837e-05, gnorm=1.933, loss_scale=16, train_wall=11, gb_free=2.8, wall=179315 2021-06-20 20:27:32 | INFO | train_inner | epoch 006: 616 / 3002 loss=2.427, ppl=5.38, wps=5832.4, ups=0.09, wpb=64807, bsz=128, num_updates=15536, lr=9.98837e-05, gnorm=2.134, loss_scale=16, train_wall=11, gb_free=2.8, wall=179327 2021-06-20 20:27:43 | INFO | train_inner | epoch 006: 617 / 3002 loss=2.432, ppl=5.4, wps=6014.7, ups=0.09, wpb=64840, bsz=128, num_updates=15537, lr=9.98837e-05, gnorm=2.045, loss_scale=16, train_wall=10, gb_free=2.8, wall=179337 2021-06-20 20:27:54 | INFO | train_inner | epoch 006: 618 / 3002 loss=2.344, ppl=5.08, wps=5862.7, ups=0.09, wpb=64873, bsz=128, num_updates=15538, lr=9.98837e-05, gnorm=1.876, loss_scale=16, train_wall=11, gb_free=2.8, wall=179348 2021-06-20 20:28:05 | INFO | train_inner | epoch 006: 619 / 3002 loss=2.444, ppl=5.44, wps=5873.3, ups=0.09, wpb=64874, bsz=128, num_updates=15539, lr=9.98837e-05, gnorm=1.9, loss_scale=16, train_wall=11, gb_free=2.8, wall=179359 2021-06-20 20:28:16 | INFO | train_inner | epoch 006: 620 / 3002 loss=2.52, ppl=5.74, wps=5856.4, ups=0.09, wpb=64850, bsz=128, num_updates=15540, lr=9.98837e-05, gnorm=1.856, loss_scale=16, train_wall=11, gb_free=2.8, wall=179370 2021-06-20 20:28:27 | INFO | train_inner | epoch 006: 621 / 3002 loss=2.619, ppl=6.14, wps=5875.2, ups=0.09, wpb=64768, bsz=128, num_updates=15541, lr=9.98837e-05, gnorm=1.867, loss_scale=16, train_wall=11, gb_free=2.8, wall=179382 2021-06-20 20:28:38 | INFO | train_inner | epoch 006: 622 / 3002 loss=2.422, ppl=5.36, wps=5827, ups=0.09, wpb=64813, bsz=128, num_updates=15542, lr=9.98837e-05, gnorm=1.844, loss_scale=16, train_wall=11, gb_free=2.8, wall=179393 2021-06-20 20:28:49 | INFO | train_inner | epoch 006: 623 / 3002 loss=2.447, ppl=5.45, wps=5876.1, ups=0.09, wpb=64835, bsz=128, num_updates=15543, lr=9.98836e-05, gnorm=1.902, loss_scale=16, train_wall=11, gb_free=2.8, wall=179404 2021-06-20 20:29:00 | INFO | train_inner | epoch 006: 624 / 3002 loss=2.592, ppl=6.03, wps=5838.2, ups=0.09, wpb=64821, bsz=128, num_updates=15544, lr=9.98836e-05, gnorm=1.875, loss_scale=16, train_wall=11, gb_free=2.8, wall=179415 2021-06-20 20:29:11 | INFO | train_inner | epoch 006: 625 / 3002 loss=2.439, ppl=5.42, wps=5914.1, ups=0.09, wpb=64868, bsz=128, num_updates=15545, lr=9.98836e-05, gnorm=1.92, loss_scale=16, train_wall=10, gb_free=2.8, wall=179426 2021-06-20 20:29:22 | INFO | train_inner | epoch 006: 626 / 3002 loss=2.524, ppl=5.75, wps=5934.9, ups=0.09, wpb=64845, bsz=128, num_updates=15546, lr=9.98836e-05, gnorm=1.949, loss_scale=16, train_wall=10, gb_free=2.8, wall=179437 2021-06-20 20:29:33 | INFO | train_inner | epoch 006: 627 / 3002 loss=2.389, ppl=5.24, wps=5813.5, ups=0.09, wpb=64835, bsz=128, num_updates=15547, lr=9.98836e-05, gnorm=1.906, loss_scale=16, train_wall=11, gb_free=2.8, wall=179448 2021-06-20 20:29:44 | INFO | train_inner | epoch 006: 628 / 3002 loss=2.571, ppl=5.94, wps=5885.4, ups=0.09, wpb=64800, bsz=128, num_updates=15548, lr=9.98836e-05, gnorm=2.006, loss_scale=16, train_wall=11, gb_free=2.8, wall=179459 2021-06-20 20:29:56 | INFO | train_inner | epoch 006: 629 / 3002 loss=2.406, ppl=5.3, wps=5797.2, ups=0.09, wpb=64817, bsz=128, num_updates=15549, lr=9.98836e-05, gnorm=1.897, loss_scale=16, train_wall=11, gb_free=2.8, wall=179470 2021-06-20 20:30:07 | INFO | train_inner | epoch 006: 630 / 3002 loss=2.386, ppl=5.23, wps=5821.9, ups=0.09, wpb=64863, bsz=128, num_updates=15550, lr=9.98836e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=179481 2021-06-20 20:30:18 | INFO | train_inner | epoch 006: 631 / 3002 loss=2.336, ppl=5.05, wps=5939.3, ups=0.09, wpb=64864, bsz=128, num_updates=15551, lr=9.98836e-05, gnorm=1.874, loss_scale=32, train_wall=10, gb_free=2.8, wall=179492 2021-06-20 20:30:29 | INFO | train_inner | epoch 006: 632 / 3002 loss=2.489, ppl=5.61, wps=5885.9, ups=0.09, wpb=64821, bsz=128, num_updates=15552, lr=9.98836e-05, gnorm=1.974, loss_scale=32, train_wall=11, gb_free=2.8, wall=179503 2021-06-20 20:30:40 | INFO | train_inner | epoch 006: 633 / 3002 loss=2.493, ppl=5.63, wps=5895.7, ups=0.09, wpb=64777, bsz=128, num_updates=15553, lr=9.98836e-05, gnorm=1.96, loss_scale=32, train_wall=11, gb_free=2.8, wall=179514 2021-06-20 20:30:51 | INFO | train_inner | epoch 006: 634 / 3002 loss=2.43, ppl=5.39, wps=5894.7, ups=0.09, wpb=64780, bsz=128, num_updates=15554, lr=9.98836e-05, gnorm=1.926, loss_scale=32, train_wall=10, gb_free=2.8, wall=179525 2021-06-20 20:31:02 | INFO | train_inner | epoch 006: 635 / 3002 loss=2.489, ppl=5.61, wps=5940.4, ups=0.09, wpb=64776, bsz=128, num_updates=15555, lr=9.98836e-05, gnorm=1.968, loss_scale=32, train_wall=10, gb_free=2.8, wall=179536 2021-06-20 20:31:13 | INFO | train_inner | epoch 006: 636 / 3002 loss=2.381, ppl=5.21, wps=5841.7, ups=0.09, wpb=64835, bsz=128, num_updates=15556, lr=9.98835e-05, gnorm=1.918, loss_scale=32, train_wall=11, gb_free=2.8, wall=179547 2021-06-20 20:31:24 | INFO | train_inner | epoch 006: 637 / 3002 loss=2.587, ppl=6.01, wps=5874.1, ups=0.09, wpb=64829, bsz=128, num_updates=15557, lr=9.98835e-05, gnorm=1.904, loss_scale=32, train_wall=11, gb_free=2.8, wall=179558 2021-06-20 20:31:35 | INFO | train_inner | epoch 006: 638 / 3002 loss=2.517, ppl=5.72, wps=5815.7, ups=0.09, wpb=64871, bsz=128, num_updates=15558, lr=9.98835e-05, gnorm=1.972, loss_scale=32, train_wall=11, gb_free=2.8, wall=179569 2021-06-20 20:31:46 | INFO | train_inner | epoch 006: 639 / 3002 loss=2.439, ppl=5.42, wps=5854, ups=0.09, wpb=64872, bsz=128, num_updates=15559, lr=9.98835e-05, gnorm=1.95, loss_scale=32, train_wall=11, gb_free=2.8, wall=179580 2021-06-20 20:31:57 | INFO | train_inner | epoch 006: 640 / 3002 loss=2.56, ppl=5.9, wps=5846.1, ups=0.09, wpb=64813, bsz=128, num_updates=15560, lr=9.98835e-05, gnorm=1.917, loss_scale=32, train_wall=11, gb_free=2.8, wall=179591 2021-06-20 20:32:08 | INFO | train_inner | epoch 006: 641 / 3002 loss=2.579, ppl=5.98, wps=5898.6, ups=0.09, wpb=64777, bsz=128, num_updates=15561, lr=9.98835e-05, gnorm=1.869, loss_scale=32, train_wall=11, gb_free=2.8, wall=179602 2021-06-20 20:32:19 | INFO | train_inner | epoch 006: 642 / 3002 loss=2.441, ppl=5.43, wps=5909.4, ups=0.09, wpb=64889, bsz=128, num_updates=15562, lr=9.98835e-05, gnorm=1.849, loss_scale=32, train_wall=11, gb_free=2.8, wall=179613 2021-06-20 20:32:30 | INFO | train_inner | epoch 006: 643 / 3002 loss=2.511, ppl=5.7, wps=5779, ups=0.09, wpb=64836, bsz=128, num_updates=15563, lr=9.98835e-05, gnorm=1.941, loss_scale=32, train_wall=11, gb_free=2.8, wall=179625 2021-06-20 20:32:41 | INFO | train_inner | epoch 006: 644 / 3002 loss=2.461, ppl=5.5, wps=5805.4, ups=0.09, wpb=64884, bsz=128, num_updates=15564, lr=9.98835e-05, gnorm=1.999, loss_scale=32, train_wall=11, gb_free=2.8, wall=179636 2021-06-20 20:32:53 | INFO | train_inner | epoch 006: 645 / 3002 loss=2.403, ppl=5.29, wps=5850.2, ups=0.09, wpb=64840, bsz=128, num_updates=15565, lr=9.98835e-05, gnorm=1.975, loss_scale=32, train_wall=11, gb_free=2.8, wall=179647 2021-06-20 20:33:03 | INFO | train_inner | epoch 006: 646 / 3002 loss=2.451, ppl=5.47, wps=5981.8, ups=0.09, wpb=64955, bsz=128, num_updates=15566, lr=9.98835e-05, gnorm=1.953, loss_scale=32, train_wall=10, gb_free=2.8, wall=179658 2021-06-20 20:33:14 | INFO | train_inner | epoch 006: 647 / 3002 loss=2.537, ppl=5.8, wps=6009.9, ups=0.09, wpb=64827, bsz=128, num_updates=15567, lr=9.98835e-05, gnorm=1.963, loss_scale=32, train_wall=10, gb_free=2.8, wall=179669 2021-06-20 20:33:25 | INFO | train_inner | epoch 006: 648 / 3002 loss=2.577, ppl=5.97, wps=5789.9, ups=0.09, wpb=64844, bsz=128, num_updates=15568, lr=9.98834e-05, gnorm=1.962, loss_scale=32, train_wall=11, gb_free=2.8, wall=179680 2021-06-20 20:33:36 | INFO | train_inner | epoch 006: 649 / 3002 loss=2.485, ppl=5.6, wps=5867.7, ups=0.09, wpb=64802, bsz=128, num_updates=15569, lr=9.98834e-05, gnorm=1.994, loss_scale=32, train_wall=11, gb_free=2.8, wall=179691 2021-06-20 20:33:48 | INFO | train_inner | epoch 006: 650 / 3002 loss=2.577, ppl=5.97, wps=5766.6, ups=0.09, wpb=64794, bsz=128, num_updates=15570, lr=9.98834e-05, gnorm=1.909, loss_scale=32, train_wall=11, gb_free=2.8, wall=179702 2021-06-20 20:33:59 | INFO | train_inner | epoch 006: 651 / 3002 loss=2.5, ppl=5.66, wps=5777, ups=0.09, wpb=64874, bsz=128, num_updates=15571, lr=9.98834e-05, gnorm=1.976, loss_scale=32, train_wall=11, gb_free=2.8, wall=179713 2021-06-20 20:34:10 | INFO | train_inner | epoch 006: 652 / 3002 loss=2.582, ppl=5.99, wps=5898.7, ups=0.09, wpb=64820, bsz=128, num_updates=15572, lr=9.98834e-05, gnorm=2.03, loss_scale=32, train_wall=11, gb_free=2.8, wall=179724 2021-06-20 20:34:21 | INFO | train_inner | epoch 006: 653 / 3002 loss=2.553, ppl=5.87, wps=5835.5, ups=0.09, wpb=64788, bsz=128, num_updates=15573, lr=9.98834e-05, gnorm=1.898, loss_scale=32, train_wall=11, gb_free=2.8, wall=179735 2021-06-20 20:34:32 | INFO | train_inner | epoch 006: 654 / 3002 loss=2.53, ppl=5.78, wps=5901.5, ups=0.09, wpb=64809, bsz=128, num_updates=15574, lr=9.98834e-05, gnorm=1.917, loss_scale=32, train_wall=11, gb_free=2.8, wall=179746 2021-06-20 20:34:43 | INFO | train_inner | epoch 006: 655 / 3002 loss=2.523, ppl=5.75, wps=5856.4, ups=0.09, wpb=64727, bsz=128, num_updates=15575, lr=9.98834e-05, gnorm=1.912, loss_scale=32, train_wall=11, gb_free=2.8, wall=179757 2021-06-20 20:34:54 | INFO | train_inner | epoch 006: 656 / 3002 loss=2.293, ppl=4.9, wps=5742.2, ups=0.09, wpb=64806, bsz=128, num_updates=15576, lr=9.98834e-05, gnorm=1.874, loss_scale=32, train_wall=11, gb_free=2.8, wall=179769 2021-06-20 20:35:05 | INFO | train_inner | epoch 006: 657 / 3002 loss=2.464, ppl=5.52, wps=5829.7, ups=0.09, wpb=64837, bsz=128, num_updates=15577, lr=9.98834e-05, gnorm=1.95, loss_scale=32, train_wall=11, gb_free=2.8, wall=179780 2021-06-20 20:35:16 | INFO | train_inner | epoch 006: 658 / 3002 loss=2.462, ppl=5.51, wps=5894.7, ups=0.09, wpb=64846, bsz=128, num_updates=15578, lr=9.98834e-05, gnorm=1.933, loss_scale=32, train_wall=11, gb_free=2.8, wall=179791 2021-06-20 20:35:28 | INFO | train_inner | epoch 006: 659 / 3002 loss=2.601, ppl=6.07, wps=5805.4, ups=0.09, wpb=64669, bsz=128, num_updates=15579, lr=9.98834e-05, gnorm=1.908, loss_scale=32, train_wall=11, gb_free=2.8, wall=179802 2021-06-20 20:35:39 | INFO | train_inner | epoch 006: 660 / 3002 loss=2.613, ppl=6.12, wps=5809.8, ups=0.09, wpb=64781, bsz=128, num_updates=15580, lr=9.98834e-05, gnorm=1.868, loss_scale=32, train_wall=11, gb_free=2.8, wall=179813 2021-06-20 20:35:50 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 2021-06-20 20:36:01 | INFO | train_inner | epoch 006: 662 / 3002 loss=2.477, ppl=5.57, wps=2920.4, ups=0.05, wpb=64779, bsz=128, num_updates=15581, lr=9.98833e-05, gnorm=1.921, loss_scale=16, train_wall=21, gb_free=2.8, wall=179835 2021-06-20 20:36:12 | INFO | train_inner | epoch 006: 663 / 3002 loss=2.409, ppl=5.31, wps=5821.3, ups=0.09, wpb=64829, bsz=128, num_updates=15582, lr=9.98833e-05, gnorm=2.042, loss_scale=16, train_wall=11, gb_free=2.8, wall=179846 2021-06-20 20:36:23 | INFO | train_inner | epoch 006: 664 / 3002 loss=2.508, ppl=5.69, wps=5806.5, ups=0.09, wpb=64840, bsz=128, num_updates=15583, lr=9.98833e-05, gnorm=1.983, loss_scale=16, train_wall=11, gb_free=2.8, wall=179858 2021-06-20 20:36:34 | INFO | train_inner | epoch 006: 665 / 3002 loss=2.482, ppl=5.59, wps=5940, ups=0.09, wpb=64849, bsz=128, num_updates=15584, lr=9.98833e-05, gnorm=2.169, loss_scale=16, train_wall=10, gb_free=2.8, wall=179868 2021-06-20 20:36:45 | INFO | train_inner | epoch 006: 666 / 3002 loss=2.487, ppl=5.61, wps=5832, ups=0.09, wpb=64888, bsz=128, num_updates=15585, lr=9.98833e-05, gnorm=2, loss_scale=16, train_wall=11, gb_free=2.8, wall=179880 2021-06-20 20:36:56 | INFO | train_inner | epoch 006: 667 / 3002 loss=2.479, ppl=5.58, wps=5986.4, ups=0.09, wpb=64854, bsz=128, num_updates=15586, lr=9.98833e-05, gnorm=1.822, loss_scale=16, train_wall=10, gb_free=2.8, wall=179890 2021-06-20 20:37:07 | INFO | train_inner | epoch 006: 668 / 3002 loss=2.515, ppl=5.72, wps=5980, ups=0.09, wpb=64931, bsz=128, num_updates=15587, lr=9.98833e-05, gnorm=2.058, loss_scale=16, train_wall=10, gb_free=2.8, wall=179901 2021-06-20 20:37:18 | INFO | train_inner | epoch 006: 669 / 3002 loss=2.37, ppl=5.17, wps=5932, ups=0.09, wpb=64880, bsz=128, num_updates=15588, lr=9.98833e-05, gnorm=1.885, loss_scale=16, train_wall=10, gb_free=2.8, wall=179912 2021-06-20 20:37:29 | INFO | train_inner | epoch 006: 670 / 3002 loss=2.375, ppl=5.19, wps=5866.3, ups=0.09, wpb=64867, bsz=128, num_updates=15589, lr=9.98833e-05, gnorm=1.939, loss_scale=16, train_wall=11, gb_free=2.8, wall=179923 2021-06-20 20:37:40 | INFO | train_inner | epoch 006: 671 / 3002 loss=2.447, ppl=5.45, wps=5848.7, ups=0.09, wpb=64753, bsz=128, num_updates=15590, lr=9.98833e-05, gnorm=1.877, loss_scale=16, train_wall=11, gb_free=2.8, wall=179934 2021-06-20 20:37:51 | INFO | train_inner | epoch 006: 672 / 3002 loss=2.373, ppl=5.18, wps=5787.1, ups=0.09, wpb=64727, bsz=128, num_updates=15591, lr=9.98833e-05, gnorm=1.871, loss_scale=16, train_wall=11, gb_free=2.8, wall=179946 2021-06-20 20:38:02 | INFO | train_inner | epoch 006: 673 / 3002 loss=2.632, ppl=6.2, wps=5768.6, ups=0.09, wpb=64837, bsz=128, num_updates=15592, lr=9.98833e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=179957 2021-06-20 20:38:14 | INFO | train_inner | epoch 006: 674 / 3002 loss=2.507, ppl=5.68, wps=5805.4, ups=0.09, wpb=64849, bsz=128, num_updates=15593, lr=9.98832e-05, gnorm=1.907, loss_scale=16, train_wall=11, gb_free=2.8, wall=179968 2021-06-20 20:38:25 | INFO | train_inner | epoch 006: 675 / 3002 loss=2.431, ppl=5.39, wps=5842, ups=0.09, wpb=64752, bsz=128, num_updates=15594, lr=9.98832e-05, gnorm=1.892, loss_scale=16, train_wall=11, gb_free=2.8, wall=179979 2021-06-20 20:38:36 | INFO | train_inner | epoch 006: 676 / 3002 loss=2.491, ppl=5.62, wps=5943.7, ups=0.09, wpb=64871, bsz=128, num_updates=15595, lr=9.98832e-05, gnorm=2.038, loss_scale=16, train_wall=10, gb_free=2.8, wall=179990 2021-06-20 20:38:47 | INFO | train_inner | epoch 006: 677 / 3002 loss=2.445, ppl=5.44, wps=5830.7, ups=0.09, wpb=64810, bsz=128, num_updates=15596, lr=9.98832e-05, gnorm=1.852, loss_scale=16, train_wall=11, gb_free=2.8, wall=180001 2021-06-20 20:38:58 | INFO | train_inner | epoch 006: 678 / 3002 loss=2.569, ppl=5.93, wps=5786.1, ups=0.09, wpb=64743, bsz=128, num_updates=15597, lr=9.98832e-05, gnorm=1.924, loss_scale=16, train_wall=11, gb_free=2.8, wall=180012 2021-06-20 20:39:09 | INFO | train_inner | epoch 006: 679 / 3002 loss=2.478, ppl=5.57, wps=5866.5, ups=0.09, wpb=64735, bsz=128, num_updates=15598, lr=9.98832e-05, gnorm=1.834, loss_scale=16, train_wall=11, gb_free=2.8, wall=180023 2021-06-20 20:39:20 | INFO | train_inner | epoch 006: 680 / 3002 loss=2.456, ppl=5.49, wps=5880.6, ups=0.09, wpb=64754, bsz=128, num_updates=15599, lr=9.98832e-05, gnorm=1.893, loss_scale=16, train_wall=11, gb_free=2.8, wall=180034 2021-06-20 20:39:31 | INFO | train_inner | epoch 006: 681 / 3002 loss=2.432, ppl=5.4, wps=5775.3, ups=0.09, wpb=64842, bsz=128, num_updates=15600, lr=9.98832e-05, gnorm=1.91, loss_scale=16, train_wall=11, gb_free=2.8, wall=180046 2021-06-20 20:39:42 | INFO | train_inner | epoch 006: 682 / 3002 loss=2.423, ppl=5.36, wps=5982.6, ups=0.09, wpb=64858, bsz=128, num_updates=15601, lr=9.98832e-05, gnorm=2.023, loss_scale=16, train_wall=10, gb_free=2.8, wall=180056 2021-06-20 20:39:53 | INFO | train_inner | epoch 006: 683 / 3002 loss=2.455, ppl=5.48, wps=5804.2, ups=0.09, wpb=64837, bsz=128, num_updates=15602, lr=9.98832e-05, gnorm=1.958, loss_scale=16, train_wall=11, gb_free=2.8, wall=180068 2021-06-20 20:40:04 | INFO | train_inner | epoch 006: 684 / 3002 loss=2.465, ppl=5.52, wps=5846.3, ups=0.09, wpb=64824, bsz=128, num_updates=15603, lr=9.98832e-05, gnorm=1.971, loss_scale=16, train_wall=11, gb_free=2.8, wall=180079 2021-06-20 20:40:15 | INFO | train_inner | epoch 006: 685 / 3002 loss=2.472, ppl=5.55, wps=5828.7, ups=0.09, wpb=64890, bsz=128, num_updates=15604, lr=9.98832e-05, gnorm=1.956, loss_scale=16, train_wall=11, gb_free=2.8, wall=180090 2021-06-20 20:40:27 | INFO | train_inner | epoch 006: 686 / 3002 loss=2.424, ppl=5.37, wps=5803.7, ups=0.09, wpb=64838, bsz=128, num_updates=15605, lr=9.98832e-05, gnorm=1.964, loss_scale=16, train_wall=11, gb_free=2.8, wall=180101 2021-06-20 20:40:37 | INFO | train_inner | epoch 006: 687 / 3002 loss=2.567, ppl=5.93, wps=5940.4, ups=0.09, wpb=64819, bsz=128, num_updates=15606, lr=9.98831e-05, gnorm=2.006, loss_scale=16, train_wall=10, gb_free=2.8, wall=180112 2021-06-20 20:40:49 | INFO | train_inner | epoch 006: 688 / 3002 loss=2.552, ppl=5.86, wps=5876.5, ups=0.09, wpb=64804, bsz=128, num_updates=15607, lr=9.98831e-05, gnorm=1.941, loss_scale=16, train_wall=11, gb_free=2.8, wall=180123 2021-06-20 20:41:00 | INFO | train_inner | epoch 006: 689 / 3002 loss=2.472, ppl=5.55, wps=5799, ups=0.09, wpb=64848, bsz=128, num_updates=15608, lr=9.98831e-05, gnorm=1.849, loss_scale=16, train_wall=11, gb_free=2.8, wall=180134 2021-06-20 20:41:11 | INFO | train_inner | epoch 006: 690 / 3002 loss=2.444, ppl=5.44, wps=5721.4, ups=0.09, wpb=64762, bsz=128, num_updates=15609, lr=9.98831e-05, gnorm=1.904, loss_scale=16, train_wall=11, gb_free=2.8, wall=180145 2021-06-20 20:41:22 | INFO | train_inner | epoch 006: 691 / 3002 loss=2.466, ppl=5.53, wps=5773.4, ups=0.09, wpb=64753, bsz=128, num_updates=15610, lr=9.98831e-05, gnorm=1.933, loss_scale=16, train_wall=11, gb_free=2.8, wall=180157 2021-06-20 20:41:33 | INFO | train_inner | epoch 006: 692 / 3002 loss=2.546, ppl=5.84, wps=5881.8, ups=0.09, wpb=64861, bsz=128, num_updates=15611, lr=9.98831e-05, gnorm=1.97, loss_scale=16, train_wall=11, gb_free=2.8, wall=180168 2021-06-20 20:41:44 | INFO | train_inner | epoch 006: 693 / 3002 loss=2.533, ppl=5.79, wps=5819.2, ups=0.09, wpb=64898, bsz=128, num_updates=15612, lr=9.98831e-05, gnorm=1.969, loss_scale=16, train_wall=11, gb_free=2.8, wall=180179 2021-06-20 20:41:56 | INFO | train_inner | epoch 006: 694 / 3002 loss=2.482, ppl=5.59, wps=5767.2, ups=0.09, wpb=64807, bsz=128, num_updates=15613, lr=9.98831e-05, gnorm=1.861, loss_scale=16, train_wall=11, gb_free=2.8, wall=180190 2021-06-20 20:42:07 | INFO | train_inner | epoch 006: 695 / 3002 loss=2.449, ppl=5.46, wps=5875.4, ups=0.09, wpb=64905, bsz=128, num_updates=15614, lr=9.98831e-05, gnorm=1.932, loss_scale=16, train_wall=11, gb_free=2.8, wall=180201 2021-06-20 20:42:18 | INFO | train_inner | epoch 006: 696 / 3002 loss=2.596, ppl=6.05, wps=5784, ups=0.09, wpb=64804, bsz=128, num_updates=15615, lr=9.98831e-05, gnorm=2.094, loss_scale=16, train_wall=11, gb_free=2.8, wall=180212 2021-06-20 20:42:29 | INFO | train_inner | epoch 006: 697 / 3002 loss=2.526, ppl=5.76, wps=5937.6, ups=0.09, wpb=64836, bsz=128, num_updates=15616, lr=9.98831e-05, gnorm=2.05, loss_scale=16, train_wall=10, gb_free=2.8, wall=180223 2021-06-20 20:42:40 | INFO | train_inner | epoch 006: 698 / 3002 loss=2.345, ppl=5.08, wps=5781.1, ups=0.09, wpb=64763, bsz=128, num_updates=15617, lr=9.98831e-05, gnorm=1.972, loss_scale=16, train_wall=11, gb_free=2.8, wall=180234 2021-06-20 20:42:51 | INFO | train_inner | epoch 006: 699 / 3002 loss=2.475, ppl=5.56, wps=5845.4, ups=0.09, wpb=64793, bsz=128, num_updates=15618, lr=9.9883e-05, gnorm=1.903, loss_scale=16, train_wall=11, gb_free=2.8, wall=180245 2021-06-20 20:43:02 | INFO | train_inner | epoch 006: 700 / 3002 loss=2.554, ppl=5.87, wps=5883.7, ups=0.09, wpb=64849, bsz=128, num_updates=15619, lr=9.9883e-05, gnorm=2.051, loss_scale=16, train_wall=11, gb_free=2.8, wall=180256 2021-06-20 20:43:13 | INFO | train_inner | epoch 006: 701 / 3002 loss=2.463, ppl=5.51, wps=5886, ups=0.09, wpb=64870, bsz=128, num_updates=15620, lr=9.9883e-05, gnorm=1.928, loss_scale=16, train_wall=11, gb_free=2.8, wall=180267 2021-06-20 20:43:24 | INFO | train_inner | epoch 006: 702 / 3002 loss=2.498, ppl=5.65, wps=5789.4, ups=0.09, wpb=64858, bsz=128, num_updates=15621, lr=9.9883e-05, gnorm=1.861, loss_scale=16, train_wall=11, gb_free=2.8, wall=180279 2021-06-20 20:43:36 | INFO | train_inner | epoch 006: 703 / 3002 loss=2.514, ppl=5.71, wps=5704.2, ups=0.09, wpb=64796, bsz=128, num_updates=15622, lr=9.9883e-05, gnorm=1.902, loss_scale=16, train_wall=11, gb_free=2.8, wall=180290 2021-06-20 20:43:47 | INFO | train_inner | epoch 006: 704 / 3002 loss=2.383, ppl=5.21, wps=5814.7, ups=0.09, wpb=64827, bsz=128, num_updates=15623, lr=9.9883e-05, gnorm=1.928, loss_scale=16, train_wall=11, gb_free=2.8, wall=180301 2021-06-20 20:43:58 | INFO | train_inner | epoch 006: 705 / 3002 loss=2.736, ppl=6.66, wps=5812.9, ups=0.09, wpb=64739, bsz=128, num_updates=15624, lr=9.9883e-05, gnorm=2, loss_scale=16, train_wall=11, gb_free=2.8, wall=180312 2021-06-20 20:44:09 | INFO | train_inner | epoch 006: 706 / 3002 loss=2.442, ppl=5.44, wps=5945.6, ups=0.09, wpb=64887, bsz=128, num_updates=15625, lr=9.9883e-05, gnorm=1.975, loss_scale=16, train_wall=10, gb_free=2.8, wall=180323 2021-06-20 20:44:20 | INFO | train_inner | epoch 006: 707 / 3002 loss=2.456, ppl=5.49, wps=5885.8, ups=0.09, wpb=64863, bsz=128, num_updates=15626, lr=9.9883e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=180334 2021-06-20 20:44:31 | INFO | train_inner | epoch 006: 708 / 3002 loss=2.501, ppl=5.66, wps=5856.3, ups=0.09, wpb=64803, bsz=128, num_updates=15627, lr=9.9883e-05, gnorm=2.023, loss_scale=16, train_wall=11, gb_free=2.8, wall=180345 2021-06-20 20:44:42 | INFO | train_inner | epoch 006: 709 / 3002 loss=2.519, ppl=5.73, wps=5868.4, ups=0.09, wpb=64881, bsz=128, num_updates=15628, lr=9.9883e-05, gnorm=1.877, loss_scale=16, train_wall=11, gb_free=2.8, wall=180356 2021-06-20 20:44:53 | INFO | train_inner | epoch 006: 710 / 3002 loss=2.597, ppl=6.05, wps=5944.8, ups=0.09, wpb=64875, bsz=128, num_updates=15629, lr=9.9883e-05, gnorm=1.895, loss_scale=16, train_wall=10, gb_free=2.8, wall=180367 2021-06-20 20:45:04 | INFO | train_inner | epoch 006: 711 / 3002 loss=2.586, ppl=6.01, wps=5806.8, ups=0.09, wpb=64869, bsz=128, num_updates=15630, lr=9.9883e-05, gnorm=1.952, loss_scale=16, train_wall=11, gb_free=2.8, wall=180378 2021-06-20 20:45:15 | INFO | train_inner | epoch 006: 712 / 3002 loss=2.57, ppl=5.94, wps=5896, ups=0.09, wpb=64875, bsz=128, num_updates=15631, lr=9.98829e-05, gnorm=1.999, loss_scale=16, train_wall=11, gb_free=2.8, wall=180389 2021-06-20 20:45:26 | INFO | train_inner | epoch 006: 713 / 3002 loss=2.555, ppl=5.88, wps=5886.5, ups=0.09, wpb=64809, bsz=128, num_updates=15632, lr=9.98829e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=180401 2021-06-20 20:45:37 | INFO | train_inner | epoch 006: 714 / 3002 loss=2.422, ppl=5.36, wps=5900, ups=0.09, wpb=64819, bsz=128, num_updates=15633, lr=9.98829e-05, gnorm=1.848, loss_scale=16, train_wall=11, gb_free=2.8, wall=180411 2021-06-20 20:45:48 | INFO | train_inner | epoch 006: 715 / 3002 loss=2.554, ppl=5.87, wps=5922.4, ups=0.09, wpb=64738, bsz=128, num_updates=15634, lr=9.98829e-05, gnorm=1.856, loss_scale=16, train_wall=10, gb_free=2.8, wall=180422 2021-06-20 20:45:59 | INFO | train_inner | epoch 006: 716 / 3002 loss=2.58, ppl=5.98, wps=5775.8, ups=0.09, wpb=64840, bsz=128, num_updates=15635, lr=9.98829e-05, gnorm=1.964, loss_scale=16, train_wall=11, gb_free=2.8, wall=180434 2021-06-20 20:46:11 | INFO | train_inner | epoch 006: 717 / 3002 loss=2.524, ppl=5.75, wps=5769.5, ups=0.09, wpb=64864, bsz=128, num_updates=15636, lr=9.98829e-05, gnorm=1.981, loss_scale=16, train_wall=11, gb_free=2.8, wall=180445 2021-06-20 20:46:22 | INFO | train_inner | epoch 006: 718 / 3002 loss=2.537, ppl=5.8, wps=5846, ups=0.09, wpb=64832, bsz=128, num_updates=15637, lr=9.98829e-05, gnorm=1.93, loss_scale=16, train_wall=11, gb_free=2.8, wall=180456 2021-06-20 20:46:33 | INFO | train_inner | epoch 006: 719 / 3002 loss=2.458, ppl=5.5, wps=5885.2, ups=0.09, wpb=64815, bsz=128, num_updates=15638, lr=9.98829e-05, gnorm=1.933, loss_scale=16, train_wall=11, gb_free=2.8, wall=180467 2021-06-20 20:46:44 | INFO | train_inner | epoch 006: 720 / 3002 loss=2.379, ppl=5.2, wps=5812.2, ups=0.09, wpb=64864, bsz=128, num_updates=15639, lr=9.98829e-05, gnorm=1.914, loss_scale=16, train_wall=11, gb_free=2.8, wall=180478 2021-06-20 20:46:55 | INFO | train_inner | epoch 006: 721 / 3002 loss=2.348, ppl=5.09, wps=5830.6, ups=0.09, wpb=64828, bsz=128, num_updates=15640, lr=9.98829e-05, gnorm=1.948, loss_scale=16, train_wall=11, gb_free=2.8, wall=180489 2021-06-20 20:47:06 | INFO | train_inner | epoch 006: 722 / 3002 loss=2.336, ppl=5.05, wps=5966.1, ups=0.09, wpb=64813, bsz=128, num_updates=15641, lr=9.98829e-05, gnorm=1.885, loss_scale=16, train_wall=10, gb_free=2.8, wall=180500 2021-06-20 20:47:17 | INFO | train_inner | epoch 006: 723 / 3002 loss=2.45, ppl=5.47, wps=5749.7, ups=0.09, wpb=64823, bsz=128, num_updates=15642, lr=9.98829e-05, gnorm=2.13, loss_scale=16, train_wall=11, gb_free=2.8, wall=180511 2021-06-20 20:47:28 | INFO | train_inner | epoch 006: 724 / 3002 loss=2.698, ppl=6.49, wps=5847.6, ups=0.09, wpb=64846, bsz=128, num_updates=15643, lr=9.98828e-05, gnorm=2.078, loss_scale=16, train_wall=11, gb_free=2.8, wall=180522 2021-06-20 20:47:39 | INFO | train_inner | epoch 006: 725 / 3002 loss=2.415, ppl=5.33, wps=5863.5, ups=0.09, wpb=64852, bsz=128, num_updates=15644, lr=9.98828e-05, gnorm=2.078, loss_scale=16, train_wall=11, gb_free=2.8, wall=180534 2021-06-20 20:47:50 | INFO | train_inner | epoch 006: 726 / 3002 loss=2.505, ppl=5.68, wps=5837, ups=0.09, wpb=64859, bsz=128, num_updates=15645, lr=9.98828e-05, gnorm=1.87, loss_scale=16, train_wall=11, gb_free=2.8, wall=180545 2021-06-20 20:48:02 | INFO | train_inner | epoch 006: 727 / 3002 loss=2.635, ppl=6.21, wps=5780.2, ups=0.09, wpb=64853, bsz=128, num_updates=15646, lr=9.98828e-05, gnorm=1.924, loss_scale=16, train_wall=11, gb_free=2.8, wall=180556 2021-06-20 20:48:13 | INFO | train_inner | epoch 006: 728 / 3002 loss=2.558, ppl=5.89, wps=5797.6, ups=0.09, wpb=64805, bsz=128, num_updates=15647, lr=9.98828e-05, gnorm=1.923, loss_scale=16, train_wall=11, gb_free=2.8, wall=180567 2021-06-20 20:48:24 | INFO | train_inner | epoch 006: 729 / 3002 loss=2.316, ppl=4.98, wps=5871.4, ups=0.09, wpb=64856, bsz=128, num_updates=15648, lr=9.98828e-05, gnorm=1.936, loss_scale=16, train_wall=11, gb_free=2.8, wall=180578 2021-06-20 20:48:35 | INFO | train_inner | epoch 006: 730 / 3002 loss=2.476, ppl=5.57, wps=5851.9, ups=0.09, wpb=64884, bsz=128, num_updates=15649, lr=9.98828e-05, gnorm=1.918, loss_scale=16, train_wall=11, gb_free=2.8, wall=180589 2021-06-20 20:48:46 | INFO | train_inner | epoch 006: 731 / 3002 loss=2.353, ppl=5.11, wps=5892.3, ups=0.09, wpb=64934, bsz=128, num_updates=15650, lr=9.98828e-05, gnorm=1.948, loss_scale=16, train_wall=11, gb_free=2.8, wall=180600 2021-06-20 20:48:57 | INFO | train_inner | epoch 006: 732 / 3002 loss=2.509, ppl=5.69, wps=5836.3, ups=0.09, wpb=64820, bsz=128, num_updates=15651, lr=9.98828e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=180611 2021-06-20 20:49:08 | INFO | train_inner | epoch 006: 733 / 3002 loss=2.562, ppl=5.9, wps=5845.8, ups=0.09, wpb=64826, bsz=128, num_updates=15652, lr=9.98828e-05, gnorm=1.966, loss_scale=16, train_wall=11, gb_free=2.8, wall=180622 2021-06-20 20:49:19 | INFO | train_inner | epoch 006: 734 / 3002 loss=2.442, ppl=5.43, wps=5850.4, ups=0.09, wpb=64834, bsz=128, num_updates=15653, lr=9.98828e-05, gnorm=1.915, loss_scale=16, train_wall=11, gb_free=2.8, wall=180634 2021-06-20 20:49:30 | INFO | train_inner | epoch 006: 735 / 3002 loss=2.483, ppl=5.59, wps=5964.2, ups=0.09, wpb=64817, bsz=128, num_updates=15654, lr=9.98828e-05, gnorm=1.935, loss_scale=16, train_wall=10, gb_free=2.8, wall=180644 2021-06-20 20:49:41 | INFO | train_inner | epoch 006: 736 / 3002 loss=2.527, ppl=5.77, wps=5854.3, ups=0.09, wpb=64828, bsz=128, num_updates=15655, lr=9.98828e-05, gnorm=1.867, loss_scale=16, train_wall=11, gb_free=2.8, wall=180655 2021-06-20 20:49:52 | INFO | train_inner | epoch 006: 737 / 3002 loss=2.368, ppl=5.16, wps=5803.2, ups=0.09, wpb=64811, bsz=128, num_updates=15656, lr=9.98827e-05, gnorm=1.945, loss_scale=16, train_wall=11, gb_free=2.8, wall=180667 2021-06-20 20:50:03 | INFO | train_inner | epoch 006: 738 / 3002 loss=2.545, ppl=5.83, wps=5802.9, ups=0.09, wpb=64782, bsz=128, num_updates=15657, lr=9.98827e-05, gnorm=2.222, loss_scale=16, train_wall=11, gb_free=2.8, wall=180678 2021-06-20 20:50:15 | INFO | train_inner | epoch 006: 739 / 3002 loss=2.344, ppl=5.08, wps=5835, ups=0.09, wpb=64816, bsz=128, num_updates=15658, lr=9.98827e-05, gnorm=1.957, loss_scale=16, train_wall=11, gb_free=2.8, wall=180689 2021-06-20 20:50:26 | INFO | train_inner | epoch 006: 740 / 3002 loss=2.577, ppl=5.97, wps=5846.4, ups=0.09, wpb=64823, bsz=128, num_updates=15659, lr=9.98827e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=180700 2021-06-20 20:50:37 | INFO | train_inner | epoch 006: 741 / 3002 loss=2.517, ppl=5.73, wps=5728.9, ups=0.09, wpb=64790, bsz=128, num_updates=15660, lr=9.98827e-05, gnorm=1.902, loss_scale=16, train_wall=11, gb_free=2.8, wall=180711 2021-06-20 20:50:48 | INFO | train_inner | epoch 006: 742 / 3002 loss=2.434, ppl=5.4, wps=5774.8, ups=0.09, wpb=64811, bsz=128, num_updates=15661, lr=9.98827e-05, gnorm=1.975, loss_scale=16, train_wall=11, gb_free=2.8, wall=180723 2021-06-20 20:50:59 | INFO | train_inner | epoch 006: 743 / 3002 loss=2.38, ppl=5.2, wps=5806.5, ups=0.09, wpb=64853, bsz=128, num_updates=15662, lr=9.98827e-05, gnorm=1.935, loss_scale=16, train_wall=11, gb_free=2.8, wall=180734 2021-06-20 20:51:10 | INFO | train_inner | epoch 006: 744 / 3002 loss=2.566, ppl=5.92, wps=5897.3, ups=0.09, wpb=64864, bsz=128, num_updates=15663, lr=9.98827e-05, gnorm=1.999, loss_scale=16, train_wall=11, gb_free=2.8, wall=180745 2021-06-20 20:51:21 | INFO | train_inner | epoch 006: 745 / 3002 loss=2.33, ppl=5.03, wps=5816.3, ups=0.09, wpb=64839, bsz=128, num_updates=15664, lr=9.98827e-05, gnorm=1.901, loss_scale=16, train_wall=11, gb_free=2.8, wall=180756 2021-06-20 20:51:33 | INFO | train_inner | epoch 006: 746 / 3002 loss=2.532, ppl=5.79, wps=5766.1, ups=0.09, wpb=64812, bsz=128, num_updates=15665, lr=9.98827e-05, gnorm=1.954, loss_scale=16, train_wall=11, gb_free=2.8, wall=180767 2021-06-20 20:51:44 | INFO | train_inner | epoch 006: 747 / 3002 loss=2.462, ppl=5.51, wps=5789.8, ups=0.09, wpb=64800, bsz=128, num_updates=15666, lr=9.98827e-05, gnorm=1.901, loss_scale=16, train_wall=11, gb_free=2.8, wall=180778 2021-06-20 20:51:55 | INFO | train_inner | epoch 006: 748 / 3002 loss=2.624, ppl=6.16, wps=5807.6, ups=0.09, wpb=64758, bsz=128, num_updates=15667, lr=9.98827e-05, gnorm=1.861, loss_scale=16, train_wall=11, gb_free=2.8, wall=180789 2021-06-20 20:52:06 | INFO | train_inner | epoch 006: 749 / 3002 loss=2.601, ppl=6.07, wps=5879.1, ups=0.09, wpb=64850, bsz=128, num_updates=15668, lr=9.98826e-05, gnorm=1.912, loss_scale=16, train_wall=11, gb_free=2.8, wall=180800 2021-06-20 20:52:17 | INFO | train_inner | epoch 006: 750 / 3002 loss=2.507, ppl=5.69, wps=5809.1, ups=0.09, wpb=64730, bsz=128, num_updates=15669, lr=9.98826e-05, gnorm=1.912, loss_scale=16, train_wall=11, gb_free=2.8, wall=180812 2021-06-20 20:52:28 | INFO | train_inner | epoch 006: 751 / 3002 loss=2.469, ppl=5.54, wps=5951.2, ups=0.09, wpb=64867, bsz=128, num_updates=15670, lr=9.98826e-05, gnorm=1.912, loss_scale=16, train_wall=10, gb_free=2.8, wall=180822 2021-06-20 20:52:39 | INFO | train_inner | epoch 006: 752 / 3002 loss=2.375, ppl=5.19, wps=5750.5, ups=0.09, wpb=64834, bsz=128, num_updates=15671, lr=9.98826e-05, gnorm=1.832, loss_scale=16, train_wall=11, gb_free=2.8, wall=180834 2021-06-20 20:52:51 | INFO | train_inner | epoch 006: 753 / 3002 loss=2.5, ppl=5.66, wps=5835.7, ups=0.09, wpb=64827, bsz=128, num_updates=15672, lr=9.98826e-05, gnorm=1.969, loss_scale=16, train_wall=11, gb_free=2.8, wall=180845 2021-06-20 20:53:02 | INFO | train_inner | epoch 006: 754 / 3002 loss=2.471, ppl=5.54, wps=5804.9, ups=0.09, wpb=64831, bsz=128, num_updates=15673, lr=9.98826e-05, gnorm=2.008, loss_scale=16, train_wall=11, gb_free=2.8, wall=180856 2021-06-20 20:53:13 | INFO | train_inner | epoch 006: 755 / 3002 loss=2.506, ppl=5.68, wps=5924.8, ups=0.09, wpb=64843, bsz=128, num_updates=15674, lr=9.98826e-05, gnorm=1.838, loss_scale=16, train_wall=11, gb_free=2.8, wall=180867 2021-06-20 20:53:24 | INFO | train_inner | epoch 006: 756 / 3002 loss=2.551, ppl=5.86, wps=5800.8, ups=0.09, wpb=64835, bsz=128, num_updates=15675, lr=9.98826e-05, gnorm=1.921, loss_scale=16, train_wall=11, gb_free=2.8, wall=180878 2021-06-20 20:53:35 | INFO | train_inner | epoch 006: 757 / 3002 loss=2.36, ppl=5.13, wps=5924.9, ups=0.09, wpb=64812, bsz=128, num_updates=15676, lr=9.98826e-05, gnorm=1.851, loss_scale=16, train_wall=10, gb_free=2.8, wall=180889 2021-06-20 20:53:46 | INFO | train_inner | epoch 006: 758 / 3002 loss=2.393, ppl=5.25, wps=5785.9, ups=0.09, wpb=64854, bsz=128, num_updates=15677, lr=9.98826e-05, gnorm=1.816, loss_scale=16, train_wall=11, gb_free=2.8, wall=180900 2021-06-20 20:53:57 | INFO | train_inner | epoch 006: 759 / 3002 loss=2.614, ppl=6.12, wps=5980.8, ups=0.09, wpb=64840, bsz=128, num_updates=15678, lr=9.98826e-05, gnorm=2.029, loss_scale=16, train_wall=10, gb_free=2.8, wall=180911 2021-06-20 20:54:08 | INFO | train_inner | epoch 006: 760 / 3002 loss=2.427, ppl=5.38, wps=5895.4, ups=0.09, wpb=64896, bsz=128, num_updates=15679, lr=9.98826e-05, gnorm=1.949, loss_scale=16, train_wall=11, gb_free=2.8, wall=180922 2021-06-20 20:54:19 | INFO | train_inner | epoch 006: 761 / 3002 loss=2.494, ppl=5.64, wps=5825.8, ups=0.09, wpb=64777, bsz=128, num_updates=15680, lr=9.98826e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=180933 2021-06-20 20:54:30 | INFO | train_inner | epoch 006: 762 / 3002 loss=2.449, ppl=5.46, wps=5948.9, ups=0.09, wpb=64736, bsz=128, num_updates=15681, lr=9.98825e-05, gnorm=1.892, loss_scale=16, train_wall=10, gb_free=2.8, wall=180944 2021-06-20 20:54:41 | INFO | train_inner | epoch 006: 763 / 3002 loss=2.495, ppl=5.64, wps=5908.7, ups=0.09, wpb=64787, bsz=128, num_updates=15682, lr=9.98825e-05, gnorm=1.98, loss_scale=16, train_wall=11, gb_free=2.8, wall=180955 2021-06-20 20:54:52 | INFO | train_inner | epoch 006: 764 / 3002 loss=2.55, ppl=5.86, wps=5827, ups=0.09, wpb=64910, bsz=128, num_updates=15683, lr=9.98825e-05, gnorm=1.921, loss_scale=16, train_wall=11, gb_free=2.8, wall=180966 2021-06-20 20:55:03 | INFO | train_inner | epoch 006: 765 / 3002 loss=2.34, ppl=5.06, wps=5813.5, ups=0.09, wpb=64863, bsz=128, num_updates=15684, lr=9.98825e-05, gnorm=1.977, loss_scale=16, train_wall=11, gb_free=2.8, wall=180977 2021-06-20 20:55:14 | INFO | train_inner | epoch 006: 766 / 3002 loss=2.551, ppl=5.86, wps=5834.4, ups=0.09, wpb=64912, bsz=128, num_updates=15685, lr=9.98825e-05, gnorm=2.741, loss_scale=16, train_wall=11, gb_free=2.8, wall=180989 2021-06-20 20:55:25 | INFO | train_inner | epoch 006: 767 / 3002 loss=2.521, ppl=5.74, wps=5801.2, ups=0.09, wpb=64891, bsz=128, num_updates=15686, lr=9.98825e-05, gnorm=1.969, loss_scale=16, train_wall=11, gb_free=2.8, wall=181000 2021-06-20 20:55:37 | INFO | train_inner | epoch 006: 768 / 3002 loss=2.496, ppl=5.64, wps=5822.7, ups=0.09, wpb=64793, bsz=128, num_updates=15687, lr=9.98825e-05, gnorm=1.932, loss_scale=16, train_wall=11, gb_free=2.8, wall=181011 2021-06-20 20:55:48 | INFO | train_inner | epoch 006: 769 / 3002 loss=2.476, ppl=5.56, wps=5735.5, ups=0.09, wpb=64810, bsz=128, num_updates=15688, lr=9.98825e-05, gnorm=1.894, loss_scale=16, train_wall=11, gb_free=2.8, wall=181022 2021-06-20 20:55:59 | INFO | train_inner | epoch 006: 770 / 3002 loss=2.6, ppl=6.06, wps=5937.7, ups=0.09, wpb=64835, bsz=128, num_updates=15689, lr=9.98825e-05, gnorm=2.025, loss_scale=16, train_wall=10, gb_free=2.8, wall=181033 2021-06-20 20:56:10 | INFO | train_inner | epoch 006: 771 / 3002 loss=2.526, ppl=5.76, wps=5897, ups=0.09, wpb=64804, bsz=128, num_updates=15690, lr=9.98825e-05, gnorm=1.944, loss_scale=16, train_wall=11, gb_free=2.8, wall=181044 2021-06-20 20:56:21 | INFO | train_inner | epoch 006: 772 / 3002 loss=2.438, ppl=5.42, wps=5853.5, ups=0.09, wpb=64896, bsz=128, num_updates=15691, lr=9.98825e-05, gnorm=1.859, loss_scale=16, train_wall=11, gb_free=2.8, wall=181055 2021-06-20 20:56:32 | INFO | train_inner | epoch 006: 773 / 3002 loss=2.467, ppl=5.53, wps=5817.9, ups=0.09, wpb=64821, bsz=128, num_updates=15692, lr=9.98825e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=181066 2021-06-20 20:56:43 | INFO | train_inner | epoch 006: 774 / 3002 loss=2.628, ppl=6.18, wps=5780.7, ups=0.09, wpb=64835, bsz=128, num_updates=15693, lr=9.98824e-05, gnorm=1.972, loss_scale=16, train_wall=11, gb_free=2.8, wall=181078 2021-06-20 20:56:54 | INFO | train_inner | epoch 006: 775 / 3002 loss=2.416, ppl=5.34, wps=5808.8, ups=0.09, wpb=64755, bsz=128, num_updates=15694, lr=9.98824e-05, gnorm=1.871, loss_scale=16, train_wall=11, gb_free=2.8, wall=181089 2021-06-20 20:57:06 | INFO | train_inner | epoch 006: 776 / 3002 loss=2.664, ppl=6.34, wps=5781.9, ups=0.09, wpb=64745, bsz=128, num_updates=15695, lr=9.98824e-05, gnorm=2.022, loss_scale=16, train_wall=11, gb_free=2.8, wall=181100 2021-06-20 20:57:17 | INFO | train_inner | epoch 006: 777 / 3002 loss=2.604, ppl=6.08, wps=5832.4, ups=0.09, wpb=64791, bsz=128, num_updates=15696, lr=9.98824e-05, gnorm=2.026, loss_scale=16, train_wall=11, gb_free=2.8, wall=181111 2021-06-20 20:57:28 | INFO | train_inner | epoch 006: 778 / 3002 loss=2.515, ppl=5.71, wps=5742.3, ups=0.09, wpb=64725, bsz=128, num_updates=15697, lr=9.98824e-05, gnorm=1.909, loss_scale=16, train_wall=11, gb_free=2.8, wall=181122 2021-06-20 20:57:39 | INFO | train_inner | epoch 006: 779 / 3002 loss=2.491, ppl=5.62, wps=5890.4, ups=0.09, wpb=64805, bsz=128, num_updates=15698, lr=9.98824e-05, gnorm=1.896, loss_scale=16, train_wall=11, gb_free=2.8, wall=181133 2021-06-20 20:57:50 | INFO | train_inner | epoch 006: 780 / 3002 loss=2.686, ppl=6.43, wps=5878.9, ups=0.09, wpb=64864, bsz=128, num_updates=15699, lr=9.98824e-05, gnorm=1.923, loss_scale=16, train_wall=11, gb_free=2.8, wall=181144 2021-06-20 20:58:01 | INFO | train_inner | epoch 006: 781 / 3002 loss=2.415, ppl=5.33, wps=5758.5, ups=0.09, wpb=64762, bsz=128, num_updates=15700, lr=9.98824e-05, gnorm=1.884, loss_scale=16, train_wall=11, gb_free=2.8, wall=181156 2021-06-20 20:58:12 | INFO | train_inner | epoch 006: 782 / 3002 loss=2.411, ppl=5.32, wps=5879.1, ups=0.09, wpb=64849, bsz=128, num_updates=15701, lr=9.98824e-05, gnorm=1.985, loss_scale=16, train_wall=11, gb_free=2.8, wall=181167 2021-06-20 20:58:23 | INFO | train_inner | epoch 006: 783 / 3002 loss=2.559, ppl=5.89, wps=5879.7, ups=0.09, wpb=64838, bsz=128, num_updates=15702, lr=9.98824e-05, gnorm=1.867, loss_scale=16, train_wall=11, gb_free=2.8, wall=181178 2021-06-20 20:58:34 | INFO | train_inner | epoch 006: 784 / 3002 loss=2.415, ppl=5.33, wps=5854.5, ups=0.09, wpb=64817, bsz=128, num_updates=15703, lr=9.98824e-05, gnorm=1.95, loss_scale=16, train_wall=11, gb_free=2.8, wall=181189 2021-06-20 20:58:45 | INFO | train_inner | epoch 006: 785 / 3002 loss=2.589, ppl=6.02, wps=5845.4, ups=0.09, wpb=64830, bsz=128, num_updates=15704, lr=9.98824e-05, gnorm=2.167, loss_scale=16, train_wall=11, gb_free=2.8, wall=181200 2021-06-20 20:58:56 | INFO | train_inner | epoch 006: 786 / 3002 loss=2.361, ppl=5.14, wps=5942.4, ups=0.09, wpb=64863, bsz=128, num_updates=15705, lr=9.98824e-05, gnorm=1.956, loss_scale=16, train_wall=10, gb_free=2.8, wall=181211 2021-06-20 20:59:07 | INFO | train_inner | epoch 006: 787 / 3002 loss=2.495, ppl=5.64, wps=5993.2, ups=0.09, wpb=64826, bsz=128, num_updates=15706, lr=9.98823e-05, gnorm=1.951, loss_scale=16, train_wall=10, gb_free=2.8, wall=181221 2021-06-20 20:59:18 | INFO | train_inner | epoch 006: 788 / 3002 loss=2.47, ppl=5.54, wps=5883.5, ups=0.09, wpb=64881, bsz=128, num_updates=15707, lr=9.98823e-05, gnorm=1.956, loss_scale=16, train_wall=11, gb_free=2.8, wall=181233 2021-06-20 20:59:29 | INFO | train_inner | epoch 006: 789 / 3002 loss=2.501, ppl=5.66, wps=5938.9, ups=0.09, wpb=64873, bsz=128, num_updates=15708, lr=9.98823e-05, gnorm=2.097, loss_scale=32, train_wall=10, gb_free=2.8, wall=181243 2021-06-20 20:59:40 | INFO | train_inner | epoch 006: 790 / 3002 loss=2.519, ppl=5.73, wps=5966.2, ups=0.09, wpb=64792, bsz=128, num_updates=15709, lr=9.98823e-05, gnorm=1.879, loss_scale=32, train_wall=10, gb_free=2.8, wall=181254 2021-06-20 20:59:51 | INFO | train_inner | epoch 006: 791 / 3002 loss=2.29, ppl=4.89, wps=5961.3, ups=0.09, wpb=64878, bsz=128, num_updates=15710, lr=9.98823e-05, gnorm=1.895, loss_scale=32, train_wall=10, gb_free=2.8, wall=181265 2021-06-20 21:00:02 | INFO | train_inner | epoch 006: 792 / 3002 loss=2.615, ppl=6.13, wps=5884.2, ups=0.09, wpb=64758, bsz=128, num_updates=15711, lr=9.98823e-05, gnorm=1.939, loss_scale=32, train_wall=11, gb_free=2.8, wall=181276 2021-06-20 21:00:13 | INFO | train_inner | epoch 006: 793 / 3002 loss=2.412, ppl=5.32, wps=5856.8, ups=0.09, wpb=64859, bsz=128, num_updates=15712, lr=9.98823e-05, gnorm=1.927, loss_scale=32, train_wall=11, gb_free=2.8, wall=181287 2021-06-20 21:00:24 | INFO | train_inner | epoch 006: 794 / 3002 loss=2.467, ppl=5.53, wps=5876.2, ups=0.09, wpb=64874, bsz=128, num_updates=15713, lr=9.98823e-05, gnorm=1.896, loss_scale=32, train_wall=11, gb_free=2.8, wall=181298 2021-06-20 21:00:35 | INFO | train_inner | epoch 006: 795 / 3002 loss=2.409, ppl=5.31, wps=5878.3, ups=0.09, wpb=64779, bsz=128, num_updates=15714, lr=9.98823e-05, gnorm=1.996, loss_scale=32, train_wall=11, gb_free=2.8, wall=181309 2021-06-20 21:00:46 | INFO | train_inner | epoch 006: 796 / 3002 loss=2.325, ppl=5.01, wps=5869.4, ups=0.09, wpb=64882, bsz=128, num_updates=15715, lr=9.98823e-05, gnorm=1.844, loss_scale=32, train_wall=11, gb_free=2.8, wall=181320 2021-06-20 21:00:57 | INFO | train_inner | epoch 006: 797 / 3002 loss=2.437, ppl=5.42, wps=5785.9, ups=0.09, wpb=64880, bsz=128, num_updates=15716, lr=9.98823e-05, gnorm=1.921, loss_scale=32, train_wall=11, gb_free=2.8, wall=181332 2021-06-20 21:01:08 | INFO | train_inner | epoch 006: 798 / 3002 loss=2.45, ppl=5.46, wps=5790.2, ups=0.09, wpb=64738, bsz=128, num_updates=15717, lr=9.98823e-05, gnorm=1.883, loss_scale=32, train_wall=11, gb_free=2.8, wall=181343 2021-06-20 21:01:20 | INFO | train_inner | epoch 006: 799 / 3002 loss=2.501, ppl=5.66, wps=5791.9, ups=0.09, wpb=64823, bsz=128, num_updates=15718, lr=9.98822e-05, gnorm=1.954, loss_scale=32, train_wall=11, gb_free=2.8, wall=181354 2021-06-20 21:01:31 | INFO | train_inner | epoch 006: 800 / 3002 loss=2.405, ppl=5.29, wps=5924.1, ups=0.09, wpb=64868, bsz=128, num_updates=15719, lr=9.98822e-05, gnorm=1.89, loss_scale=32, train_wall=10, gb_free=2.8, wall=181365 2021-06-20 21:01:42 | INFO | train_inner | epoch 006: 801 / 3002 loss=2.584, ppl=5.99, wps=5890.8, ups=0.09, wpb=64882, bsz=128, num_updates=15720, lr=9.98822e-05, gnorm=1.981, loss_scale=32, train_wall=11, gb_free=2.8, wall=181376 2021-06-20 21:01:53 | INFO | train_inner | epoch 006: 802 / 3002 loss=2.445, ppl=5.44, wps=5792.6, ups=0.09, wpb=64860, bsz=128, num_updates=15721, lr=9.98822e-05, gnorm=1.985, loss_scale=32, train_wall=11, gb_free=2.8, wall=181387 2021-06-20 21:02:04 | INFO | train_inner | epoch 006: 803 / 3002 loss=2.444, ppl=5.44, wps=5796.1, ups=0.09, wpb=64869, bsz=128, num_updates=15722, lr=9.98822e-05, gnorm=1.868, loss_scale=32, train_wall=11, gb_free=2.8, wall=181398 2021-06-20 21:02:15 | INFO | train_inner | epoch 006: 804 / 3002 loss=2.494, ppl=5.63, wps=5801.3, ups=0.09, wpb=64856, bsz=128, num_updates=15723, lr=9.98822e-05, gnorm=1.928, loss_scale=32, train_wall=11, gb_free=2.8, wall=181409 2021-06-20 21:02:26 | INFO | train_inner | epoch 006: 805 / 3002 loss=2.677, ppl=6.4, wps=5883.7, ups=0.09, wpb=64747, bsz=128, num_updates=15724, lr=9.98822e-05, gnorm=2.025, loss_scale=32, train_wall=11, gb_free=2.8, wall=181420 2021-06-20 21:02:37 | INFO | train_inner | epoch 006: 806 / 3002 loss=2.611, ppl=6.11, wps=5819.6, ups=0.09, wpb=64840, bsz=128, num_updates=15725, lr=9.98822e-05, gnorm=1.939, loss_scale=32, train_wall=11, gb_free=2.8, wall=181432 2021-06-20 21:02:48 | INFO | train_inner | epoch 006: 807 / 3002 loss=2.577, ppl=5.97, wps=5883.9, ups=0.09, wpb=64851, bsz=128, num_updates=15726, lr=9.98822e-05, gnorm=1.912, loss_scale=32, train_wall=11, gb_free=2.8, wall=181443 2021-06-20 21:02:59 | INFO | train_inner | epoch 006: 808 / 3002 loss=2.312, ppl=4.97, wps=5877.3, ups=0.09, wpb=64895, bsz=128, num_updates=15727, lr=9.98822e-05, gnorm=1.973, loss_scale=32, train_wall=11, gb_free=2.8, wall=181454 2021-06-20 21:03:11 | INFO | train_inner | epoch 006: 809 / 3002 loss=2.486, ppl=5.6, wps=5784.6, ups=0.09, wpb=64788, bsz=128, num_updates=15728, lr=9.98822e-05, gnorm=1.877, loss_scale=32, train_wall=11, gb_free=2.8, wall=181465 2021-06-20 21:03:22 | INFO | train_inner | epoch 006: 810 / 3002 loss=2.496, ppl=5.64, wps=5856.8, ups=0.09, wpb=64845, bsz=128, num_updates=15729, lr=9.98822e-05, gnorm=1.858, loss_scale=32, train_wall=11, gb_free=2.8, wall=181476 2021-06-20 21:03:33 | INFO | train_inner | epoch 006: 811 / 3002 loss=2.51, ppl=5.69, wps=5786.1, ups=0.09, wpb=64773, bsz=128, num_updates=15730, lr=9.98822e-05, gnorm=1.885, loss_scale=32, train_wall=11, gb_free=2.8, wall=181487 2021-06-20 21:03:44 | INFO | train_inner | epoch 006: 812 / 3002 loss=2.534, ppl=5.79, wps=5867.5, ups=0.09, wpb=64767, bsz=128, num_updates=15731, lr=9.98821e-05, gnorm=1.976, loss_scale=32, train_wall=11, gb_free=2.8, wall=181498 2021-06-20 21:03:55 | INFO | train_inner | epoch 006: 813 / 3002 loss=2.511, ppl=5.7, wps=5894.2, ups=0.09, wpb=64742, bsz=128, num_updates=15732, lr=9.98821e-05, gnorm=1.855, loss_scale=32, train_wall=10, gb_free=2.8, wall=181509 2021-06-20 21:04:06 | INFO | train_inner | epoch 006: 814 / 3002 loss=2.437, ppl=5.42, wps=5860.6, ups=0.09, wpb=64907, bsz=128, num_updates=15733, lr=9.98821e-05, gnorm=1.957, loss_scale=32, train_wall=11, gb_free=2.8, wall=181520 2021-06-20 21:04:17 | INFO | train_inner | epoch 006: 815 / 3002 loss=2.435, ppl=5.41, wps=5947.2, ups=0.09, wpb=64827, bsz=128, num_updates=15734, lr=9.98821e-05, gnorm=1.863, loss_scale=32, train_wall=10, gb_free=2.8, wall=181531 2021-06-20 21:04:28 | INFO | train_inner | epoch 006: 816 / 3002 loss=2.343, ppl=5.07, wps=5876.5, ups=0.09, wpb=64789, bsz=128, num_updates=15735, lr=9.98821e-05, gnorm=1.837, loss_scale=32, train_wall=11, gb_free=2.8, wall=181542 2021-06-20 21:04:39 | INFO | train_inner | epoch 006: 817 / 3002 loss=2.527, ppl=5.76, wps=5746.2, ups=0.09, wpb=64816, bsz=128, num_updates=15736, lr=9.98821e-05, gnorm=1.926, loss_scale=32, train_wall=11, gb_free=2.8, wall=181553 2021-06-20 21:04:50 | INFO | train_inner | epoch 006: 818 / 3002 loss=2.516, ppl=5.72, wps=5799.1, ups=0.09, wpb=64792, bsz=128, num_updates=15737, lr=9.98821e-05, gnorm=1.95, loss_scale=32, train_wall=11, gb_free=2.8, wall=181565 2021-06-20 21:05:01 | INFO | train_inner | epoch 006: 819 / 3002 loss=2.483, ppl=5.59, wps=6001.9, ups=0.09, wpb=64867, bsz=128, num_updates=15738, lr=9.98821e-05, gnorm=1.899, loss_scale=32, train_wall=10, gb_free=2.8, wall=181575 2021-06-20 21:05:12 | INFO | train_inner | epoch 006: 820 / 3002 loss=2.673, ppl=6.38, wps=5750.8, ups=0.09, wpb=64771, bsz=128, num_updates=15739, lr=9.98821e-05, gnorm=2.126, loss_scale=32, train_wall=11, gb_free=2.8, wall=181587 2021-06-20 21:05:24 | INFO | train_inner | epoch 006: 821 / 3002 loss=2.505, ppl=5.68, wps=5776.3, ups=0.09, wpb=64827, bsz=128, num_updates=15740, lr=9.98821e-05, gnorm=1.934, loss_scale=32, train_wall=11, gb_free=2.8, wall=181598 2021-06-20 21:05:35 | INFO | train_inner | epoch 006: 822 / 3002 loss=2.323, ppl=5, wps=5797, ups=0.09, wpb=64828, bsz=128, num_updates=15741, lr=9.98821e-05, gnorm=1.962, loss_scale=32, train_wall=11, gb_free=2.8, wall=181609 2021-06-20 21:05:46 | INFO | train_inner | epoch 006: 823 / 3002 loss=2.401, ppl=5.28, wps=5740.9, ups=0.09, wpb=64850, bsz=128, num_updates=15742, lr=9.98821e-05, gnorm=2.053, loss_scale=32, train_wall=11, gb_free=2.8, wall=181620 2021-06-20 21:05:57 | INFO | train_inner | epoch 006: 824 / 3002 loss=2.292, ppl=4.9, wps=5853.8, ups=0.09, wpb=64823, bsz=128, num_updates=15743, lr=9.9882e-05, gnorm=1.908, loss_scale=32, train_wall=11, gb_free=2.8, wall=181631 2021-06-20 21:06:08 | INFO | train_inner | epoch 006: 825 / 3002 loss=2.492, ppl=5.63, wps=5892.3, ups=0.09, wpb=64867, bsz=128, num_updates=15744, lr=9.9882e-05, gnorm=2.019, loss_scale=32, train_wall=11, gb_free=2.8, wall=181643 2021-06-20 21:06:19 | INFO | train_inner | epoch 006: 826 / 3002 loss=2.356, ppl=5.12, wps=5771.2, ups=0.09, wpb=64881, bsz=128, num_updates=15745, lr=9.9882e-05, gnorm=1.823, loss_scale=32, train_wall=11, gb_free=2.8, wall=181654 2021-06-20 21:06:31 | INFO | train_inner | epoch 006: 827 / 3002 loss=2.508, ppl=5.69, wps=5840.6, ups=0.09, wpb=64802, bsz=128, num_updates=15746, lr=9.9882e-05, gnorm=1.921, loss_scale=32, train_wall=11, gb_free=2.8, wall=181665 2021-06-20 21:06:42 | INFO | train_inner | epoch 006: 828 / 3002 loss=2.358, ppl=5.13, wps=5819, ups=0.09, wpb=64804, bsz=128, num_updates=15747, lr=9.9882e-05, gnorm=1.854, loss_scale=32, train_wall=11, gb_free=2.8, wall=181676 2021-06-20 21:06:53 | INFO | train_inner | epoch 006: 829 / 3002 loss=2.419, ppl=5.35, wps=5764, ups=0.09, wpb=64856, bsz=128, num_updates=15748, lr=9.9882e-05, gnorm=1.934, loss_scale=32, train_wall=11, gb_free=2.8, wall=181687 2021-06-20 21:07:04 | INFO | train_inner | epoch 006: 830 / 3002 loss=2.504, ppl=5.67, wps=5725, ups=0.09, wpb=64808, bsz=128, num_updates=15749, lr=9.9882e-05, gnorm=1.875, loss_scale=32, train_wall=11, gb_free=2.8, wall=181699 2021-06-20 21:07:15 | INFO | train_inner | epoch 006: 831 / 3002 loss=2.5, ppl=5.66, wps=5969, ups=0.09, wpb=64760, bsz=128, num_updates=15750, lr=9.9882e-05, gnorm=1.871, loss_scale=32, train_wall=10, gb_free=2.8, wall=181709 2021-06-20 21:07:26 | INFO | train_inner | epoch 006: 832 / 3002 loss=2.501, ppl=5.66, wps=5966, ups=0.09, wpb=64799, bsz=128, num_updates=15751, lr=9.9882e-05, gnorm=1.958, loss_scale=32, train_wall=10, gb_free=2.8, wall=181720 2021-06-20 21:07:37 | INFO | train_inner | epoch 006: 833 / 3002 loss=2.535, ppl=5.79, wps=5962, ups=0.09, wpb=64886, bsz=128, num_updates=15752, lr=9.9882e-05, gnorm=1.903, loss_scale=32, train_wall=10, gb_free=2.8, wall=181731 2021-06-20 21:07:48 | INFO | train_inner | epoch 006: 834 / 3002 loss=2.487, ppl=5.61, wps=5878.8, ups=0.09, wpb=64843, bsz=128, num_updates=15753, lr=9.9882e-05, gnorm=1.997, loss_scale=32, train_wall=11, gb_free=2.8, wall=181742 2021-06-20 21:07:59 | INFO | train_inner | epoch 006: 835 / 3002 loss=2.339, ppl=5.06, wps=5836.8, ups=0.09, wpb=64762, bsz=128, num_updates=15754, lr=9.9882e-05, gnorm=1.934, loss_scale=32, train_wall=11, gb_free=2.8, wall=181753 2021-06-20 21:08:10 | INFO | train_inner | epoch 006: 836 / 3002 loss=2.559, ppl=5.89, wps=5879.9, ups=0.09, wpb=64898, bsz=128, num_updates=15755, lr=9.9882e-05, gnorm=1.958, loss_scale=32, train_wall=11, gb_free=2.8, wall=181764 2021-06-20 21:08:21 | INFO | train_inner | epoch 006: 837 / 3002 loss=2.527, ppl=5.76, wps=5777, ups=0.09, wpb=64828, bsz=128, num_updates=15756, lr=9.98819e-05, gnorm=1.933, loss_scale=32, train_wall=11, gb_free=2.8, wall=181776 2021-06-20 21:08:32 | INFO | train_inner | epoch 006: 838 / 3002 loss=2.362, ppl=5.14, wps=5787.2, ups=0.09, wpb=64835, bsz=128, num_updates=15757, lr=9.98819e-05, gnorm=1.923, loss_scale=32, train_wall=11, gb_free=2.8, wall=181787 2021-06-20 21:08:43 | INFO | train_inner | epoch 006: 839 / 3002 loss=2.494, ppl=5.63, wps=5911.1, ups=0.09, wpb=64804, bsz=128, num_updates=15758, lr=9.98819e-05, gnorm=1.877, loss_scale=32, train_wall=11, gb_free=2.8, wall=181798 2021-06-20 21:08:54 | INFO | train_inner | epoch 006: 840 / 3002 loss=2.484, ppl=5.6, wps=5894.9, ups=0.09, wpb=64843, bsz=128, num_updates=15759, lr=9.98819e-05, gnorm=1.938, loss_scale=32, train_wall=11, gb_free=2.8, wall=181809 2021-06-20 21:09:05 | INFO | train_inner | epoch 006: 841 / 3002 loss=2.459, ppl=5.5, wps=5868.7, ups=0.09, wpb=64811, bsz=128, num_updates=15760, lr=9.98819e-05, gnorm=1.896, loss_scale=32, train_wall=11, gb_free=2.8, wall=181820 2021-06-20 21:09:16 | INFO | train_inner | epoch 006: 842 / 3002 loss=2.468, ppl=5.53, wps=5905.3, ups=0.09, wpb=64843, bsz=128, num_updates=15761, lr=9.98819e-05, gnorm=1.889, loss_scale=32, train_wall=10, gb_free=2.8, wall=181831 2021-06-20 21:09:27 | INFO | train_inner | epoch 006: 843 / 3002 loss=2.446, ppl=5.45, wps=5978.1, ups=0.09, wpb=64929, bsz=128, num_updates=15762, lr=9.98819e-05, gnorm=1.976, loss_scale=32, train_wall=10, gb_free=2.8, wall=181842 2021-06-20 21:09:38 | INFO | train_inner | epoch 006: 844 / 3002 loss=2.448, ppl=5.46, wps=5818.1, ups=0.09, wpb=64775, bsz=128, num_updates=15763, lr=9.98819e-05, gnorm=1.93, loss_scale=32, train_wall=11, gb_free=2.8, wall=181853 2021-06-20 21:09:50 | INFO | train_inner | epoch 006: 845 / 3002 loss=2.593, ppl=6.03, wps=5736.3, ups=0.09, wpb=64848, bsz=128, num_updates=15764, lr=9.98819e-05, gnorm=1.933, loss_scale=32, train_wall=11, gb_free=2.8, wall=181864 2021-06-20 21:10:01 | INFO | train_inner | epoch 006: 846 / 3002 loss=2.459, ppl=5.5, wps=5852, ups=0.09, wpb=64831, bsz=128, num_updates=15765, lr=9.98819e-05, gnorm=1.91, loss_scale=32, train_wall=11, gb_free=2.8, wall=181875 2021-06-20 21:10:12 | INFO | train_inner | epoch 006: 847 / 3002 loss=2.397, ppl=5.27, wps=5856.9, ups=0.09, wpb=64827, bsz=128, num_updates=15766, lr=9.98819e-05, gnorm=2.044, loss_scale=32, train_wall=11, gb_free=2.8, wall=181886 2021-06-20 21:10:23 | INFO | train_inner | epoch 006: 848 / 3002 loss=2.538, ppl=5.81, wps=5853.2, ups=0.09, wpb=64812, bsz=128, num_updates=15767, lr=9.98819e-05, gnorm=1.92, loss_scale=32, train_wall=11, gb_free=2.8, wall=181897 2021-06-20 21:10:34 | INFO | train_inner | epoch 006: 849 / 3002 loss=2.566, ppl=5.92, wps=5823.8, ups=0.09, wpb=64877, bsz=128, num_updates=15768, lr=9.98818e-05, gnorm=1.992, loss_scale=32, train_wall=11, gb_free=2.8, wall=181908 2021-06-20 21:10:45 | INFO | train_inner | epoch 006: 850 / 3002 loss=2.499, ppl=5.65, wps=5849.4, ups=0.09, wpb=64730, bsz=128, num_updates=15769, lr=9.98818e-05, gnorm=1.929, loss_scale=32, train_wall=11, gb_free=2.8, wall=181919 2021-06-20 21:10:56 | INFO | train_inner | epoch 006: 851 / 3002 loss=2.369, ppl=5.17, wps=5855.5, ups=0.09, wpb=64826, bsz=128, num_updates=15770, lr=9.98818e-05, gnorm=2.03, loss_scale=32, train_wall=11, gb_free=2.8, wall=181931 2021-06-20 21:11:07 | INFO | train_inner | epoch 006: 852 / 3002 loss=2.602, ppl=6.07, wps=5968.9, ups=0.09, wpb=64896, bsz=128, num_updates=15771, lr=9.98818e-05, gnorm=1.91, loss_scale=32, train_wall=10, gb_free=2.8, wall=181941 2021-06-20 21:11:18 | INFO | train_inner | epoch 006: 853 / 3002 loss=2.485, ppl=5.6, wps=5752.3, ups=0.09, wpb=64841, bsz=128, num_updates=15772, lr=9.98818e-05, gnorm=1.986, loss_scale=32, train_wall=11, gb_free=2.8, wall=181953 2021-06-20 21:11:29 | INFO | train_inner | epoch 006: 854 / 3002 loss=2.424, ppl=5.37, wps=5901.1, ups=0.09, wpb=64857, bsz=128, num_updates=15773, lr=9.98818e-05, gnorm=1.852, loss_scale=32, train_wall=11, gb_free=2.8, wall=181964 2021-06-20 21:11:40 | INFO | train_inner | epoch 006: 855 / 3002 loss=2.386, ppl=5.23, wps=5858.1, ups=0.09, wpb=64826, bsz=128, num_updates=15774, lr=9.98818e-05, gnorm=1.879, loss_scale=32, train_wall=11, gb_free=2.8, wall=181975 2021-06-20 21:11:51 | INFO | train_inner | epoch 006: 856 / 3002 loss=2.446, ppl=5.45, wps=5915.7, ups=0.09, wpb=64802, bsz=128, num_updates=15775, lr=9.98818e-05, gnorm=1.917, loss_scale=32, train_wall=10, gb_free=2.8, wall=181986 2021-06-20 21:12:02 | INFO | train_inner | epoch 006: 857 / 3002 loss=2.626, ppl=6.17, wps=5818.7, ups=0.09, wpb=64833, bsz=128, num_updates=15776, lr=9.98818e-05, gnorm=1.869, loss_scale=32, train_wall=11, gb_free=2.8, wall=181997 2021-06-20 21:12:14 | INFO | train_inner | epoch 006: 858 / 3002 loss=2.339, ppl=5.06, wps=5838.3, ups=0.09, wpb=64854, bsz=128, num_updates=15777, lr=9.98818e-05, gnorm=1.813, loss_scale=32, train_wall=11, gb_free=2.8, wall=182008 2021-06-20 21:12:25 | INFO | train_inner | epoch 006: 859 / 3002 loss=2.342, ppl=5.07, wps=5899.9, ups=0.09, wpb=64920, bsz=128, num_updates=15778, lr=9.98818e-05, gnorm=1.859, loss_scale=32, train_wall=11, gb_free=2.8, wall=182019 2021-06-20 21:12:36 | INFO | train_inner | epoch 006: 860 / 3002 loss=2.623, ppl=6.16, wps=5926.3, ups=0.09, wpb=64875, bsz=128, num_updates=15779, lr=9.98818e-05, gnorm=1.931, loss_scale=32, train_wall=10, gb_free=2.8, wall=182030 2021-06-20 21:12:47 | INFO | train_inner | epoch 006: 861 / 3002 loss=2.603, ppl=6.08, wps=5858.3, ups=0.09, wpb=64786, bsz=128, num_updates=15780, lr=9.98818e-05, gnorm=1.985, loss_scale=32, train_wall=11, gb_free=2.8, wall=182041 2021-06-20 21:12:58 | INFO | train_inner | epoch 006: 862 / 3002 loss=2.53, ppl=5.77, wps=5810.7, ups=0.09, wpb=64756, bsz=128, num_updates=15781, lr=9.98817e-05, gnorm=1.89, loss_scale=32, train_wall=11, gb_free=2.8, wall=182052 2021-06-20 21:13:09 | INFO | train_inner | epoch 006: 863 / 3002 loss=2.376, ppl=5.19, wps=5716.9, ups=0.09, wpb=64839, bsz=128, num_updates=15782, lr=9.98817e-05, gnorm=2.16, loss_scale=32, train_wall=11, gb_free=2.8, wall=182063 2021-06-20 21:13:20 | INFO | train_inner | epoch 006: 864 / 3002 loss=2.404, ppl=5.29, wps=5907, ups=0.09, wpb=64888, bsz=128, num_updates=15783, lr=9.98817e-05, gnorm=1.86, loss_scale=32, train_wall=11, gb_free=2.8, wall=182074 2021-06-20 21:13:31 | INFO | train_inner | epoch 006: 865 / 3002 loss=2.36, ppl=5.13, wps=5852.9, ups=0.09, wpb=64765, bsz=128, num_updates=15784, lr=9.98817e-05, gnorm=1.867, loss_scale=32, train_wall=11, gb_free=2.8, wall=182085 2021-06-20 21:13:42 | INFO | train_inner | epoch 006: 866 / 3002 loss=2.504, ppl=5.67, wps=5862.1, ups=0.09, wpb=64877, bsz=128, num_updates=15785, lr=9.98817e-05, gnorm=1.97, loss_scale=32, train_wall=11, gb_free=2.8, wall=182097 2021-06-20 21:13:53 | INFO | train_inner | epoch 006: 867 / 3002 loss=2.561, ppl=5.9, wps=5846.1, ups=0.09, wpb=64812, bsz=128, num_updates=15786, lr=9.98817e-05, gnorm=1.919, loss_scale=32, train_wall=11, gb_free=2.8, wall=182108 2021-06-20 21:14:04 | INFO | train_inner | epoch 006: 868 / 3002 loss=2.193, ppl=4.57, wps=5885.6, ups=0.09, wpb=64801, bsz=128, num_updates=15787, lr=9.98817e-05, gnorm=1.912, loss_scale=32, train_wall=11, gb_free=2.8, wall=182119 2021-06-20 21:14:15 | INFO | train_inner | epoch 006: 869 / 3002 loss=2.4, ppl=5.28, wps=5948, ups=0.09, wpb=64795, bsz=128, num_updates=15788, lr=9.98817e-05, gnorm=1.852, loss_scale=32, train_wall=10, gb_free=2.8, wall=182130 2021-06-20 21:14:26 | INFO | train_inner | epoch 006: 870 / 3002 loss=2.439, ppl=5.42, wps=5842.5, ups=0.09, wpb=64840, bsz=128, num_updates=15789, lr=9.98817e-05, gnorm=1.967, loss_scale=32, train_wall=11, gb_free=2.8, wall=182141 2021-06-20 21:14:37 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 2021-06-20 21:14:48 | INFO | train_inner | epoch 006: 872 / 3002 loss=2.549, ppl=5.85, wps=2940.5, ups=0.05, wpb=64829, bsz=128, num_updates=15790, lr=9.98817e-05, gnorm=1.973, loss_scale=16, train_wall=21, gb_free=2.8, wall=182163 2021-06-20 21:14:59 | INFO | train_inner | epoch 006: 873 / 3002 loss=2.371, ppl=5.17, wps=5842.4, ups=0.09, wpb=64836, bsz=128, num_updates=15791, lr=9.98817e-05, gnorm=1.872, loss_scale=16, train_wall=11, gb_free=2.8, wall=182174 2021-06-20 21:15:11 | INFO | train_inner | epoch 006: 874 / 3002 loss=2.469, ppl=5.54, wps=5869, ups=0.09, wpb=64919, bsz=128, num_updates=15792, lr=9.98817e-05, gnorm=1.944, loss_scale=16, train_wall=11, gb_free=2.8, wall=182185 2021-06-20 21:15:21 | INFO | train_inner | epoch 006: 875 / 3002 loss=2.421, ppl=5.36, wps=5910.9, ups=0.09, wpb=64814, bsz=128, num_updates=15793, lr=9.98816e-05, gnorm=1.928, loss_scale=16, train_wall=11, gb_free=2.8, wall=182196 2021-06-20 21:15:32 | INFO | train_inner | epoch 006: 876 / 3002 loss=2.468, ppl=5.53, wps=5959.3, ups=0.09, wpb=64807, bsz=128, num_updates=15794, lr=9.98816e-05, gnorm=1.938, loss_scale=16, train_wall=10, gb_free=2.8, wall=182207 2021-06-20 21:15:44 | INFO | train_inner | epoch 006: 877 / 3002 loss=2.399, ppl=5.27, wps=5757, ups=0.09, wpb=64896, bsz=128, num_updates=15795, lr=9.98816e-05, gnorm=1.946, loss_scale=16, train_wall=11, gb_free=2.8, wall=182218 2021-06-20 21:15:55 | INFO | train_inner | epoch 006: 878 / 3002 loss=2.643, ppl=6.25, wps=5840, ups=0.09, wpb=64819, bsz=128, num_updates=15796, lr=9.98816e-05, gnorm=1.994, loss_scale=16, train_wall=11, gb_free=2.8, wall=182229 2021-06-20 21:16:06 | INFO | train_inner | epoch 006: 879 / 3002 loss=2.355, ppl=5.12, wps=5989.2, ups=0.09, wpb=64843, bsz=128, num_updates=15797, lr=9.98816e-05, gnorm=1.914, loss_scale=16, train_wall=10, gb_free=2.8, wall=182240 2021-06-20 21:16:17 | INFO | train_inner | epoch 006: 880 / 3002 loss=2.574, ppl=5.96, wps=5719, ups=0.09, wpb=64801, bsz=128, num_updates=15798, lr=9.98816e-05, gnorm=1.955, loss_scale=16, train_wall=11, gb_free=2.8, wall=182251 2021-06-20 21:16:28 | INFO | train_inner | epoch 006: 881 / 3002 loss=2.529, ppl=5.77, wps=5893.6, ups=0.09, wpb=64802, bsz=128, num_updates=15799, lr=9.98816e-05, gnorm=1.916, loss_scale=16, train_wall=11, gb_free=2.8, wall=182262 2021-06-20 21:16:39 | INFO | train_inner | epoch 006: 882 / 3002 loss=2.432, ppl=5.4, wps=5822.8, ups=0.09, wpb=64925, bsz=128, num_updates=15800, lr=9.98816e-05, gnorm=1.917, loss_scale=16, train_wall=11, gb_free=2.8, wall=182273 2021-06-20 21:16:50 | INFO | train_inner | epoch 006: 883 / 3002 loss=2.296, ppl=4.91, wps=5755.6, ups=0.09, wpb=64831, bsz=128, num_updates=15801, lr=9.98816e-05, gnorm=1.873, loss_scale=16, train_wall=11, gb_free=2.8, wall=182285 2021-06-20 21:17:01 | INFO | train_inner | epoch 006: 884 / 3002 loss=2.456, ppl=5.49, wps=5877.8, ups=0.09, wpb=64793, bsz=128, num_updates=15802, lr=9.98816e-05, gnorm=1.913, loss_scale=16, train_wall=11, gb_free=2.8, wall=182296 2021-06-20 21:17:13 | INFO | train_inner | epoch 006: 885 / 3002 loss=2.366, ppl=5.16, wps=5768.5, ups=0.09, wpb=64860, bsz=128, num_updates=15803, lr=9.98816e-05, gnorm=1.957, loss_scale=16, train_wall=11, gb_free=2.8, wall=182307 2021-06-20 21:17:24 | INFO | train_inner | epoch 006: 886 / 3002 loss=2.56, ppl=5.9, wps=5695.2, ups=0.09, wpb=64738, bsz=128, num_updates=15804, lr=9.98816e-05, gnorm=1.975, loss_scale=16, train_wall=11, gb_free=2.8, wall=182318 2021-06-20 21:17:35 | INFO | train_inner | epoch 006: 887 / 3002 loss=2.464, ppl=5.52, wps=5930.9, ups=0.09, wpb=64828, bsz=128, num_updates=15805, lr=9.98816e-05, gnorm=1.935, loss_scale=16, train_wall=10, gb_free=2.8, wall=182329 2021-06-20 21:17:46 | INFO | train_inner | epoch 006: 888 / 3002 loss=2.464, ppl=5.52, wps=5865.2, ups=0.09, wpb=64804, bsz=128, num_updates=15806, lr=9.98815e-05, gnorm=1.83, loss_scale=16, train_wall=11, gb_free=2.8, wall=182340 2021-06-20 21:17:57 | INFO | train_inner | epoch 006: 889 / 3002 loss=2.438, ppl=5.42, wps=5815.6, ups=0.09, wpb=64915, bsz=128, num_updates=15807, lr=9.98815e-05, gnorm=1.929, loss_scale=16, train_wall=11, gb_free=2.8, wall=182351 2021-06-20 21:18:08 | INFO | train_inner | epoch 006: 890 / 3002 loss=2.546, ppl=5.84, wps=5913.3, ups=0.09, wpb=64896, bsz=128, num_updates=15808, lr=9.98815e-05, gnorm=1.891, loss_scale=16, train_wall=11, gb_free=2.8, wall=182362 2021-06-20 21:18:19 | INFO | train_inner | epoch 006: 891 / 3002 loss=2.551, ppl=5.86, wps=5968.3, ups=0.09, wpb=64844, bsz=128, num_updates=15809, lr=9.98815e-05, gnorm=1.926, loss_scale=16, train_wall=10, gb_free=2.8, wall=182373 2021-06-20 21:18:30 | INFO | train_inner | epoch 006: 892 / 3002 loss=2.575, ppl=5.96, wps=5897.5, ups=0.09, wpb=64853, bsz=128, num_updates=15810, lr=9.98815e-05, gnorm=1.955, loss_scale=16, train_wall=11, gb_free=2.8, wall=182384 2021-06-20 21:18:41 | INFO | train_inner | epoch 006: 893 / 3002 loss=2.402, ppl=5.28, wps=5764.3, ups=0.09, wpb=64743, bsz=128, num_updates=15811, lr=9.98815e-05, gnorm=1.982, loss_scale=16, train_wall=11, gb_free=2.8, wall=182395 2021-06-20 21:18:52 | INFO | train_inner | epoch 006: 894 / 3002 loss=2.411, ppl=5.32, wps=5840.6, ups=0.09, wpb=64876, bsz=128, num_updates=15812, lr=9.98815e-05, gnorm=1.899, loss_scale=16, train_wall=11, gb_free=2.8, wall=182407 2021-06-20 21:19:03 | INFO | train_inner | epoch 006: 895 / 3002 loss=2.409, ppl=5.31, wps=5797.6, ups=0.09, wpb=64802, bsz=128, num_updates=15813, lr=9.98815e-05, gnorm=2.007, loss_scale=16, train_wall=11, gb_free=2.8, wall=182418 2021-06-20 21:19:14 | INFO | train_inner | epoch 006: 896 / 3002 loss=2.429, ppl=5.38, wps=5950.3, ups=0.09, wpb=64792, bsz=128, num_updates=15814, lr=9.98815e-05, gnorm=1.957, loss_scale=16, train_wall=10, gb_free=2.8, wall=182429 2021-06-20 21:19:25 | INFO | train_inner | epoch 006: 897 / 3002 loss=2.565, ppl=5.92, wps=5870.1, ups=0.09, wpb=64792, bsz=128, num_updates=15815, lr=9.98815e-05, gnorm=1.87, loss_scale=16, train_wall=11, gb_free=2.8, wall=182440 2021-06-20 21:19:37 | INFO | train_inner | epoch 006: 898 / 3002 loss=2.572, ppl=5.94, wps=5809.8, ups=0.09, wpb=64852, bsz=128, num_updates=15816, lr=9.98815e-05, gnorm=2.011, loss_scale=16, train_wall=11, gb_free=2.8, wall=182451 2021-06-20 21:19:48 | INFO | train_inner | epoch 006: 899 / 3002 loss=2.612, ppl=6.11, wps=5759.7, ups=0.09, wpb=64789, bsz=128, num_updates=15817, lr=9.98815e-05, gnorm=2.022, loss_scale=16, train_wall=11, gb_free=2.8, wall=182462 2021-06-20 21:19:59 | INFO | train_inner | epoch 006: 900 / 3002 loss=2.252, ppl=4.76, wps=5874.8, ups=0.09, wpb=64860, bsz=128, num_updates=15818, lr=9.98814e-05, gnorm=2.006, loss_scale=16, train_wall=11, gb_free=2.8, wall=182473 2021-06-20 21:20:10 | INFO | train_inner | epoch 006: 901 / 3002 loss=2.442, ppl=5.43, wps=5895.1, ups=0.09, wpb=64852, bsz=128, num_updates=15819, lr=9.98814e-05, gnorm=1.9, loss_scale=16, train_wall=11, gb_free=2.8, wall=182484 2021-06-20 21:20:21 | INFO | train_inner | epoch 006: 902 / 3002 loss=2.557, ppl=5.89, wps=5866.4, ups=0.09, wpb=64825, bsz=128, num_updates=15820, lr=9.98814e-05, gnorm=1.892, loss_scale=16, train_wall=11, gb_free=2.8, wall=182495 2021-06-20 21:20:32 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-20 21:20:43 | INFO | train_inner | epoch 006: 904 / 3002 loss=2.495, ppl=5.64, wps=2970.2, ups=0.05, wpb=64845, bsz=128, num_updates=15821, lr=9.98814e-05, gnorm=1.844, loss_scale=8, train_wall=21, gb_free=2.8, wall=182517 2021-06-20 21:20:54 | INFO | train_inner | epoch 006: 905 / 3002 loss=2.585, ppl=6, wps=5866.7, ups=0.09, wpb=64746, bsz=128, num_updates=15822, lr=9.98814e-05, gnorm=1.927, loss_scale=8, train_wall=11, gb_free=2.8, wall=182528 2021-06-20 21:21:05 | INFO | train_inner | epoch 006: 906 / 3002 loss=2.398, ppl=5.27, wps=5844.4, ups=0.09, wpb=64836, bsz=128, num_updates=15823, lr=9.98814e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=182539 2021-06-20 21:21:16 | INFO | train_inner | epoch 006: 907 / 3002 loss=2.427, ppl=5.38, wps=5775.5, ups=0.09, wpb=64784, bsz=128, num_updates=15824, lr=9.98814e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=182550 2021-06-20 21:21:27 | INFO | train_inner | epoch 006: 908 / 3002 loss=2.635, ppl=6.21, wps=5787.3, ups=0.09, wpb=64791, bsz=128, num_updates=15825, lr=9.98814e-05, gnorm=1.891, loss_scale=8, train_wall=11, gb_free=2.8, wall=182562 2021-06-20 21:21:39 | INFO | train_inner | epoch 006: 909 / 3002 loss=2.329, ppl=5.03, wps=5715.8, ups=0.09, wpb=64751, bsz=128, num_updates=15826, lr=9.98814e-05, gnorm=1.882, loss_scale=8, train_wall=11, gb_free=2.8, wall=182573 2021-06-20 21:21:50 | INFO | train_inner | epoch 006: 910 / 3002 loss=2.468, ppl=5.53, wps=5840.6, ups=0.09, wpb=64819, bsz=128, num_updates=15827, lr=9.98814e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=182584 2021-06-20 21:22:01 | INFO | train_inner | epoch 006: 911 / 3002 loss=2.395, ppl=5.26, wps=5847.6, ups=0.09, wpb=64812, bsz=128, num_updates=15828, lr=9.98814e-05, gnorm=1.869, loss_scale=8, train_wall=11, gb_free=2.8, wall=182595 2021-06-20 21:22:12 | INFO | train_inner | epoch 006: 912 / 3002 loss=2.438, ppl=5.42, wps=5812, ups=0.09, wpb=64737, bsz=128, num_updates=15829, lr=9.98814e-05, gnorm=1.857, loss_scale=8, train_wall=11, gb_free=2.8, wall=182606 2021-06-20 21:22:23 | INFO | train_inner | epoch 006: 913 / 3002 loss=2.538, ppl=5.81, wps=5917.5, ups=0.09, wpb=64829, bsz=128, num_updates=15830, lr=9.98814e-05, gnorm=2.025, loss_scale=8, train_wall=11, gb_free=2.8, wall=182617 2021-06-20 21:22:34 | INFO | train_inner | epoch 006: 914 / 3002 loss=2.43, ppl=5.39, wps=5823.1, ups=0.09, wpb=64842, bsz=128, num_updates=15831, lr=9.98813e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=182628 2021-06-20 21:22:45 | INFO | train_inner | epoch 006: 915 / 3002 loss=2.47, ppl=5.54, wps=5842.5, ups=0.09, wpb=64824, bsz=128, num_updates=15832, lr=9.98813e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=182639 2021-06-20 21:22:56 | INFO | train_inner | epoch 006: 916 / 3002 loss=2.249, ppl=4.75, wps=5863.1, ups=0.09, wpb=64893, bsz=128, num_updates=15833, lr=9.98813e-05, gnorm=1.892, loss_scale=8, train_wall=11, gb_free=2.8, wall=182650 2021-06-20 21:23:07 | INFO | train_inner | epoch 006: 917 / 3002 loss=2.487, ppl=5.61, wps=5745.1, ups=0.09, wpb=64801, bsz=128, num_updates=15834, lr=9.98813e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=182662 2021-06-20 21:23:18 | INFO | train_inner | epoch 006: 918 / 3002 loss=2.443, ppl=5.44, wps=5886.9, ups=0.09, wpb=64881, bsz=128, num_updates=15835, lr=9.98813e-05, gnorm=1.833, loss_scale=8, train_wall=11, gb_free=2.8, wall=182673 2021-06-20 21:23:29 | INFO | train_inner | epoch 006: 919 / 3002 loss=2.485, ppl=5.6, wps=5887.2, ups=0.09, wpb=64768, bsz=128, num_updates=15836, lr=9.98813e-05, gnorm=1.912, loss_scale=8, train_wall=11, gb_free=2.8, wall=182684 2021-06-20 21:23:41 | INFO | train_inner | epoch 006: 920 / 3002 loss=2.466, ppl=5.53, wps=5837.9, ups=0.09, wpb=64844, bsz=128, num_updates=15837, lr=9.98813e-05, gnorm=2.212, loss_scale=8, train_wall=11, gb_free=2.8, wall=182695 2021-06-20 21:23:51 | INFO | train_inner | epoch 006: 921 / 3002 loss=2.362, ppl=5.14, wps=5920.7, ups=0.09, wpb=64808, bsz=128, num_updates=15838, lr=9.98813e-05, gnorm=1.905, loss_scale=8, train_wall=11, gb_free=2.8, wall=182706 2021-06-20 21:24:03 | INFO | train_inner | epoch 006: 922 / 3002 loss=2.429, ppl=5.39, wps=5752.9, ups=0.09, wpb=64820, bsz=128, num_updates=15839, lr=9.98813e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=182717 2021-06-20 21:24:14 | INFO | train_inner | epoch 006: 923 / 3002 loss=2.306, ppl=4.95, wps=5854.8, ups=0.09, wpb=64857, bsz=128, num_updates=15840, lr=9.98813e-05, gnorm=1.831, loss_scale=8, train_wall=11, gb_free=2.8, wall=182728 2021-06-20 21:24:25 | INFO | train_inner | epoch 006: 924 / 3002 loss=2.476, ppl=5.56, wps=5785.5, ups=0.09, wpb=64791, bsz=128, num_updates=15841, lr=9.98813e-05, gnorm=2.691, loss_scale=8, train_wall=11, gb_free=2.8, wall=182739 2021-06-20 21:24:36 | INFO | train_inner | epoch 006: 925 / 3002 loss=2.442, ppl=5.43, wps=5831.9, ups=0.09, wpb=64862, bsz=128, num_updates=15842, lr=9.98813e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=182750 2021-06-20 21:24:47 | INFO | train_inner | epoch 006: 926 / 3002 loss=2.4, ppl=5.28, wps=5745.8, ups=0.09, wpb=64767, bsz=128, num_updates=15843, lr=9.98812e-05, gnorm=1.92, loss_scale=8, train_wall=11, gb_free=2.8, wall=182762 2021-06-20 21:24:58 | INFO | train_inner | epoch 006: 927 / 3002 loss=2.444, ppl=5.44, wps=5863.5, ups=0.09, wpb=64843, bsz=128, num_updates=15844, lr=9.98812e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=182773 2021-06-20 21:25:10 | INFO | train_inner | epoch 006: 928 / 3002 loss=2.483, ppl=5.59, wps=5878.6, ups=0.09, wpb=64884, bsz=128, num_updates=15845, lr=9.98812e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=182784 2021-06-20 21:25:21 | INFO | train_inner | epoch 006: 929 / 3002 loss=2.388, ppl=5.24, wps=5902.4, ups=0.09, wpb=64830, bsz=128, num_updates=15846, lr=9.98812e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=182795 2021-06-20 21:25:32 | INFO | train_inner | epoch 006: 930 / 3002 loss=2.394, ppl=5.26, wps=5846.4, ups=0.09, wpb=64816, bsz=128, num_updates=15847, lr=9.98812e-05, gnorm=1.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=182806 2021-06-20 21:25:42 | INFO | train_inner | epoch 006: 931 / 3002 loss=2.39, ppl=5.24, wps=5961.7, ups=0.09, wpb=64873, bsz=128, num_updates=15848, lr=9.98812e-05, gnorm=1.935, loss_scale=8, train_wall=10, gb_free=2.8, wall=182817 2021-06-20 21:25:53 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 2021-06-20 21:26:04 | INFO | train_inner | epoch 006: 933 / 3002 loss=2.544, ppl=5.83, wps=2958.5, ups=0.05, wpb=64810, bsz=128, num_updates=15849, lr=9.98812e-05, gnorm=1.937, loss_scale=4, train_wall=21, gb_free=2.8, wall=182839 2021-06-20 21:26:16 | INFO | train_inner | epoch 006: 934 / 3002 loss=2.374, ppl=5.18, wps=5748.3, ups=0.09, wpb=64771, bsz=128, num_updates=15850, lr=9.98812e-05, gnorm=1.888, loss_scale=4, train_wall=11, gb_free=2.8, wall=182850 2021-06-20 21:26:27 | INFO | train_inner | epoch 006: 935 / 3002 loss=2.452, ppl=5.47, wps=5886.2, ups=0.09, wpb=64940, bsz=128, num_updates=15851, lr=9.98812e-05, gnorm=1.875, loss_scale=4, train_wall=11, gb_free=2.8, wall=182861 2021-06-20 21:26:38 | INFO | train_inner | epoch 006: 936 / 3002 loss=2.554, ppl=5.87, wps=5876, ups=0.09, wpb=64843, bsz=128, num_updates=15852, lr=9.98812e-05, gnorm=1.959, loss_scale=4, train_wall=11, gb_free=2.8, wall=182872 2021-06-20 21:26:49 | INFO | train_inner | epoch 006: 937 / 3002 loss=2.419, ppl=5.35, wps=5769.1, ups=0.09, wpb=64838, bsz=128, num_updates=15853, lr=9.98812e-05, gnorm=1.87, loss_scale=4, train_wall=11, gb_free=2.8, wall=182883 2021-06-20 21:27:00 | INFO | train_inner | epoch 006: 938 / 3002 loss=2.462, ppl=5.51, wps=5933.6, ups=0.09, wpb=64860, bsz=128, num_updates=15854, lr=9.98812e-05, gnorm=1.901, loss_scale=4, train_wall=10, gb_free=2.8, wall=182894 2021-06-20 21:27:11 | INFO | train_inner | epoch 006: 939 / 3002 loss=2.315, ppl=4.98, wps=5870.5, ups=0.09, wpb=64857, bsz=128, num_updates=15855, lr=9.98812e-05, gnorm=1.914, loss_scale=4, train_wall=11, gb_free=2.8, wall=182905 2021-06-20 21:27:22 | INFO | train_inner | epoch 006: 940 / 3002 loss=2.401, ppl=5.28, wps=5953.3, ups=0.09, wpb=64814, bsz=128, num_updates=15856, lr=9.98811e-05, gnorm=1.945, loss_scale=4, train_wall=10, gb_free=2.8, wall=182916 2021-06-20 21:27:33 | INFO | train_inner | epoch 006: 941 / 3002 loss=2.308, ppl=4.95, wps=5995.6, ups=0.09, wpb=64850, bsz=128, num_updates=15857, lr=9.98811e-05, gnorm=1.902, loss_scale=4, train_wall=10, gb_free=2.8, wall=182927 2021-06-20 21:27:44 | INFO | train_inner | epoch 006: 942 / 3002 loss=2.613, ppl=6.12, wps=5817.1, ups=0.09, wpb=64846, bsz=128, num_updates=15858, lr=9.98811e-05, gnorm=1.914, loss_scale=4, train_wall=11, gb_free=2.8, wall=182938 2021-06-20 21:27:55 | INFO | train_inner | epoch 006: 943 / 3002 loss=2.632, ppl=6.2, wps=5913.1, ups=0.09, wpb=64857, bsz=128, num_updates=15859, lr=9.98811e-05, gnorm=1.927, loss_scale=4, train_wall=11, gb_free=2.8, wall=182949 2021-06-20 21:28:06 | INFO | train_inner | epoch 006: 944 / 3002 loss=2.53, ppl=5.78, wps=5748.9, ups=0.09, wpb=64801, bsz=128, num_updates=15860, lr=9.98811e-05, gnorm=2.115, loss_scale=4, train_wall=11, gb_free=2.8, wall=182960 2021-06-20 21:28:17 | INFO | train_inner | epoch 006: 945 / 3002 loss=2.508, ppl=5.69, wps=5910.6, ups=0.09, wpb=64836, bsz=128, num_updates=15861, lr=9.98811e-05, gnorm=1.936, loss_scale=4, train_wall=10, gb_free=2.8, wall=182971 2021-06-20 21:28:28 | INFO | train_inner | epoch 006: 946 / 3002 loss=2.581, ppl=5.98, wps=5824.1, ups=0.09, wpb=64829, bsz=128, num_updates=15862, lr=9.98811e-05, gnorm=1.872, loss_scale=4, train_wall=11, gb_free=2.8, wall=182982 2021-06-20 21:28:39 | INFO | train_inner | epoch 006: 947 / 3002 loss=2.458, ppl=5.49, wps=5796.3, ups=0.09, wpb=64855, bsz=128, num_updates=15863, lr=9.98811e-05, gnorm=1.89, loss_scale=4, train_wall=11, gb_free=2.8, wall=182994 2021-06-20 21:28:50 | INFO | train_inner | epoch 006: 948 / 3002 loss=2.525, ppl=5.76, wps=5812.2, ups=0.09, wpb=64784, bsz=128, num_updates=15864, lr=9.98811e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=183005 2021-06-20 21:29:01 | INFO | train_inner | epoch 006: 949 / 3002 loss=2.452, ppl=5.47, wps=5927, ups=0.09, wpb=64816, bsz=128, num_updates=15865, lr=9.98811e-05, gnorm=1.951, loss_scale=4, train_wall=11, gb_free=2.8, wall=183016 2021-06-20 21:29:12 | INFO | train_inner | epoch 006: 950 / 3002 loss=2.536, ppl=5.8, wps=5953.9, ups=0.09, wpb=64884, bsz=128, num_updates=15866, lr=9.98811e-05, gnorm=1.982, loss_scale=4, train_wall=10, gb_free=2.8, wall=183027 2021-06-20 21:29:23 | INFO | train_inner | epoch 006: 951 / 3002 loss=2.575, ppl=5.96, wps=5912.6, ups=0.09, wpb=64780, bsz=128, num_updates=15867, lr=9.98811e-05, gnorm=1.97, loss_scale=4, train_wall=11, gb_free=2.8, wall=183038 2021-06-20 21:29:34 | INFO | train_inner | epoch 006: 952 / 3002 loss=2.59, ppl=6.02, wps=5786.7, ups=0.09, wpb=64815, bsz=128, num_updates=15868, lr=9.9881e-05, gnorm=1.946, loss_scale=4, train_wall=11, gb_free=2.8, wall=183049 2021-06-20 21:29:46 | INFO | train_inner | epoch 006: 953 / 3002 loss=2.411, ppl=5.32, wps=5837.8, ups=0.09, wpb=64718, bsz=128, num_updates=15869, lr=9.9881e-05, gnorm=1.839, loss_scale=4, train_wall=11, gb_free=2.8, wall=183060 2021-06-20 21:29:57 | INFO | train_inner | epoch 006: 954 / 3002 loss=2.473, ppl=5.55, wps=5832.6, ups=0.09, wpb=64820, bsz=128, num_updates=15870, lr=9.9881e-05, gnorm=1.968, loss_scale=4, train_wall=11, gb_free=2.8, wall=183071 2021-06-20 21:30:08 | INFO | train_inner | epoch 006: 955 / 3002 loss=2.523, ppl=5.75, wps=5716.2, ups=0.09, wpb=64831, bsz=128, num_updates=15871, lr=9.9881e-05, gnorm=1.922, loss_scale=4, train_wall=11, gb_free=2.8, wall=183082 2021-06-20 21:30:19 | INFO | train_inner | epoch 006: 956 / 3002 loss=2.48, ppl=5.58, wps=5897.4, ups=0.09, wpb=64829, bsz=128, num_updates=15872, lr=9.9881e-05, gnorm=1.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=183093 2021-06-20 21:30:30 | INFO | train_inner | epoch 006: 957 / 3002 loss=2.604, ppl=6.08, wps=5919, ups=0.09, wpb=64841, bsz=128, num_updates=15873, lr=9.9881e-05, gnorm=1.946, loss_scale=4, train_wall=11, gb_free=2.8, wall=183104 2021-06-20 21:30:41 | INFO | train_inner | epoch 006: 958 / 3002 loss=2.509, ppl=5.69, wps=5831.9, ups=0.09, wpb=64787, bsz=128, num_updates=15874, lr=9.9881e-05, gnorm=1.92, loss_scale=4, train_wall=11, gb_free=2.8, wall=183115 2021-06-20 21:30:52 | INFO | train_inner | epoch 006: 959 / 3002 loss=2.558, ppl=5.89, wps=5859.5, ups=0.09, wpb=64824, bsz=128, num_updates=15875, lr=9.9881e-05, gnorm=1.938, loss_scale=4, train_wall=11, gb_free=2.8, wall=183126 2021-06-20 21:31:03 | INFO | train_inner | epoch 006: 960 / 3002 loss=2.557, ppl=5.88, wps=5883.8, ups=0.09, wpb=64816, bsz=128, num_updates=15876, lr=9.9881e-05, gnorm=1.977, loss_scale=4, train_wall=11, gb_free=2.8, wall=183137 2021-06-20 21:31:14 | INFO | train_inner | epoch 006: 961 / 3002 loss=2.65, ppl=6.28, wps=5803.9, ups=0.09, wpb=64800, bsz=128, num_updates=15877, lr=9.9881e-05, gnorm=1.887, loss_scale=4, train_wall=11, gb_free=2.8, wall=183149 2021-06-20 21:31:25 | INFO | train_inner | epoch 006: 962 / 3002 loss=2.556, ppl=5.88, wps=5832.3, ups=0.09, wpb=64683, bsz=128, num_updates=15878, lr=9.9881e-05, gnorm=1.966, loss_scale=4, train_wall=11, gb_free=2.8, wall=183160 2021-06-20 21:31:36 | INFO | train_inner | epoch 006: 963 / 3002 loss=2.346, ppl=5.08, wps=5904.1, ups=0.09, wpb=64897, bsz=128, num_updates=15879, lr=9.9881e-05, gnorm=1.862, loss_scale=4, train_wall=11, gb_free=2.8, wall=183171 2021-06-20 21:31:47 | INFO | train_inner | epoch 006: 964 / 3002 loss=2.49, ppl=5.62, wps=5873.3, ups=0.09, wpb=64819, bsz=128, num_updates=15880, lr=9.9881e-05, gnorm=1.955, loss_scale=4, train_wall=11, gb_free=2.8, wall=183182 2021-06-20 21:31:58 | INFO | train_inner | epoch 006: 965 / 3002 loss=2.479, ppl=5.58, wps=5875.6, ups=0.09, wpb=64774, bsz=128, num_updates=15881, lr=9.98809e-05, gnorm=1.82, loss_scale=4, train_wall=11, gb_free=2.8, wall=183193 2021-06-20 21:32:09 | INFO | train_inner | epoch 006: 966 / 3002 loss=2.491, ppl=5.62, wps=5871.9, ups=0.09, wpb=64800, bsz=128, num_updates=15882, lr=9.98809e-05, gnorm=1.949, loss_scale=4, train_wall=11, gb_free=2.8, wall=183204 2021-06-20 21:32:21 | INFO | train_inner | epoch 006: 967 / 3002 loss=2.387, ppl=5.23, wps=5827, ups=0.09, wpb=64840, bsz=128, num_updates=15883, lr=9.98809e-05, gnorm=1.875, loss_scale=4, train_wall=11, gb_free=2.8, wall=183215 2021-06-20 21:32:32 | INFO | train_inner | epoch 006: 968 / 3002 loss=2.517, ppl=5.72, wps=5922.2, ups=0.09, wpb=64910, bsz=128, num_updates=15884, lr=9.98809e-05, gnorm=1.975, loss_scale=4, train_wall=10, gb_free=2.8, wall=183226 2021-06-20 21:32:43 | INFO | train_inner | epoch 006: 969 / 3002 loss=2.57, ppl=5.94, wps=5899.5, ups=0.09, wpb=64815, bsz=128, num_updates=15885, lr=9.98809e-05, gnorm=1.934, loss_scale=4, train_wall=11, gb_free=2.8, wall=183237 2021-06-20 21:32:54 | INFO | train_inner | epoch 006: 970 / 3002 loss=2.637, ppl=6.22, wps=5921.8, ups=0.09, wpb=64878, bsz=128, num_updates=15886, lr=9.98809e-05, gnorm=1.945, loss_scale=4, train_wall=10, gb_free=2.8, wall=183248 2021-06-20 21:33:05 | INFO | train_inner | epoch 006: 971 / 3002 loss=2.476, ppl=5.56, wps=5736.9, ups=0.09, wpb=64735, bsz=128, num_updates=15887, lr=9.98809e-05, gnorm=2.544, loss_scale=4, train_wall=11, gb_free=2.8, wall=183259 2021-06-20 21:33:16 | INFO | train_inner | epoch 006: 972 / 3002 loss=2.567, ppl=5.93, wps=5898, ups=0.09, wpb=64820, bsz=128, num_updates=15888, lr=9.98809e-05, gnorm=1.892, loss_scale=4, train_wall=11, gb_free=2.8, wall=183270 2021-06-20 21:33:27 | INFO | train_inner | epoch 006: 973 / 3002 loss=2.305, ppl=4.94, wps=5890.7, ups=0.09, wpb=64856, bsz=128, num_updates=15889, lr=9.98809e-05, gnorm=1.875, loss_scale=4, train_wall=11, gb_free=2.8, wall=183281 2021-06-20 21:33:38 | INFO | train_inner | epoch 006: 974 / 3002 loss=2.365, ppl=5.15, wps=5901.4, ups=0.09, wpb=64856, bsz=128, num_updates=15890, lr=9.98809e-05, gnorm=1.878, loss_scale=4, train_wall=11, gb_free=2.8, wall=183292 2021-06-20 21:33:49 | INFO | train_inner | epoch 006: 975 / 3002 loss=2.479, ppl=5.57, wps=5869.4, ups=0.09, wpb=64844, bsz=128, num_updates=15891, lr=9.98809e-05, gnorm=1.949, loss_scale=4, train_wall=11, gb_free=2.8, wall=183303 2021-06-20 21:34:00 | INFO | train_inner | epoch 006: 976 / 3002 loss=2.481, ppl=5.58, wps=5802.2, ups=0.09, wpb=64893, bsz=128, num_updates=15892, lr=9.98809e-05, gnorm=1.926, loss_scale=4, train_wall=11, gb_free=2.8, wall=183314 2021-06-20 21:34:11 | INFO | train_inner | epoch 006: 977 / 3002 loss=2.432, ppl=5.4, wps=5887.1, ups=0.09, wpb=64812, bsz=128, num_updates=15893, lr=9.98808e-05, gnorm=1.882, loss_scale=4, train_wall=11, gb_free=2.8, wall=183325 2021-06-20 21:34:22 | INFO | train_inner | epoch 006: 978 / 3002 loss=2.462, ppl=5.51, wps=5684.3, ups=0.09, wpb=64847, bsz=128, num_updates=15894, lr=9.98808e-05, gnorm=1.89, loss_scale=4, train_wall=11, gb_free=2.8, wall=183337 2021-06-20 21:34:34 | INFO | train_inner | epoch 006: 979 / 3002 loss=2.524, ppl=5.75, wps=5801.2, ups=0.09, wpb=64852, bsz=128, num_updates=15895, lr=9.98808e-05, gnorm=1.975, loss_scale=4, train_wall=11, gb_free=2.8, wall=183348 2021-06-20 21:34:45 | INFO | train_inner | epoch 006: 980 / 3002 loss=2.523, ppl=5.75, wps=5871.6, ups=0.09, wpb=64836, bsz=128, num_updates=15896, lr=9.98808e-05, gnorm=3.627, loss_scale=4, train_wall=11, gb_free=2.8, wall=183359 2021-06-20 21:34:56 | INFO | train_inner | epoch 006: 981 / 3002 loss=2.407, ppl=5.3, wps=5880.9, ups=0.09, wpb=64868, bsz=128, num_updates=15897, lr=9.98808e-05, gnorm=1.851, loss_scale=4, train_wall=11, gb_free=2.8, wall=183370 2021-06-20 21:35:07 | INFO | train_inner | epoch 006: 982 / 3002 loss=2.423, ppl=5.36, wps=5928, ups=0.09, wpb=64881, bsz=128, num_updates=15898, lr=9.98808e-05, gnorm=1.883, loss_scale=4, train_wall=10, gb_free=2.8, wall=183381 2021-06-20 21:35:18 | INFO | train_inner | epoch 006: 983 / 3002 loss=2.551, ppl=5.86, wps=5898.3, ups=0.09, wpb=64915, bsz=128, num_updates=15899, lr=9.98808e-05, gnorm=2.115, loss_scale=4, train_wall=11, gb_free=2.8, wall=183392 2021-06-20 21:35:29 | INFO | train_inner | epoch 006: 984 / 3002 loss=2.435, ppl=5.41, wps=5824, ups=0.09, wpb=64787, bsz=128, num_updates=15900, lr=9.98808e-05, gnorm=1.883, loss_scale=4, train_wall=11, gb_free=2.8, wall=183403 2021-06-20 21:35:40 | INFO | train_inner | epoch 006: 985 / 3002 loss=2.455, ppl=5.48, wps=5975.4, ups=0.09, wpb=64881, bsz=128, num_updates=15901, lr=9.98808e-05, gnorm=1.932, loss_scale=4, train_wall=10, gb_free=2.8, wall=183414 2021-06-20 21:35:51 | INFO | train_inner | epoch 006: 986 / 3002 loss=2.455, ppl=5.48, wps=5857, ups=0.09, wpb=64790, bsz=128, num_updates=15902, lr=9.98808e-05, gnorm=2.857, loss_scale=4, train_wall=11, gb_free=2.8, wall=183425 2021-06-20 21:36:01 | INFO | train_inner | epoch 006: 987 / 3002 loss=2.542, ppl=5.82, wps=5998.8, ups=0.09, wpb=64825, bsz=128, num_updates=15903, lr=9.98808e-05, gnorm=1.965, loss_scale=4, train_wall=10, gb_free=2.8, wall=183436 2021-06-20 21:36:13 | INFO | train_inner | epoch 006: 988 / 3002 loss=2.579, ppl=5.98, wps=5769, ups=0.09, wpb=64782, bsz=128, num_updates=15904, lr=9.98808e-05, gnorm=1.963, loss_scale=4, train_wall=11, gb_free=2.8, wall=183447 2021-06-20 21:36:24 | INFO | train_inner | epoch 006: 989 / 3002 loss=2.412, ppl=5.32, wps=5766.7, ups=0.09, wpb=64823, bsz=128, num_updates=15905, lr=9.98808e-05, gnorm=1.923, loss_scale=4, train_wall=11, gb_free=2.8, wall=183458 2021-06-20 21:36:35 | INFO | train_inner | epoch 006: 990 / 3002 loss=2.412, ppl=5.32, wps=5903.1, ups=0.09, wpb=64869, bsz=128, num_updates=15906, lr=9.98807e-05, gnorm=1.946, loss_scale=4, train_wall=11, gb_free=2.8, wall=183469 2021-06-20 21:36:46 | INFO | train_inner | epoch 006: 991 / 3002 loss=2.524, ppl=5.75, wps=5749.8, ups=0.09, wpb=64760, bsz=128, num_updates=15907, lr=9.98807e-05, gnorm=2.042, loss_scale=4, train_wall=11, gb_free=2.8, wall=183481 2021-06-20 21:36:57 | INFO | train_inner | epoch 006: 992 / 3002 loss=2.479, ppl=5.58, wps=5935.4, ups=0.09, wpb=64892, bsz=128, num_updates=15908, lr=9.98807e-05, gnorm=1.906, loss_scale=4, train_wall=10, gb_free=2.8, wall=183491 2021-06-20 21:37:08 | INFO | train_inner | epoch 006: 993 / 3002 loss=2.473, ppl=5.55, wps=5918.3, ups=0.09, wpb=64848, bsz=128, num_updates=15909, lr=9.98807e-05, gnorm=1.944, loss_scale=4, train_wall=10, gb_free=2.8, wall=183502 2021-06-20 21:37:19 | INFO | train_inner | epoch 006: 994 / 3002 loss=2.506, ppl=5.68, wps=5917.5, ups=0.09, wpb=64821, bsz=128, num_updates=15910, lr=9.98807e-05, gnorm=1.97, loss_scale=4, train_wall=10, gb_free=2.8, wall=183513 2021-06-20 21:37:30 | INFO | train_inner | epoch 006: 995 / 3002 loss=2.415, ppl=5.33, wps=5839.4, ups=0.09, wpb=64835, bsz=128, num_updates=15911, lr=9.98807e-05, gnorm=1.955, loss_scale=4, train_wall=11, gb_free=2.8, wall=183525 2021-06-20 21:37:41 | INFO | train_inner | epoch 006: 996 / 3002 loss=2.5, ppl=5.66, wps=5764.5, ups=0.09, wpb=64840, bsz=128, num_updates=15912, lr=9.98807e-05, gnorm=1.963, loss_scale=4, train_wall=11, gb_free=2.8, wall=183536 2021-06-20 21:37:53 | INFO | train_inner | epoch 006: 997 / 3002 loss=2.262, ppl=4.8, wps=5812.3, ups=0.09, wpb=64879, bsz=128, num_updates=15913, lr=9.98807e-05, gnorm=1.933, loss_scale=4, train_wall=11, gb_free=2.8, wall=183547 2021-06-20 21:38:04 | INFO | train_inner | epoch 006: 998 / 3002 loss=2.387, ppl=5.23, wps=5800.7, ups=0.09, wpb=64799, bsz=128, num_updates=15914, lr=9.98807e-05, gnorm=2.047, loss_scale=4, train_wall=11, gb_free=2.8, wall=183558 2021-06-20 21:38:15 | INFO | train_inner | epoch 006: 999 / 3002 loss=2.399, ppl=5.28, wps=5800.6, ups=0.09, wpb=64820, bsz=128, num_updates=15915, lr=9.98807e-05, gnorm=1.918, loss_scale=4, train_wall=11, gb_free=2.8, wall=183569 2021-06-20 21:38:26 | INFO | train_inner | epoch 006: 1000 / 3002 loss=2.337, ppl=5.05, wps=5786.2, ups=0.09, wpb=64834, bsz=128, num_updates=15916, lr=9.98807e-05, gnorm=1.954, loss_scale=4, train_wall=11, gb_free=2.8, wall=183580 2021-06-20 21:38:37 | INFO | train_inner | epoch 006: 1001 / 3002 loss=2.549, ppl=5.85, wps=5858.1, ups=0.09, wpb=64788, bsz=128, num_updates=15917, lr=9.98807e-05, gnorm=2.073, loss_scale=4, train_wall=11, gb_free=2.8, wall=183592 2021-06-20 21:38:48 | INFO | train_inner | epoch 006: 1002 / 3002 loss=2.41, ppl=5.31, wps=5886.3, ups=0.09, wpb=64834, bsz=128, num_updates=15918, lr=9.98806e-05, gnorm=1.869, loss_scale=4, train_wall=11, gb_free=2.8, wall=183603 2021-06-20 21:38:59 | INFO | train_inner | epoch 006: 1003 / 3002 loss=2.562, ppl=5.91, wps=5921.6, ups=0.09, wpb=64779, bsz=128, num_updates=15919, lr=9.98806e-05, gnorm=1.908, loss_scale=4, train_wall=10, gb_free=2.8, wall=183613 2021-06-20 21:39:10 | INFO | train_inner | epoch 006: 1004 / 3002 loss=2.318, ppl=4.99, wps=5803.4, ups=0.09, wpb=64766, bsz=128, num_updates=15920, lr=9.98806e-05, gnorm=1.881, loss_scale=4, train_wall=11, gb_free=2.8, wall=183625 2021-06-20 21:39:21 | INFO | train_inner | epoch 006: 1005 / 3002 loss=2.463, ppl=5.51, wps=5848.5, ups=0.09, wpb=64798, bsz=128, num_updates=15921, lr=9.98806e-05, gnorm=1.875, loss_scale=4, train_wall=11, gb_free=2.8, wall=183636 2021-06-20 21:39:33 | INFO | train_inner | epoch 006: 1006 / 3002 loss=2.308, ppl=4.95, wps=5750.3, ups=0.09, wpb=64887, bsz=128, num_updates=15922, lr=9.98806e-05, gnorm=1.848, loss_scale=4, train_wall=11, gb_free=2.8, wall=183647 2021-06-20 21:39:44 | INFO | train_inner | epoch 006: 1007 / 3002 loss=2.425, ppl=5.37, wps=5946.2, ups=0.09, wpb=64854, bsz=128, num_updates=15923, lr=9.98806e-05, gnorm=1.923, loss_scale=4, train_wall=10, gb_free=2.8, wall=183658 2021-06-20 21:39:55 | INFO | train_inner | epoch 006: 1008 / 3002 loss=2.484, ppl=5.59, wps=5913.4, ups=0.09, wpb=64830, bsz=128, num_updates=15924, lr=9.98806e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=183669 2021-06-20 21:40:06 | INFO | train_inner | epoch 006: 1009 / 3002 loss=2.482, ppl=5.59, wps=5827.2, ups=0.09, wpb=64823, bsz=128, num_updates=15925, lr=9.98806e-05, gnorm=1.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=183680 2021-06-20 21:40:17 | INFO | train_inner | epoch 006: 1010 / 3002 loss=2.462, ppl=5.51, wps=5945.5, ups=0.09, wpb=64768, bsz=128, num_updates=15926, lr=9.98806e-05, gnorm=1.887, loss_scale=4, train_wall=10, gb_free=2.8, wall=183691 2021-06-20 21:40:27 | INFO | train_inner | epoch 006: 1011 / 3002 loss=2.506, ppl=5.68, wps=5991.3, ups=0.09, wpb=64863, bsz=128, num_updates=15927, lr=9.98806e-05, gnorm=1.97, loss_scale=4, train_wall=10, gb_free=2.8, wall=183702 2021-06-20 21:40:38 | INFO | train_inner | epoch 006: 1012 / 3002 loss=2.586, ppl=6, wps=5900.1, ups=0.09, wpb=64891, bsz=128, num_updates=15928, lr=9.98806e-05, gnorm=1.835, loss_scale=4, train_wall=11, gb_free=2.8, wall=183713 2021-06-20 21:40:49 | INFO | train_inner | epoch 006: 1013 / 3002 loss=2.304, ppl=4.94, wps=5852.9, ups=0.09, wpb=64889, bsz=128, num_updates=15929, lr=9.98806e-05, gnorm=1.853, loss_scale=4, train_wall=11, gb_free=2.8, wall=183724 2021-06-20 21:41:00 | INFO | train_inner | epoch 006: 1014 / 3002 loss=2.453, ppl=5.48, wps=5887.7, ups=0.09, wpb=64823, bsz=128, num_updates=15930, lr=9.98806e-05, gnorm=1.972, loss_scale=4, train_wall=11, gb_free=2.8, wall=183735 2021-06-20 21:41:11 | INFO | train_inner | epoch 006: 1015 / 3002 loss=2.287, ppl=4.88, wps=6032, ups=0.09, wpb=64896, bsz=128, num_updates=15931, lr=9.98805e-05, gnorm=1.925, loss_scale=4, train_wall=10, gb_free=2.8, wall=183746 2021-06-20 21:41:23 | INFO | train_inner | epoch 006: 1016 / 3002 loss=2.525, ppl=5.76, wps=5758.2, ups=0.09, wpb=64856, bsz=128, num_updates=15932, lr=9.98805e-05, gnorm=1.966, loss_scale=4, train_wall=11, gb_free=2.8, wall=183757 2021-06-20 21:41:33 | INFO | train_inner | epoch 006: 1017 / 3002 loss=2.45, ppl=5.46, wps=6026.6, ups=0.09, wpb=64799, bsz=128, num_updates=15933, lr=9.98805e-05, gnorm=1.938, loss_scale=4, train_wall=10, gb_free=2.8, wall=183768 2021-06-20 21:41:44 | INFO | train_inner | epoch 006: 1018 / 3002 loss=2.556, ppl=5.88, wps=5853.8, ups=0.09, wpb=64763, bsz=128, num_updates=15934, lr=9.98805e-05, gnorm=1.898, loss_scale=4, train_wall=11, gb_free=2.8, wall=183779 2021-06-20 21:41:55 | INFO | train_inner | epoch 006: 1019 / 3002 loss=2.516, ppl=5.72, wps=5843.4, ups=0.09, wpb=64814, bsz=128, num_updates=15935, lr=9.98805e-05, gnorm=1.923, loss_scale=4, train_wall=11, gb_free=2.8, wall=183790 2021-06-20 21:42:06 | INFO | train_inner | epoch 006: 1020 / 3002 loss=2.542, ppl=5.83, wps=5948.7, ups=0.09, wpb=64799, bsz=128, num_updates=15936, lr=9.98805e-05, gnorm=1.989, loss_scale=4, train_wall=10, gb_free=2.8, wall=183801 2021-06-20 21:42:17 | INFO | train_inner | epoch 006: 1021 / 3002 loss=2.435, ppl=5.41, wps=5870.6, ups=0.09, wpb=64855, bsz=128, num_updates=15937, lr=9.98805e-05, gnorm=1.881, loss_scale=4, train_wall=11, gb_free=2.8, wall=183812 2021-06-20 21:42:28 | INFO | train_inner | epoch 006: 1022 / 3002 loss=2.58, ppl=5.98, wps=5985.4, ups=0.09, wpb=64821, bsz=128, num_updates=15938, lr=9.98805e-05, gnorm=1.864, loss_scale=4, train_wall=10, gb_free=2.8, wall=183823 2021-06-20 21:42:39 | INFO | train_inner | epoch 006: 1023 / 3002 loss=2.55, ppl=5.86, wps=5945.3, ups=0.09, wpb=64793, bsz=128, num_updates=15939, lr=9.98805e-05, gnorm=1.881, loss_scale=4, train_wall=10, gb_free=2.8, wall=183833 2021-06-20 21:42:50 | INFO | train_inner | epoch 006: 1024 / 3002 loss=2.584, ppl=6, wps=5917, ups=0.09, wpb=64899, bsz=128, num_updates=15940, lr=9.98805e-05, gnorm=1.915, loss_scale=4, train_wall=10, gb_free=2.8, wall=183844 2021-06-20 21:43:01 | INFO | train_inner | epoch 006: 1025 / 3002 loss=2.377, ppl=5.2, wps=5754.4, ups=0.09, wpb=64821, bsz=128, num_updates=15941, lr=9.98805e-05, gnorm=1.862, loss_scale=4, train_wall=11, gb_free=2.8, wall=183856 2021-06-20 21:43:12 | INFO | train_inner | epoch 006: 1026 / 3002 loss=2.585, ppl=6, wps=5854.6, ups=0.09, wpb=64809, bsz=128, num_updates=15942, lr=9.98805e-05, gnorm=1.929, loss_scale=4, train_wall=11, gb_free=2.8, wall=183867 2021-06-20 21:43:24 | INFO | train_inner | epoch 006: 1027 / 3002 loss=2.518, ppl=5.73, wps=5723.5, ups=0.09, wpb=64787, bsz=128, num_updates=15943, lr=9.98804e-05, gnorm=1.913, loss_scale=4, train_wall=11, gb_free=2.8, wall=183878 2021-06-20 21:43:35 | INFO | train_inner | epoch 006: 1028 / 3002 loss=2.457, ppl=5.49, wps=5864.8, ups=0.09, wpb=64799, bsz=128, num_updates=15944, lr=9.98804e-05, gnorm=1.928, loss_scale=4, train_wall=11, gb_free=2.8, wall=183889 2021-06-20 21:43:46 | INFO | train_inner | epoch 006: 1029 / 3002 loss=2.311, ppl=4.96, wps=5892.4, ups=0.09, wpb=64797, bsz=128, num_updates=15945, lr=9.98804e-05, gnorm=1.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=183900 2021-06-20 21:43:57 | INFO | train_inner | epoch 006: 1030 / 3002 loss=2.53, ppl=5.78, wps=5803.2, ups=0.09, wpb=64833, bsz=128, num_updates=15946, lr=9.98804e-05, gnorm=2.067, loss_scale=4, train_wall=11, gb_free=2.8, wall=183911 2021-06-20 21:44:08 | INFO | train_inner | epoch 006: 1031 / 3002 loss=2.6, ppl=6.06, wps=6024.1, ups=0.09, wpb=64884, bsz=128, num_updates=15947, lr=9.98804e-05, gnorm=1.984, loss_scale=4, train_wall=10, gb_free=2.8, wall=183922 2021-06-20 21:44:19 | INFO | train_inner | epoch 006: 1032 / 3002 loss=2.438, ppl=5.42, wps=5887.6, ups=0.09, wpb=64858, bsz=128, num_updates=15948, lr=9.98804e-05, gnorm=1.923, loss_scale=4, train_wall=11, gb_free=2.8, wall=183933 2021-06-20 21:44:30 | INFO | train_inner | epoch 006: 1033 / 3002 loss=2.528, ppl=5.77, wps=5938.3, ups=0.09, wpb=64857, bsz=128, num_updates=15949, lr=9.98804e-05, gnorm=1.931, loss_scale=4, train_wall=10, gb_free=2.8, wall=183944 2021-06-20 21:44:41 | INFO | train_inner | epoch 006: 1034 / 3002 loss=2.531, ppl=5.78, wps=5882.2, ups=0.09, wpb=64807, bsz=128, num_updates=15950, lr=9.98804e-05, gnorm=1.965, loss_scale=4, train_wall=11, gb_free=2.8, wall=183955 2021-06-20 21:44:52 | INFO | train_inner | epoch 006: 1035 / 3002 loss=2.388, ppl=5.23, wps=5784.4, ups=0.09, wpb=64811, bsz=128, num_updates=15951, lr=9.98804e-05, gnorm=1.921, loss_scale=4, train_wall=11, gb_free=2.8, wall=183966 2021-06-20 21:45:03 | INFO | train_inner | epoch 006: 1036 / 3002 loss=2.475, ppl=5.56, wps=5874.6, ups=0.09, wpb=64814, bsz=128, num_updates=15952, lr=9.98804e-05, gnorm=1.879, loss_scale=4, train_wall=11, gb_free=2.8, wall=183977 2021-06-20 21:45:14 | INFO | train_inner | epoch 006: 1037 / 3002 loss=2.456, ppl=5.49, wps=5712.2, ups=0.09, wpb=64677, bsz=128, num_updates=15953, lr=9.98804e-05, gnorm=1.889, loss_scale=4, train_wall=11, gb_free=2.8, wall=183989 2021-06-20 21:45:25 | INFO | train_inner | epoch 006: 1038 / 3002 loss=2.44, ppl=5.43, wps=5807.8, ups=0.09, wpb=64792, bsz=128, num_updates=15954, lr=9.98804e-05, gnorm=1.953, loss_scale=4, train_wall=11, gb_free=2.8, wall=184000 2021-06-20 21:45:37 | INFO | train_inner | epoch 006: 1039 / 3002 loss=2.367, ppl=5.16, wps=5821, ups=0.09, wpb=64856, bsz=128, num_updates=15955, lr=9.98804e-05, gnorm=1.916, loss_scale=4, train_wall=11, gb_free=2.8, wall=184011 2021-06-20 21:45:48 | INFO | train_inner | epoch 006: 1040 / 3002 loss=2.537, ppl=5.8, wps=5805.1, ups=0.09, wpb=64778, bsz=128, num_updates=15956, lr=9.98803e-05, gnorm=1.98, loss_scale=4, train_wall=11, gb_free=2.8, wall=184022 2021-06-20 21:45:59 | INFO | train_inner | epoch 006: 1041 / 3002 loss=2.499, ppl=5.65, wps=5796.3, ups=0.09, wpb=64861, bsz=128, num_updates=15957, lr=9.98803e-05, gnorm=2.036, loss_scale=4, train_wall=11, gb_free=2.8, wall=184033 2021-06-20 21:46:10 | INFO | train_inner | epoch 006: 1042 / 3002 loss=2.672, ppl=6.37, wps=5863.4, ups=0.09, wpb=64849, bsz=128, num_updates=15958, lr=9.98803e-05, gnorm=2.039, loss_scale=4, train_wall=11, gb_free=2.8, wall=184044 2021-06-20 21:46:21 | INFO | train_inner | epoch 006: 1043 / 3002 loss=2.464, ppl=5.52, wps=5883.3, ups=0.09, wpb=64858, bsz=128, num_updates=15959, lr=9.98803e-05, gnorm=1.94, loss_scale=4, train_wall=11, gb_free=2.8, wall=184055 2021-06-20 21:46:32 | INFO | train_inner | epoch 006: 1044 / 3002 loss=2.333, ppl=5.04, wps=5981.7, ups=0.09, wpb=64850, bsz=128, num_updates=15960, lr=9.98803e-05, gnorm=1.917, loss_scale=4, train_wall=10, gb_free=2.8, wall=184066 2021-06-20 21:46:43 | INFO | train_inner | epoch 006: 1045 / 3002 loss=2.53, ppl=5.78, wps=5903.4, ups=0.09, wpb=64848, bsz=128, num_updates=15961, lr=9.98803e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=184077 2021-06-20 21:46:54 | INFO | train_inner | epoch 006: 1046 / 3002 loss=2.44, ppl=5.42, wps=5884.3, ups=0.09, wpb=64916, bsz=128, num_updates=15962, lr=9.98803e-05, gnorm=2.039, loss_scale=4, train_wall=11, gb_free=2.8, wall=184088 2021-06-20 21:47:05 | INFO | train_inner | epoch 006: 1047 / 3002 loss=2.399, ppl=5.27, wps=5865.3, ups=0.09, wpb=64859, bsz=128, num_updates=15963, lr=9.98803e-05, gnorm=1.916, loss_scale=4, train_wall=11, gb_free=2.8, wall=184099 2021-06-20 21:47:16 | INFO | train_inner | epoch 006: 1048 / 3002 loss=2.304, ppl=4.94, wps=5931.7, ups=0.09, wpb=64874, bsz=128, num_updates=15964, lr=9.98803e-05, gnorm=2.601, loss_scale=4, train_wall=10, gb_free=2.8, wall=184110 2021-06-20 21:47:27 | INFO | train_inner | epoch 006: 1049 / 3002 loss=2.374, ppl=5.18, wps=5796, ups=0.09, wpb=64808, bsz=128, num_updates=15965, lr=9.98803e-05, gnorm=1.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=184121 2021-06-20 21:47:38 | INFO | train_inner | epoch 006: 1050 / 3002 loss=2.385, ppl=5.22, wps=5940.9, ups=0.09, wpb=64796, bsz=128, num_updates=15966, lr=9.98803e-05, gnorm=1.911, loss_scale=4, train_wall=10, gb_free=2.8, wall=184132 2021-06-20 21:47:49 | INFO | train_inner | epoch 006: 1051 / 3002 loss=2.541, ppl=5.82, wps=5853.5, ups=0.09, wpb=64833, bsz=128, num_updates=15967, lr=9.98803e-05, gnorm=1.93, loss_scale=4, train_wall=11, gb_free=2.8, wall=184143 2021-06-20 21:48:00 | INFO | train_inner | epoch 006: 1052 / 3002 loss=2.393, ppl=5.25, wps=5855.7, ups=0.09, wpb=64825, bsz=128, num_updates=15968, lr=9.98802e-05, gnorm=1.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=184154 2021-06-20 21:48:11 | INFO | train_inner | epoch 006: 1053 / 3002 loss=2.459, ppl=5.5, wps=5823.1, ups=0.09, wpb=64821, bsz=128, num_updates=15969, lr=9.98802e-05, gnorm=1.999, loss_scale=4, train_wall=11, gb_free=2.8, wall=184165 2021-06-20 21:48:23 | INFO | train_inner | epoch 006: 1054 / 3002 loss=2.371, ppl=5.17, wps=5709.9, ups=0.09, wpb=64840, bsz=128, num_updates=15970, lr=9.98802e-05, gnorm=1.918, loss_scale=4, train_wall=11, gb_free=2.8, wall=184177 2021-06-20 21:48:34 | INFO | train_inner | epoch 006: 1055 / 3002 loss=2.483, ppl=5.59, wps=5861, ups=0.09, wpb=64851, bsz=128, num_updates=15971, lr=9.98802e-05, gnorm=1.867, loss_scale=4, train_wall=11, gb_free=2.8, wall=184188 2021-06-20 21:48:45 | INFO | train_inner | epoch 006: 1056 / 3002 loss=2.343, ppl=5.07, wps=5829.2, ups=0.09, wpb=64786, bsz=128, num_updates=15972, lr=9.98802e-05, gnorm=1.958, loss_scale=4, train_wall=11, gb_free=2.8, wall=184199 2021-06-20 21:48:56 | INFO | train_inner | epoch 006: 1057 / 3002 loss=2.412, ppl=5.32, wps=5890.9, ups=0.09, wpb=64854, bsz=128, num_updates=15973, lr=9.98802e-05, gnorm=1.955, loss_scale=4, train_wall=11, gb_free=2.8, wall=184210 2021-06-20 21:49:07 | INFO | train_inner | epoch 006: 1058 / 3002 loss=2.562, ppl=5.91, wps=5924.4, ups=0.09, wpb=64895, bsz=128, num_updates=15974, lr=9.98802e-05, gnorm=1.912, loss_scale=4, train_wall=11, gb_free=2.8, wall=184221 2021-06-20 21:49:18 | INFO | train_inner | epoch 006: 1059 / 3002 loss=2.523, ppl=5.75, wps=5840.5, ups=0.09, wpb=64812, bsz=128, num_updates=15975, lr=9.98802e-05, gnorm=1.883, loss_scale=4, train_wall=11, gb_free=2.8, wall=184232 2021-06-20 21:49:29 | INFO | train_inner | epoch 006: 1060 / 3002 loss=2.399, ppl=5.27, wps=5822.8, ups=0.09, wpb=64844, bsz=128, num_updates=15976, lr=9.98802e-05, gnorm=1.804, loss_scale=8, train_wall=11, gb_free=2.8, wall=184243 2021-06-20 21:49:40 | INFO | train_inner | epoch 006: 1061 / 3002 loss=2.564, ppl=5.91, wps=5981, ups=0.09, wpb=64794, bsz=128, num_updates=15977, lr=9.98802e-05, gnorm=1.819, loss_scale=8, train_wall=10, gb_free=2.8, wall=184254 2021-06-20 21:49:51 | INFO | train_inner | epoch 006: 1062 / 3002 loss=2.458, ppl=5.49, wps=5880.9, ups=0.09, wpb=64791, bsz=128, num_updates=15978, lr=9.98802e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=184265 2021-06-20 21:50:02 | INFO | train_inner | epoch 006: 1063 / 3002 loss=2.426, ppl=5.37, wps=5804.8, ups=0.09, wpb=64818, bsz=128, num_updates=15979, lr=9.98802e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=184276 2021-06-20 21:50:13 | INFO | train_inner | epoch 006: 1064 / 3002 loss=2.463, ppl=5.51, wps=5818.3, ups=0.09, wpb=64830, bsz=128, num_updates=15980, lr=9.98802e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=184287 2021-06-20 21:50:24 | INFO | train_inner | epoch 006: 1065 / 3002 loss=2.394, ppl=5.25, wps=5752.5, ups=0.09, wpb=64861, bsz=128, num_updates=15981, lr=9.98801e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=184299 2021-06-20 21:50:35 | INFO | train_inner | epoch 006: 1066 / 3002 loss=2.512, ppl=5.7, wps=5823.2, ups=0.09, wpb=64827, bsz=128, num_updates=15982, lr=9.98801e-05, gnorm=1.937, loss_scale=8, train_wall=11, gb_free=2.8, wall=184310 2021-06-20 21:50:46 | INFO | train_inner | epoch 006: 1067 / 3002 loss=2.379, ppl=5.2, wps=5909, ups=0.09, wpb=64844, bsz=128, num_updates=15983, lr=9.98801e-05, gnorm=2.08, loss_scale=8, train_wall=10, gb_free=2.8, wall=184321 2021-06-20 21:50:58 | INFO | train_inner | epoch 006: 1068 / 3002 loss=2.527, ppl=5.76, wps=5721.8, ups=0.09, wpb=64785, bsz=128, num_updates=15984, lr=9.98801e-05, gnorm=1.889, loss_scale=8, train_wall=11, gb_free=2.8, wall=184332 2021-06-20 21:51:08 | INFO | train_inner | epoch 006: 1069 / 3002 loss=2.421, ppl=5.36, wps=6049.3, ups=0.09, wpb=64884, bsz=128, num_updates=15985, lr=9.98801e-05, gnorm=1.967, loss_scale=8, train_wall=10, gb_free=2.8, wall=184343 2021-06-20 21:51:20 | INFO | train_inner | epoch 006: 1070 / 3002 loss=2.367, ppl=5.16, wps=5886.9, ups=0.09, wpb=64862, bsz=128, num_updates=15986, lr=9.98801e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=184354 2021-06-20 21:51:31 | INFO | train_inner | epoch 006: 1071 / 3002 loss=2.47, ppl=5.54, wps=5816.4, ups=0.09, wpb=64873, bsz=128, num_updates=15987, lr=9.98801e-05, gnorm=1.822, loss_scale=8, train_wall=11, gb_free=2.8, wall=184365 2021-06-20 21:51:42 | INFO | train_inner | epoch 006: 1072 / 3002 loss=2.451, ppl=5.47, wps=5907.3, ups=0.09, wpb=64866, bsz=128, num_updates=15988, lr=9.98801e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=184376 2021-06-20 21:51:53 | INFO | train_inner | epoch 006: 1073 / 3002 loss=2.61, ppl=6.1, wps=5750.5, ups=0.09, wpb=64753, bsz=128, num_updates=15989, lr=9.98801e-05, gnorm=1.927, loss_scale=8, train_wall=11, gb_free=2.8, wall=184387 2021-06-20 21:52:04 | INFO | train_inner | epoch 006: 1074 / 3002 loss=2.434, ppl=5.4, wps=5832.4, ups=0.09, wpb=64868, bsz=128, num_updates=15990, lr=9.98801e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=184398 2021-06-20 21:52:15 | INFO | train_inner | epoch 006: 1075 / 3002 loss=2.57, ppl=5.94, wps=5760.5, ups=0.09, wpb=64825, bsz=128, num_updates=15991, lr=9.98801e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=184410 2021-06-20 21:52:26 | INFO | train_inner | epoch 006: 1076 / 3002 loss=2.632, ppl=6.2, wps=5823.8, ups=0.09, wpb=64809, bsz=128, num_updates=15992, lr=9.98801e-05, gnorm=1.995, loss_scale=8, train_wall=11, gb_free=2.8, wall=184421 2021-06-20 21:52:37 | INFO | train_inner | epoch 006: 1077 / 3002 loss=2.408, ppl=5.31, wps=5936.9, ups=0.09, wpb=64874, bsz=128, num_updates=15993, lr=9.988e-05, gnorm=2.027, loss_scale=8, train_wall=10, gb_free=2.8, wall=184432 2021-06-20 21:52:49 | INFO | train_inner | epoch 006: 1078 / 3002 loss=2.604, ppl=6.08, wps=5742.6, ups=0.09, wpb=64773, bsz=128, num_updates=15994, lr=9.988e-05, gnorm=2.142, loss_scale=8, train_wall=11, gb_free=2.8, wall=184443 2021-06-20 21:53:00 | INFO | train_inner | epoch 006: 1079 / 3002 loss=2.483, ppl=5.59, wps=5860.6, ups=0.09, wpb=64810, bsz=128, num_updates=15995, lr=9.988e-05, gnorm=1.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=184454 2021-06-20 21:53:11 | INFO | train_inner | epoch 006: 1080 / 3002 loss=2.486, ppl=5.6, wps=5856, ups=0.09, wpb=64840, bsz=128, num_updates=15996, lr=9.988e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=184465 2021-06-20 21:53:22 | INFO | train_inner | epoch 006: 1081 / 3002 loss=2.466, ppl=5.53, wps=5746.9, ups=0.09, wpb=64701, bsz=128, num_updates=15997, lr=9.988e-05, gnorm=1.892, loss_scale=8, train_wall=11, gb_free=2.8, wall=184476 2021-06-20 21:53:33 | INFO | train_inner | epoch 006: 1082 / 3002 loss=2.519, ppl=5.73, wps=5750.1, ups=0.09, wpb=64771, bsz=128, num_updates=15998, lr=9.988e-05, gnorm=1.976, loss_scale=8, train_wall=11, gb_free=2.8, wall=184488 2021-06-20 21:53:45 | INFO | train_inner | epoch 006: 1083 / 3002 loss=2.533, ppl=5.79, wps=5740.1, ups=0.09, wpb=64804, bsz=128, num_updates=15999, lr=9.988e-05, gnorm=1.941, loss_scale=8, train_wall=11, gb_free=2.8, wall=184499 2021-06-20 21:53:56 | INFO | train_inner | epoch 006: 1084 / 3002 loss=2.456, ppl=5.49, wps=5891.2, ups=0.09, wpb=64746, bsz=128, num_updates=16000, lr=9.988e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=184510 2021-06-20 21:54:07 | INFO | train_inner | epoch 006: 1085 / 3002 loss=2.466, ppl=5.52, wps=5778, ups=0.09, wpb=64804, bsz=128, num_updates=16001, lr=9.988e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=184521 2021-06-20 21:54:18 | INFO | train_inner | epoch 006: 1086 / 3002 loss=2.471, ppl=5.54, wps=5881.1, ups=0.09, wpb=64860, bsz=128, num_updates=16002, lr=9.988e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=184532 2021-06-20 21:54:29 | INFO | train_inner | epoch 006: 1087 / 3002 loss=2.491, ppl=5.62, wps=5812.6, ups=0.09, wpb=64814, bsz=128, num_updates=16003, lr=9.988e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=184543 2021-06-20 21:54:40 | INFO | train_inner | epoch 006: 1088 / 3002 loss=2.651, ppl=6.28, wps=5825.4, ups=0.09, wpb=64832, bsz=128, num_updates=16004, lr=9.988e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=184554 2021-06-20 21:54:51 | INFO | train_inner | epoch 006: 1089 / 3002 loss=2.353, ppl=5.11, wps=5782.6, ups=0.09, wpb=64848, bsz=128, num_updates=16005, lr=9.988e-05, gnorm=1.894, loss_scale=8, train_wall=11, gb_free=2.8, wall=184566 2021-06-20 21:55:02 | INFO | train_inner | epoch 006: 1090 / 3002 loss=2.416, ppl=5.34, wps=5816.4, ups=0.09, wpb=64805, bsz=128, num_updates=16006, lr=9.98799e-05, gnorm=1.811, loss_scale=8, train_wall=11, gb_free=2.8, wall=184577 2021-06-20 21:55:13 | INFO | train_inner | epoch 006: 1091 / 3002 loss=2.469, ppl=5.54, wps=5938.9, ups=0.09, wpb=64870, bsz=128, num_updates=16007, lr=9.98799e-05, gnorm=1.897, loss_scale=8, train_wall=10, gb_free=2.8, wall=184588 2021-06-20 21:55:25 | INFO | train_inner | epoch 006: 1092 / 3002 loss=2.365, ppl=5.15, wps=5812.2, ups=0.09, wpb=64886, bsz=128, num_updates=16008, lr=9.98799e-05, gnorm=1.882, loss_scale=8, train_wall=11, gb_free=2.8, wall=184599 2021-06-20 21:55:36 | INFO | train_inner | epoch 006: 1093 / 3002 loss=2.41, ppl=5.31, wps=5832.6, ups=0.09, wpb=64813, bsz=128, num_updates=16009, lr=9.98799e-05, gnorm=1.953, loss_scale=8, train_wall=11, gb_free=2.8, wall=184610 2021-06-20 21:55:47 | INFO | train_inner | epoch 006: 1094 / 3002 loss=2.502, ppl=5.66, wps=5929.1, ups=0.09, wpb=64618, bsz=128, num_updates=16010, lr=9.98799e-05, gnorm=1.997, loss_scale=8, train_wall=10, gb_free=2.8, wall=184621 2021-06-20 21:55:58 | INFO | train_inner | epoch 006: 1095 / 3002 loss=2.599, ppl=6.06, wps=5817.4, ups=0.09, wpb=64856, bsz=128, num_updates=16011, lr=9.98799e-05, gnorm=1.932, loss_scale=8, train_wall=11, gb_free=2.8, wall=184632 2021-06-20 21:56:09 | INFO | train_inner | epoch 006: 1096 / 3002 loss=2.573, ppl=5.95, wps=5874.7, ups=0.09, wpb=64943, bsz=128, num_updates=16012, lr=9.98799e-05, gnorm=1.976, loss_scale=8, train_wall=11, gb_free=2.8, wall=184643 2021-06-20 21:56:20 | INFO | train_inner | epoch 006: 1097 / 3002 loss=2.481, ppl=5.58, wps=5966.1, ups=0.09, wpb=64832, bsz=128, num_updates=16013, lr=9.98799e-05, gnorm=1.938, loss_scale=8, train_wall=10, gb_free=2.8, wall=184654 2021-06-20 21:56:31 | INFO | train_inner | epoch 006: 1098 / 3002 loss=2.532, ppl=5.78, wps=5924, ups=0.09, wpb=64898, bsz=128, num_updates=16014, lr=9.98799e-05, gnorm=1.939, loss_scale=8, train_wall=10, gb_free=2.8, wall=184665 2021-06-20 21:56:42 | INFO | train_inner | epoch 006: 1099 / 3002 loss=2.511, ppl=5.7, wps=5791.6, ups=0.09, wpb=64878, bsz=128, num_updates=16015, lr=9.98799e-05, gnorm=1.886, loss_scale=8, train_wall=11, gb_free=2.8, wall=184676 2021-06-20 21:56:53 | INFO | train_inner | epoch 006: 1100 / 3002 loss=2.454, ppl=5.48, wps=5848, ups=0.09, wpb=64787, bsz=128, num_updates=16016, lr=9.98799e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=184687 2021-06-20 21:57:04 | INFO | train_inner | epoch 006: 1101 / 3002 loss=2.416, ppl=5.34, wps=5795.6, ups=0.09, wpb=64835, bsz=128, num_updates=16017, lr=9.98799e-05, gnorm=1.898, loss_scale=8, train_wall=11, gb_free=2.8, wall=184698 2021-06-20 21:57:15 | INFO | train_inner | epoch 006: 1102 / 3002 loss=2.56, ppl=5.9, wps=5826, ups=0.09, wpb=64853, bsz=128, num_updates=16018, lr=9.98798e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=184709 2021-06-20 21:57:26 | INFO | train_inner | epoch 006: 1103 / 3002 loss=2.539, ppl=5.81, wps=5830.2, ups=0.09, wpb=64895, bsz=128, num_updates=16019, lr=9.98798e-05, gnorm=1.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=184721 2021-06-20 21:57:38 | INFO | train_inner | epoch 006: 1104 / 3002 loss=2.519, ppl=5.73, wps=5765.6, ups=0.09, wpb=64772, bsz=128, num_updates=16020, lr=9.98798e-05, gnorm=1.897, loss_scale=8, train_wall=11, gb_free=2.8, wall=184732 2021-06-20 21:57:48 | INFO | train_inner | epoch 006: 1105 / 3002 loss=2.568, ppl=5.93, wps=6006.2, ups=0.09, wpb=64899, bsz=128, num_updates=16021, lr=9.98798e-05, gnorm=1.92, loss_scale=8, train_wall=10, gb_free=2.8, wall=184743 2021-06-20 21:57:59 | INFO | train_inner | epoch 006: 1106 / 3002 loss=2.39, ppl=5.24, wps=5905.2, ups=0.09, wpb=64939, bsz=128, num_updates=16022, lr=9.98798e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=184754 2021-06-20 21:58:10 | INFO | train_inner | epoch 006: 1107 / 3002 loss=2.492, ppl=5.63, wps=5863.2, ups=0.09, wpb=64778, bsz=128, num_updates=16023, lr=9.98798e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=184765 2021-06-20 21:58:22 | INFO | train_inner | epoch 006: 1108 / 3002 loss=2.437, ppl=5.41, wps=5737.1, ups=0.09, wpb=64801, bsz=128, num_updates=16024, lr=9.98798e-05, gnorm=1.954, loss_scale=8, train_wall=11, gb_free=2.8, wall=184776 2021-06-20 21:58:33 | INFO | train_inner | epoch 006: 1109 / 3002 loss=2.434, ppl=5.4, wps=5806.4, ups=0.09, wpb=64836, bsz=128, num_updates=16025, lr=9.98798e-05, gnorm=1.952, loss_scale=8, train_wall=11, gb_free=2.8, wall=184787 2021-06-20 21:58:44 | INFO | train_inner | epoch 006: 1110 / 3002 loss=2.53, ppl=5.78, wps=5930.3, ups=0.09, wpb=64890, bsz=128, num_updates=16026, lr=9.98798e-05, gnorm=2.021, loss_scale=8, train_wall=10, gb_free=2.8, wall=184798 2021-06-20 21:58:55 | INFO | train_inner | epoch 006: 1111 / 3002 loss=2.444, ppl=5.44, wps=5855.6, ups=0.09, wpb=64816, bsz=128, num_updates=16027, lr=9.98798e-05, gnorm=1.848, loss_scale=8, train_wall=11, gb_free=2.8, wall=184809 2021-06-20 21:59:06 | INFO | train_inner | epoch 006: 1112 / 3002 loss=2.359, ppl=5.13, wps=5797, ups=0.09, wpb=64800, bsz=128, num_updates=16028, lr=9.98798e-05, gnorm=1.912, loss_scale=8, train_wall=11, gb_free=2.8, wall=184820 2021-06-20 21:59:17 | INFO | train_inner | epoch 006: 1113 / 3002 loss=2.359, ppl=5.13, wps=5840.2, ups=0.09, wpb=64873, bsz=128, num_updates=16029, lr=9.98798e-05, gnorm=1.898, loss_scale=8, train_wall=11, gb_free=2.8, wall=184831 2021-06-20 21:59:28 | INFO | train_inner | epoch 006: 1114 / 3002 loss=2.461, ppl=5.51, wps=5841.3, ups=0.09, wpb=64701, bsz=128, num_updates=16030, lr=9.98798e-05, gnorm=1.902, loss_scale=8, train_wall=11, gb_free=2.8, wall=184843 2021-06-20 21:59:39 | INFO | train_inner | epoch 006: 1115 / 3002 loss=2.467, ppl=5.53, wps=5896.6, ups=0.09, wpb=64867, bsz=128, num_updates=16031, lr=9.98797e-05, gnorm=1.872, loss_scale=8, train_wall=11, gb_free=2.8, wall=184854 2021-06-20 21:59:50 | INFO | train_inner | epoch 006: 1116 / 3002 loss=2.51, ppl=5.7, wps=5814.5, ups=0.09, wpb=64834, bsz=128, num_updates=16032, lr=9.98797e-05, gnorm=1.972, loss_scale=8, train_wall=11, gb_free=2.8, wall=184865 2021-06-20 22:00:01 | INFO | train_inner | epoch 006: 1117 / 3002 loss=2.503, ppl=5.67, wps=5959, ups=0.09, wpb=64875, bsz=128, num_updates=16033, lr=9.98797e-05, gnorm=1.91, loss_scale=8, train_wall=10, gb_free=2.8, wall=184876 2021-06-20 22:00:12 | INFO | train_inner | epoch 006: 1118 / 3002 loss=2.414, ppl=5.33, wps=5812.3, ups=0.09, wpb=64845, bsz=128, num_updates=16034, lr=9.98797e-05, gnorm=1.887, loss_scale=8, train_wall=11, gb_free=2.8, wall=184887 2021-06-20 22:00:24 | INFO | train_inner | epoch 006: 1119 / 3002 loss=2.562, ppl=5.91, wps=5731.4, ups=0.09, wpb=64770, bsz=128, num_updates=16035, lr=9.98797e-05, gnorm=1.915, loss_scale=8, train_wall=11, gb_free=2.8, wall=184898 2021-06-20 22:00:35 | INFO | train_inner | epoch 006: 1120 / 3002 loss=2.547, ppl=5.85, wps=5860.4, ups=0.09, wpb=64825, bsz=128, num_updates=16036, lr=9.98797e-05, gnorm=1.813, loss_scale=8, train_wall=11, gb_free=2.8, wall=184909 2021-06-20 22:00:46 | INFO | train_inner | epoch 006: 1121 / 3002 loss=2.423, ppl=5.36, wps=5874.4, ups=0.09, wpb=64791, bsz=128, num_updates=16037, lr=9.98797e-05, gnorm=2.302, loss_scale=8, train_wall=11, gb_free=2.8, wall=184920 2021-06-20 22:00:57 | INFO | train_inner | epoch 006: 1122 / 3002 loss=2.477, ppl=5.57, wps=5882.9, ups=0.09, wpb=64865, bsz=128, num_updates=16038, lr=9.98797e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=184931 2021-06-20 22:01:08 | INFO | train_inner | epoch 006: 1123 / 3002 loss=2.441, ppl=5.43, wps=5845.7, ups=0.09, wpb=64803, bsz=128, num_updates=16039, lr=9.98797e-05, gnorm=1.789, loss_scale=8, train_wall=11, gb_free=2.8, wall=184942 2021-06-20 22:01:19 | INFO | train_inner | epoch 006: 1124 / 3002 loss=2.498, ppl=5.65, wps=5746.2, ups=0.09, wpb=64837, bsz=128, num_updates=16040, lr=9.98797e-05, gnorm=1.874, loss_scale=8, train_wall=11, gb_free=2.8, wall=184954 2021-06-20 22:01:30 | INFO | train_inner | epoch 006: 1125 / 3002 loss=2.499, ppl=5.65, wps=5968.1, ups=0.09, wpb=64777, bsz=128, num_updates=16041, lr=9.98797e-05, gnorm=1.897, loss_scale=8, train_wall=10, gb_free=2.8, wall=184964 2021-06-20 22:01:41 | INFO | train_inner | epoch 006: 1126 / 3002 loss=2.37, ppl=5.17, wps=5830.7, ups=0.09, wpb=64850, bsz=128, num_updates=16042, lr=9.98797e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=184976 2021-06-20 22:01:52 | INFO | train_inner | epoch 006: 1127 / 3002 loss=2.539, ppl=5.81, wps=5722.3, ups=0.09, wpb=64768, bsz=128, num_updates=16043, lr=9.98796e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=184987 2021-06-20 22:02:04 | INFO | train_inner | epoch 006: 1128 / 3002 loss=2.261, ppl=4.79, wps=5778, ups=0.09, wpb=64823, bsz=128, num_updates=16044, lr=9.98796e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=184998 2021-06-20 22:02:15 | INFO | train_inner | epoch 006: 1129 / 3002 loss=2.471, ppl=5.55, wps=5883.5, ups=0.09, wpb=64878, bsz=128, num_updates=16045, lr=9.98796e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=185009 2021-06-20 22:02:26 | INFO | train_inner | epoch 006: 1130 / 3002 loss=2.443, ppl=5.44, wps=5945.5, ups=0.09, wpb=64904, bsz=128, num_updates=16046, lr=9.98796e-05, gnorm=1.856, loss_scale=8, train_wall=10, gb_free=2.8, wall=185020 2021-06-20 22:02:37 | INFO | train_inner | epoch 006: 1131 / 3002 loss=2.366, ppl=5.16, wps=5941.9, ups=0.09, wpb=64883, bsz=128, num_updates=16047, lr=9.98796e-05, gnorm=1.937, loss_scale=8, train_wall=10, gb_free=2.8, wall=185031 2021-06-20 22:02:47 | INFO | train_inner | epoch 006: 1132 / 3002 loss=2.611, ppl=6.11, wps=5925.2, ups=0.09, wpb=64739, bsz=128, num_updates=16048, lr=9.98796e-05, gnorm=1.979, loss_scale=8, train_wall=10, gb_free=2.8, wall=185042 2021-06-20 22:02:59 | INFO | train_inner | epoch 006: 1133 / 3002 loss=2.588, ppl=6.01, wps=5762.5, ups=0.09, wpb=64806, bsz=128, num_updates=16049, lr=9.98796e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=185053 2021-06-20 22:03:10 | INFO | train_inner | epoch 006: 1134 / 3002 loss=2.591, ppl=6.03, wps=5905.7, ups=0.09, wpb=64813, bsz=128, num_updates=16050, lr=9.98796e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=185064 2021-06-20 22:03:21 | INFO | train_inner | epoch 006: 1135 / 3002 loss=2.408, ppl=5.31, wps=5855.1, ups=0.09, wpb=64926, bsz=128, num_updates=16051, lr=9.98796e-05, gnorm=1.908, loss_scale=8, train_wall=11, gb_free=2.8, wall=185075 2021-06-20 22:03:32 | INFO | train_inner | epoch 006: 1136 / 3002 loss=2.393, ppl=5.25, wps=5889.4, ups=0.09, wpb=64883, bsz=128, num_updates=16052, lr=9.98796e-05, gnorm=1.88, loss_scale=8, train_wall=11, gb_free=2.8, wall=185086 2021-06-20 22:03:43 | INFO | train_inner | epoch 006: 1137 / 3002 loss=2.442, ppl=5.43, wps=5759.2, ups=0.09, wpb=64830, bsz=128, num_updates=16053, lr=9.98796e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=185097 2021-06-20 22:03:54 | INFO | train_inner | epoch 006: 1138 / 3002 loss=2.326, ppl=5.01, wps=5893.3, ups=0.09, wpb=64828, bsz=128, num_updates=16054, lr=9.98796e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=185108 2021-06-20 22:04:05 | INFO | train_inner | epoch 006: 1139 / 3002 loss=2.469, ppl=5.54, wps=5862.8, ups=0.09, wpb=64797, bsz=128, num_updates=16055, lr=9.98796e-05, gnorm=1.914, loss_scale=8, train_wall=11, gb_free=2.8, wall=185119 2021-06-20 22:04:16 | INFO | train_inner | epoch 006: 1140 / 3002 loss=2.653, ppl=6.29, wps=5815, ups=0.09, wpb=64689, bsz=128, num_updates=16056, lr=9.98795e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=185131 2021-06-20 22:04:27 | INFO | train_inner | epoch 006: 1141 / 3002 loss=2.516, ppl=5.72, wps=5862.3, ups=0.09, wpb=64908, bsz=128, num_updates=16057, lr=9.98795e-05, gnorm=1.878, loss_scale=8, train_wall=11, gb_free=2.8, wall=185142 2021-06-20 22:04:38 | INFO | train_inner | epoch 006: 1142 / 3002 loss=2.386, ppl=5.23, wps=5869.2, ups=0.09, wpb=64848, bsz=128, num_updates=16058, lr=9.98795e-05, gnorm=1.946, loss_scale=8, train_wall=11, gb_free=2.8, wall=185153 2021-06-20 22:04:49 | INFO | train_inner | epoch 006: 1143 / 3002 loss=2.503, ppl=5.67, wps=5923.6, ups=0.09, wpb=64920, bsz=128, num_updates=16059, lr=9.98795e-05, gnorm=1.853, loss_scale=8, train_wall=11, gb_free=2.8, wall=185164 2021-06-20 22:05:00 | INFO | train_inner | epoch 006: 1144 / 3002 loss=2.516, ppl=5.72, wps=5925.9, ups=0.09, wpb=64788, bsz=128, num_updates=16060, lr=9.98795e-05, gnorm=1.873, loss_scale=8, train_wall=11, gb_free=2.8, wall=185175 2021-06-20 22:05:11 | INFO | train_inner | epoch 006: 1145 / 3002 loss=2.54, ppl=5.81, wps=5965.7, ups=0.09, wpb=64877, bsz=128, num_updates=16061, lr=9.98795e-05, gnorm=1.838, loss_scale=8, train_wall=10, gb_free=2.8, wall=185185 2021-06-20 22:05:22 | INFO | train_inner | epoch 006: 1146 / 3002 loss=2.466, ppl=5.52, wps=5819.8, ups=0.09, wpb=64810, bsz=128, num_updates=16062, lr=9.98795e-05, gnorm=1.887, loss_scale=8, train_wall=11, gb_free=2.8, wall=185197 2021-06-20 22:05:33 | INFO | train_inner | epoch 006: 1147 / 3002 loss=2.543, ppl=5.83, wps=5925.8, ups=0.09, wpb=64787, bsz=128, num_updates=16063, lr=9.98795e-05, gnorm=1.911, loss_scale=8, train_wall=10, gb_free=2.8, wall=185208 2021-06-20 22:05:44 | INFO | train_inner | epoch 006: 1148 / 3002 loss=2.489, ppl=5.61, wps=5915.8, ups=0.09, wpb=64827, bsz=128, num_updates=16064, lr=9.98795e-05, gnorm=1.945, loss_scale=8, train_wall=11, gb_free=2.8, wall=185219 2021-06-20 22:05:55 | INFO | train_inner | epoch 006: 1149 / 3002 loss=2.554, ppl=5.87, wps=5945.5, ups=0.09, wpb=64935, bsz=128, num_updates=16065, lr=9.98795e-05, gnorm=2.045, loss_scale=8, train_wall=10, gb_free=2.8, wall=185229 2021-06-20 22:06:06 | INFO | train_inner | epoch 006: 1150 / 3002 loss=2.463, ppl=5.51, wps=5945.2, ups=0.09, wpb=64951, bsz=128, num_updates=16066, lr=9.98795e-05, gnorm=1.889, loss_scale=8, train_wall=10, gb_free=2.8, wall=185240 2021-06-20 22:06:17 | INFO | train_inner | epoch 006: 1151 / 3002 loss=2.49, ppl=5.62, wps=5790.3, ups=0.09, wpb=64834, bsz=128, num_updates=16067, lr=9.98795e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=185252 2021-06-20 22:06:28 | INFO | train_inner | epoch 006: 1152 / 3002 loss=2.454, ppl=5.48, wps=5909.2, ups=0.09, wpb=64853, bsz=128, num_updates=16068, lr=9.98794e-05, gnorm=2.186, loss_scale=8, train_wall=11, gb_free=2.8, wall=185263 2021-06-20 22:06:39 | INFO | train_inner | epoch 006: 1153 / 3002 loss=2.514, ppl=5.71, wps=5886.1, ups=0.09, wpb=64874, bsz=128, num_updates=16069, lr=9.98794e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=185274 2021-06-20 22:06:50 | INFO | train_inner | epoch 006: 1154 / 3002 loss=2.429, ppl=5.39, wps=5925.8, ups=0.09, wpb=64797, bsz=128, num_updates=16070, lr=9.98794e-05, gnorm=1.938, loss_scale=8, train_wall=10, gb_free=2.8, wall=185284 2021-06-20 22:07:01 | INFO | train_inner | epoch 006: 1155 / 3002 loss=2.491, ppl=5.62, wps=5921.6, ups=0.09, wpb=64812, bsz=128, num_updates=16071, lr=9.98794e-05, gnorm=1.898, loss_scale=8, train_wall=10, gb_free=2.8, wall=185295 2021-06-20 22:07:12 | INFO | train_inner | epoch 006: 1156 / 3002 loss=2.392, ppl=5.25, wps=5879.7, ups=0.09, wpb=64895, bsz=128, num_updates=16072, lr=9.98794e-05, gnorm=1.886, loss_scale=8, train_wall=11, gb_free=2.8, wall=185306 2021-06-20 22:07:23 | INFO | train_inner | epoch 006: 1157 / 3002 loss=2.557, ppl=5.88, wps=5878, ups=0.09, wpb=64922, bsz=128, num_updates=16073, lr=9.98794e-05, gnorm=1.915, loss_scale=8, train_wall=11, gb_free=2.8, wall=185318 2021-06-20 22:07:34 | INFO | train_inner | epoch 006: 1158 / 3002 loss=2.414, ppl=5.33, wps=5854.1, ups=0.09, wpb=64777, bsz=128, num_updates=16074, lr=9.98794e-05, gnorm=1.934, loss_scale=8, train_wall=11, gb_free=2.8, wall=185329 2021-06-20 22:07:45 | INFO | train_inner | epoch 006: 1159 / 3002 loss=2.623, ppl=6.16, wps=5846.4, ups=0.09, wpb=64839, bsz=128, num_updates=16075, lr=9.98794e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=185340 2021-06-20 22:07:56 | INFO | train_inner | epoch 006: 1160 / 3002 loss=2.602, ppl=6.07, wps=5901, ups=0.09, wpb=64767, bsz=128, num_updates=16076, lr=9.98794e-05, gnorm=2.021, loss_scale=8, train_wall=11, gb_free=2.8, wall=185351 2021-06-20 22:08:07 | INFO | train_inner | epoch 006: 1161 / 3002 loss=2.563, ppl=5.91, wps=5800.2, ups=0.09, wpb=64769, bsz=128, num_updates=16077, lr=9.98794e-05, gnorm=2.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=185362 2021-06-20 22:08:18 | INFO | train_inner | epoch 006: 1162 / 3002 loss=2.447, ppl=5.45, wps=5929.4, ups=0.09, wpb=64850, bsz=128, num_updates=16078, lr=9.98794e-05, gnorm=1.916, loss_scale=8, train_wall=10, gb_free=2.8, wall=185373 2021-06-20 22:08:30 | INFO | train_inner | epoch 006: 1163 / 3002 loss=2.543, ppl=5.83, wps=5794, ups=0.09, wpb=64791, bsz=128, num_updates=16079, lr=9.98794e-05, gnorm=1.884, loss_scale=8, train_wall=11, gb_free=2.8, wall=185384 2021-06-20 22:08:41 | INFO | train_inner | epoch 006: 1164 / 3002 loss=2.52, ppl=5.74, wps=5849.4, ups=0.09, wpb=64849, bsz=128, num_updates=16080, lr=9.98794e-05, gnorm=1.847, loss_scale=8, train_wall=11, gb_free=2.8, wall=185395 2021-06-20 22:08:52 | INFO | train_inner | epoch 006: 1165 / 3002 loss=2.515, ppl=5.72, wps=5900.1, ups=0.09, wpb=64860, bsz=128, num_updates=16081, lr=9.98793e-05, gnorm=1.908, loss_scale=8, train_wall=11, gb_free=2.8, wall=185406 2021-06-20 22:09:03 | INFO | train_inner | epoch 006: 1166 / 3002 loss=2.325, ppl=5.01, wps=5816, ups=0.09, wpb=64866, bsz=128, num_updates=16082, lr=9.98793e-05, gnorm=1.915, loss_scale=8, train_wall=11, gb_free=2.8, wall=185417 2021-06-20 22:09:14 | INFO | train_inner | epoch 006: 1167 / 3002 loss=2.516, ppl=5.72, wps=5788.2, ups=0.09, wpb=64763, bsz=128, num_updates=16083, lr=9.98793e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=185428 2021-06-20 22:09:25 | INFO | train_inner | epoch 006: 1168 / 3002 loss=2.633, ppl=6.2, wps=5929.9, ups=0.09, wpb=64789, bsz=128, num_updates=16084, lr=9.98793e-05, gnorm=1.976, loss_scale=8, train_wall=10, gb_free=2.8, wall=185439 2021-06-20 22:09:36 | INFO | train_inner | epoch 006: 1169 / 3002 loss=2.315, ppl=4.98, wps=5974.5, ups=0.09, wpb=64846, bsz=128, num_updates=16085, lr=9.98793e-05, gnorm=1.895, loss_scale=8, train_wall=10, gb_free=2.8, wall=185450 2021-06-20 22:09:47 | INFO | train_inner | epoch 006: 1170 / 3002 loss=2.573, ppl=5.95, wps=5947.9, ups=0.09, wpb=64832, bsz=128, num_updates=16086, lr=9.98793e-05, gnorm=2.011, loss_scale=8, train_wall=10, gb_free=2.8, wall=185461 2021-06-20 22:09:58 | INFO | train_inner | epoch 006: 1171 / 3002 loss=2.495, ppl=5.64, wps=5779.5, ups=0.09, wpb=64817, bsz=128, num_updates=16087, lr=9.98793e-05, gnorm=1.877, loss_scale=8, train_wall=11, gb_free=2.8, wall=185472 2021-06-20 22:10:09 | INFO | train_inner | epoch 006: 1172 / 3002 loss=2.589, ppl=6.02, wps=5781.5, ups=0.09, wpb=64804, bsz=128, num_updates=16088, lr=9.98793e-05, gnorm=1.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=185483 2021-06-20 22:10:20 | INFO | train_inner | epoch 006: 1173 / 3002 loss=2.569, ppl=5.93, wps=5723.6, ups=0.09, wpb=64827, bsz=128, num_updates=16089, lr=9.98793e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=185495 2021-06-20 22:10:32 | INFO | train_inner | epoch 006: 1174 / 3002 loss=2.588, ppl=6.01, wps=5841.4, ups=0.09, wpb=64787, bsz=128, num_updates=16090, lr=9.98793e-05, gnorm=1.888, loss_scale=8, train_wall=11, gb_free=2.8, wall=185506 2021-06-20 22:10:42 | INFO | train_inner | epoch 006: 1175 / 3002 loss=2.44, ppl=5.43, wps=5967.4, ups=0.09, wpb=64935, bsz=128, num_updates=16091, lr=9.98793e-05, gnorm=1.913, loss_scale=8, train_wall=10, gb_free=2.8, wall=185517 2021-06-20 22:10:54 | INFO | train_inner | epoch 006: 1176 / 3002 loss=2.568, ppl=5.93, wps=5843.9, ups=0.09, wpb=64771, bsz=128, num_updates=16092, lr=9.98793e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=185528 2021-06-20 22:11:04 | INFO | train_inner | epoch 006: 1177 / 3002 loss=2.556, ppl=5.88, wps=5932.4, ups=0.09, wpb=64765, bsz=128, num_updates=16093, lr=9.98792e-05, gnorm=1.891, loss_scale=8, train_wall=10, gb_free=2.8, wall=185539 2021-06-20 22:11:16 | INFO | train_inner | epoch 006: 1178 / 3002 loss=2.336, ppl=5.05, wps=5821, ups=0.09, wpb=64905, bsz=128, num_updates=16094, lr=9.98792e-05, gnorm=1.849, loss_scale=8, train_wall=11, gb_free=2.8, wall=185550 2021-06-20 22:11:27 | INFO | train_inner | epoch 006: 1179 / 3002 loss=2.454, ppl=5.48, wps=5925.4, ups=0.09, wpb=64831, bsz=128, num_updates=16095, lr=9.98792e-05, gnorm=1.897, loss_scale=8, train_wall=11, gb_free=2.8, wall=185561 2021-06-20 22:11:38 | INFO | train_inner | epoch 006: 1180 / 3002 loss=2.496, ppl=5.64, wps=5814.9, ups=0.09, wpb=64876, bsz=128, num_updates=16096, lr=9.98792e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=185572 2021-06-20 22:11:49 | INFO | train_inner | epoch 006: 1181 / 3002 loss=2.348, ppl=5.09, wps=5855.4, ups=0.09, wpb=64816, bsz=128, num_updates=16097, lr=9.98792e-05, gnorm=1.97, loss_scale=8, train_wall=11, gb_free=2.8, wall=185583 2021-06-20 22:12:00 | INFO | train_inner | epoch 006: 1182 / 3002 loss=2.45, ppl=5.46, wps=5824.4, ups=0.09, wpb=64776, bsz=128, num_updates=16098, lr=9.98792e-05, gnorm=1.908, loss_scale=8, train_wall=11, gb_free=2.8, wall=185594 2021-06-20 22:12:11 | INFO | train_inner | epoch 006: 1183 / 3002 loss=2.425, ppl=5.37, wps=5936.8, ups=0.09, wpb=64842, bsz=128, num_updates=16099, lr=9.98792e-05, gnorm=1.963, loss_scale=8, train_wall=10, gb_free=2.8, wall=185605 2021-06-20 22:12:22 | INFO | train_inner | epoch 006: 1184 / 3002 loss=2.454, ppl=5.48, wps=5828.7, ups=0.09, wpb=64895, bsz=128, num_updates=16100, lr=9.98792e-05, gnorm=2.009, loss_scale=8, train_wall=11, gb_free=2.8, wall=185616 2021-06-20 22:12:33 | INFO | train_inner | epoch 006: 1185 / 3002 loss=2.508, ppl=5.69, wps=5858.5, ups=0.09, wpb=64700, bsz=128, num_updates=16101, lr=9.98792e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=185627 2021-06-20 22:12:44 | INFO | train_inner | epoch 006: 1186 / 3002 loss=2.528, ppl=5.77, wps=5903.8, ups=0.09, wpb=64768, bsz=128, num_updates=16102, lr=9.98792e-05, gnorm=1.921, loss_scale=8, train_wall=11, gb_free=2.8, wall=185638 2021-06-20 22:12:55 | INFO | train_inner | epoch 006: 1187 / 3002 loss=2.598, ppl=6.05, wps=5840.2, ups=0.09, wpb=64798, bsz=128, num_updates=16103, lr=9.98792e-05, gnorm=1.86, loss_scale=8, train_wall=11, gb_free=2.8, wall=185649 2021-06-20 22:13:06 | INFO | train_inner | epoch 006: 1188 / 3002 loss=2.427, ppl=5.38, wps=5925.1, ups=0.09, wpb=64882, bsz=128, num_updates=16104, lr=9.98792e-05, gnorm=1.953, loss_scale=16, train_wall=11, gb_free=2.8, wall=185660 2021-06-20 22:13:17 | INFO | train_inner | epoch 006: 1189 / 3002 loss=2.478, ppl=5.57, wps=5932.6, ups=0.09, wpb=64835, bsz=128, num_updates=16105, lr=9.98792e-05, gnorm=1.915, loss_scale=16, train_wall=10, gb_free=2.8, wall=185671 2021-06-20 22:13:28 | INFO | train_inner | epoch 006: 1190 / 3002 loss=2.381, ppl=5.21, wps=5870.1, ups=0.09, wpb=64846, bsz=128, num_updates=16106, lr=9.98791e-05, gnorm=2.05, loss_scale=16, train_wall=11, gb_free=2.8, wall=185682 2021-06-20 22:13:39 | INFO | train_inner | epoch 006: 1191 / 3002 loss=2.566, ppl=5.92, wps=5915.2, ups=0.09, wpb=64870, bsz=128, num_updates=16107, lr=9.98791e-05, gnorm=1.999, loss_scale=16, train_wall=11, gb_free=2.8, wall=185693 2021-06-20 22:13:50 | INFO | train_inner | epoch 006: 1192 / 3002 loss=2.591, ppl=6.02, wps=5888.3, ups=0.09, wpb=64839, bsz=128, num_updates=16108, lr=9.98791e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=185704 2021-06-20 22:14:01 | INFO | train_inner | epoch 006: 1193 / 3002 loss=2.403, ppl=5.29, wps=5807.1, ups=0.09, wpb=64828, bsz=128, num_updates=16109, lr=9.98791e-05, gnorm=1.999, loss_scale=16, train_wall=11, gb_free=2.8, wall=185715 2021-06-20 22:14:12 | INFO | train_inner | epoch 006: 1194 / 3002 loss=2.406, ppl=5.3, wps=5724.9, ups=0.09, wpb=64848, bsz=128, num_updates=16110, lr=9.98791e-05, gnorm=1.966, loss_scale=16, train_wall=11, gb_free=2.8, wall=185727 2021-06-20 22:14:24 | INFO | train_inner | epoch 006: 1195 / 3002 loss=2.506, ppl=5.68, wps=5847.4, ups=0.09, wpb=64846, bsz=128, num_updates=16111, lr=9.98791e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=185738 2021-06-20 22:14:35 | INFO | train_inner | epoch 006: 1196 / 3002 loss=2.392, ppl=5.25, wps=5854.9, ups=0.09, wpb=64855, bsz=128, num_updates=16112, lr=9.98791e-05, gnorm=1.866, loss_scale=16, train_wall=11, gb_free=2.8, wall=185749 2021-06-20 22:14:46 | INFO | train_inner | epoch 006: 1197 / 3002 loss=2.597, ppl=6.05, wps=5890.4, ups=0.09, wpb=64852, bsz=128, num_updates=16113, lr=9.98791e-05, gnorm=1.941, loss_scale=16, train_wall=11, gb_free=2.8, wall=185760 2021-06-20 22:14:57 | INFO | train_inner | epoch 006: 1198 / 3002 loss=2.404, ppl=5.29, wps=5745.1, ups=0.09, wpb=64794, bsz=128, num_updates=16114, lr=9.98791e-05, gnorm=1.989, loss_scale=16, train_wall=11, gb_free=2.8, wall=185771 2021-06-20 22:15:08 | INFO | train_inner | epoch 006: 1199 / 3002 loss=2.495, ppl=5.64, wps=5826.4, ups=0.09, wpb=64853, bsz=128, num_updates=16115, lr=9.98791e-05, gnorm=1.894, loss_scale=16, train_wall=11, gb_free=2.8, wall=185782 2021-06-20 22:15:19 | INFO | train_inner | epoch 006: 1200 / 3002 loss=2.498, ppl=5.65, wps=5782.1, ups=0.09, wpb=64861, bsz=128, num_updates=16116, lr=9.98791e-05, gnorm=2.013, loss_scale=16, train_wall=11, gb_free=2.8, wall=185794 2021-06-20 22:15:30 | INFO | train_inner | epoch 006: 1201 / 3002 loss=2.489, ppl=5.61, wps=5841.4, ups=0.09, wpb=64855, bsz=128, num_updates=16117, lr=9.98791e-05, gnorm=1.859, loss_scale=16, train_wall=11, gb_free=2.8, wall=185805 2021-06-20 22:15:42 | INFO | train_inner | epoch 006: 1202 / 3002 loss=2.324, ppl=5.01, wps=5755.8, ups=0.09, wpb=64756, bsz=128, num_updates=16118, lr=9.9879e-05, gnorm=1.888, loss_scale=16, train_wall=11, gb_free=2.8, wall=185816 2021-06-20 22:15:53 | INFO | train_inner | epoch 006: 1203 / 3002 loss=2.498, ppl=5.65, wps=5861.1, ups=0.09, wpb=64913, bsz=128, num_updates=16119, lr=9.9879e-05, gnorm=1.944, loss_scale=16, train_wall=11, gb_free=2.8, wall=185827 2021-06-20 22:16:04 | INFO | train_inner | epoch 006: 1204 / 3002 loss=2.574, ppl=5.96, wps=5850.1, ups=0.09, wpb=64754, bsz=128, num_updates=16120, lr=9.9879e-05, gnorm=1.944, loss_scale=16, train_wall=11, gb_free=2.8, wall=185838 2021-06-20 22:16:15 | INFO | train_inner | epoch 006: 1205 / 3002 loss=2.391, ppl=5.25, wps=5765.5, ups=0.09, wpb=64899, bsz=128, num_updates=16121, lr=9.9879e-05, gnorm=1.915, loss_scale=16, train_wall=11, gb_free=2.8, wall=185849 2021-06-20 22:16:26 | INFO | train_inner | epoch 006: 1206 / 3002 loss=2.367, ppl=5.16, wps=5773.6, ups=0.09, wpb=64818, bsz=128, num_updates=16122, lr=9.9879e-05, gnorm=1.925, loss_scale=16, train_wall=11, gb_free=2.8, wall=185861 2021-06-20 22:16:37 | INFO | train_inner | epoch 006: 1207 / 3002 loss=2.423, ppl=5.36, wps=5898.8, ups=0.09, wpb=64793, bsz=128, num_updates=16123, lr=9.9879e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=185872 2021-06-20 22:16:48 | INFO | train_inner | epoch 006: 1208 / 3002 loss=2.403, ppl=5.29, wps=5825.5, ups=0.09, wpb=64831, bsz=128, num_updates=16124, lr=9.9879e-05, gnorm=1.94, loss_scale=16, train_wall=11, gb_free=2.8, wall=185883 2021-06-20 22:16:59 | INFO | train_inner | epoch 006: 1209 / 3002 loss=2.352, ppl=5.1, wps=5870.8, ups=0.09, wpb=64895, bsz=128, num_updates=16125, lr=9.9879e-05, gnorm=1.872, loss_scale=16, train_wall=11, gb_free=2.8, wall=185894 2021-06-20 22:17:10 | INFO | train_inner | epoch 006: 1210 / 3002 loss=2.49, ppl=5.62, wps=5891.5, ups=0.09, wpb=64836, bsz=128, num_updates=16126, lr=9.9879e-05, gnorm=1.899, loss_scale=16, train_wall=11, gb_free=2.8, wall=185905 2021-06-20 22:17:21 | INFO | train_inner | epoch 006: 1211 / 3002 loss=2.486, ppl=5.6, wps=5965.3, ups=0.09, wpb=64870, bsz=128, num_updates=16127, lr=9.9879e-05, gnorm=1.897, loss_scale=16, train_wall=10, gb_free=2.8, wall=185916 2021-06-20 22:17:32 | INFO | train_inner | epoch 006: 1212 / 3002 loss=2.489, ppl=5.62, wps=5855.2, ups=0.09, wpb=64881, bsz=128, num_updates=16128, lr=9.9879e-05, gnorm=1.932, loss_scale=16, train_wall=11, gb_free=2.8, wall=185927 2021-06-20 22:17:43 | INFO | train_inner | epoch 006: 1213 / 3002 loss=2.419, ppl=5.35, wps=5846.8, ups=0.09, wpb=64904, bsz=128, num_updates=16129, lr=9.9879e-05, gnorm=1.91, loss_scale=16, train_wall=11, gb_free=2.8, wall=185938 2021-06-20 22:17:55 | INFO | train_inner | epoch 006: 1214 / 3002 loss=2.618, ppl=6.14, wps=5763, ups=0.09, wpb=64774, bsz=128, num_updates=16130, lr=9.9879e-05, gnorm=1.915, loss_scale=16, train_wall=11, gb_free=2.8, wall=185949 2021-06-20 22:18:06 | INFO | train_inner | epoch 006: 1215 / 3002 loss=2.497, ppl=5.64, wps=5929.4, ups=0.09, wpb=64833, bsz=128, num_updates=16131, lr=9.98789e-05, gnorm=1.945, loss_scale=16, train_wall=10, gb_free=2.8, wall=185960 2021-06-20 22:18:17 | INFO | train_inner | epoch 006: 1216 / 3002 loss=2.403, ppl=5.29, wps=5850.8, ups=0.09, wpb=64853, bsz=128, num_updates=16132, lr=9.98789e-05, gnorm=1.919, loss_scale=16, train_wall=11, gb_free=2.8, wall=185971 2021-06-20 22:18:28 | INFO | train_inner | epoch 006: 1217 / 3002 loss=2.508, ppl=5.69, wps=5891.9, ups=0.09, wpb=64779, bsz=128, num_updates=16133, lr=9.98789e-05, gnorm=1.902, loss_scale=16, train_wall=11, gb_free=2.8, wall=185982 2021-06-20 22:18:39 | INFO | train_inner | epoch 006: 1218 / 3002 loss=2.407, ppl=5.31, wps=5938.7, ups=0.09, wpb=64835, bsz=128, num_updates=16134, lr=9.98789e-05, gnorm=1.932, loss_scale=16, train_wall=10, gb_free=2.8, wall=185993 2021-06-20 22:18:50 | INFO | train_inner | epoch 006: 1219 / 3002 loss=2.477, ppl=5.57, wps=5786, ups=0.09, wpb=64698, bsz=128, num_updates=16135, lr=9.98789e-05, gnorm=1.828, loss_scale=16, train_wall=11, gb_free=2.8, wall=186004 2021-06-20 22:19:01 | INFO | train_inner | epoch 006: 1220 / 3002 loss=2.331, ppl=5.03, wps=5888.2, ups=0.09, wpb=64886, bsz=128, num_updates=16136, lr=9.98789e-05, gnorm=1.984, loss_scale=16, train_wall=11, gb_free=2.8, wall=186015 2021-06-20 22:19:12 | INFO | train_inner | epoch 006: 1221 / 3002 loss=2.523, ppl=5.75, wps=5678.9, ups=0.09, wpb=64847, bsz=128, num_updates=16137, lr=9.98789e-05, gnorm=1.886, loss_scale=16, train_wall=11, gb_free=2.8, wall=186027 2021-06-20 22:19:24 | INFO | train_inner | epoch 006: 1222 / 3002 loss=2.497, ppl=5.65, wps=5729.6, ups=0.09, wpb=64784, bsz=128, num_updates=16138, lr=9.98789e-05, gnorm=1.953, loss_scale=16, train_wall=11, gb_free=2.8, wall=186038 2021-06-20 22:19:34 | INFO | train_inner | epoch 006: 1223 / 3002 loss=2.435, ppl=5.41, wps=5956.1, ups=0.09, wpb=64857, bsz=128, num_updates=16139, lr=9.98789e-05, gnorm=1.955, loss_scale=16, train_wall=10, gb_free=2.8, wall=186049 2021-06-20 22:19:46 | INFO | train_inner | epoch 006: 1224 / 3002 loss=2.348, ppl=5.09, wps=5772.9, ups=0.09, wpb=64860, bsz=128, num_updates=16140, lr=9.98789e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=186060 2021-06-20 22:19:57 | INFO | train_inner | epoch 006: 1225 / 3002 loss=2.501, ppl=5.66, wps=5836.1, ups=0.09, wpb=64789, bsz=128, num_updates=16141, lr=9.98789e-05, gnorm=1.889, loss_scale=16, train_wall=11, gb_free=2.8, wall=186071 2021-06-20 22:20:08 | INFO | train_inner | epoch 006: 1226 / 3002 loss=2.444, ppl=5.44, wps=5848.8, ups=0.09, wpb=64847, bsz=128, num_updates=16142, lr=9.98789e-05, gnorm=1.867, loss_scale=16, train_wall=11, gb_free=2.8, wall=186082 2021-06-20 22:20:19 | INFO | train_inner | epoch 006: 1227 / 3002 loss=2.458, ppl=5.5, wps=5880.6, ups=0.09, wpb=64891, bsz=128, num_updates=16143, lr=9.98788e-05, gnorm=1.983, loss_scale=16, train_wall=11, gb_free=2.8, wall=186093 2021-06-20 22:20:30 | INFO | train_inner | epoch 006: 1228 / 3002 loss=2.398, ppl=5.27, wps=5839.9, ups=0.09, wpb=64815, bsz=128, num_updates=16144, lr=9.98788e-05, gnorm=1.914, loss_scale=16, train_wall=11, gb_free=2.8, wall=186104 2021-06-20 22:20:41 | INFO | train_inner | epoch 006: 1229 / 3002 loss=2.559, ppl=5.89, wps=5818.1, ups=0.09, wpb=64790, bsz=128, num_updates=16145, lr=9.98788e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=186115 2021-06-20 22:20:52 | INFO | train_inner | epoch 006: 1230 / 3002 loss=2.46, ppl=5.5, wps=5928.1, ups=0.09, wpb=64848, bsz=128, num_updates=16146, lr=9.98788e-05, gnorm=1.951, loss_scale=16, train_wall=10, gb_free=2.8, wall=186126 2021-06-20 22:21:03 | INFO | train_inner | epoch 006: 1231 / 3002 loss=2.501, ppl=5.66, wps=5896.5, ups=0.09, wpb=64754, bsz=128, num_updates=16147, lr=9.98788e-05, gnorm=1.912, loss_scale=16, train_wall=11, gb_free=2.8, wall=186137 2021-06-20 22:21:14 | INFO | train_inner | epoch 006: 1232 / 3002 loss=2.702, ppl=6.51, wps=5869.7, ups=0.09, wpb=64862, bsz=128, num_updates=16148, lr=9.98788e-05, gnorm=2.01, loss_scale=16, train_wall=11, gb_free=2.8, wall=186148 2021-06-20 22:21:25 | INFO | train_inner | epoch 006: 1233 / 3002 loss=2.487, ppl=5.61, wps=5939.7, ups=0.09, wpb=64826, bsz=128, num_updates=16149, lr=9.98788e-05, gnorm=1.908, loss_scale=16, train_wall=10, gb_free=2.8, wall=186159 2021-06-20 22:21:36 | INFO | train_inner | epoch 006: 1234 / 3002 loss=2.446, ppl=5.45, wps=5843.2, ups=0.09, wpb=64856, bsz=128, num_updates=16150, lr=9.98788e-05, gnorm=2.075, loss_scale=16, train_wall=11, gb_free=2.8, wall=186170 2021-06-20 22:21:47 | INFO | train_inner | epoch 006: 1235 / 3002 loss=2.519, ppl=5.73, wps=5934.1, ups=0.09, wpb=64865, bsz=128, num_updates=16151, lr=9.98788e-05, gnorm=1.924, loss_scale=16, train_wall=10, gb_free=2.8, wall=186181 2021-06-20 22:21:58 | INFO | train_inner | epoch 006: 1236 / 3002 loss=2.425, ppl=5.37, wps=5982, ups=0.09, wpb=64789, bsz=128, num_updates=16152, lr=9.98788e-05, gnorm=1.915, loss_scale=16, train_wall=10, gb_free=2.8, wall=186192 2021-06-20 22:22:09 | INFO | train_inner | epoch 006: 1237 / 3002 loss=2.477, ppl=5.57, wps=5875.4, ups=0.09, wpb=64866, bsz=128, num_updates=16153, lr=9.98788e-05, gnorm=1.874, loss_scale=16, train_wall=11, gb_free=2.8, wall=186203 2021-06-20 22:22:20 | INFO | train_inner | epoch 006: 1238 / 3002 loss=2.444, ppl=5.44, wps=5935.1, ups=0.09, wpb=64924, bsz=128, num_updates=16154, lr=9.98788e-05, gnorm=1.92, loss_scale=16, train_wall=10, gb_free=2.8, wall=186214 2021-06-20 22:22:31 | INFO | train_inner | epoch 006: 1239 / 3002 loss=2.399, ppl=5.27, wps=5883.1, ups=0.09, wpb=64789, bsz=128, num_updates=16155, lr=9.98788e-05, gnorm=1.838, loss_scale=16, train_wall=11, gb_free=2.8, wall=186225 2021-06-20 22:22:42 | INFO | train_inner | epoch 006: 1240 / 3002 loss=2.362, ppl=5.14, wps=5781.6, ups=0.09, wpb=64827, bsz=128, num_updates=16156, lr=9.98787e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=186236 2021-06-20 22:22:53 | INFO | train_inner | epoch 006: 1241 / 3002 loss=2.367, ppl=5.16, wps=5926, ups=0.09, wpb=64814, bsz=128, num_updates=16157, lr=9.98787e-05, gnorm=1.851, loss_scale=16, train_wall=10, gb_free=2.8, wall=186247 2021-06-20 22:23:04 | INFO | train_inner | epoch 006: 1242 / 3002 loss=2.482, ppl=5.59, wps=5714.7, ups=0.09, wpb=64740, bsz=128, num_updates=16158, lr=9.98787e-05, gnorm=1.837, loss_scale=16, train_wall=11, gb_free=2.8, wall=186259 2021-06-20 22:23:15 | INFO | train_inner | epoch 006: 1243 / 3002 loss=2.385, ppl=5.22, wps=5890.9, ups=0.09, wpb=64864, bsz=128, num_updates=16159, lr=9.98787e-05, gnorm=2.032, loss_scale=16, train_wall=11, gb_free=2.8, wall=186270 2021-06-20 22:23:26 | INFO | train_inner | epoch 006: 1244 / 3002 loss=2.446, ppl=5.45, wps=5921.7, ups=0.09, wpb=64847, bsz=128, num_updates=16160, lr=9.98787e-05, gnorm=1.881, loss_scale=16, train_wall=10, gb_free=2.8, wall=186281 2021-06-20 22:23:37 | INFO | train_inner | epoch 006: 1245 / 3002 loss=2.344, ppl=5.08, wps=5894.3, ups=0.09, wpb=64868, bsz=128, num_updates=16161, lr=9.98787e-05, gnorm=1.851, loss_scale=16, train_wall=11, gb_free=2.8, wall=186292 2021-06-20 22:23:49 | INFO | train_inner | epoch 006: 1246 / 3002 loss=2.579, ppl=5.98, wps=5759.5, ups=0.09, wpb=64867, bsz=128, num_updates=16162, lr=9.98787e-05, gnorm=2.018, loss_scale=16, train_wall=11, gb_free=2.8, wall=186303 2021-06-20 22:24:00 | INFO | train_inner | epoch 006: 1247 / 3002 loss=2.584, ppl=6, wps=5862, ups=0.09, wpb=64782, bsz=128, num_updates=16163, lr=9.98787e-05, gnorm=2.005, loss_scale=16, train_wall=11, gb_free=2.8, wall=186314 2021-06-20 22:24:11 | INFO | train_inner | epoch 006: 1248 / 3002 loss=2.48, ppl=5.58, wps=5818.5, ups=0.09, wpb=64827, bsz=128, num_updates=16164, lr=9.98787e-05, gnorm=1.966, loss_scale=16, train_wall=11, gb_free=2.8, wall=186325 2021-06-20 22:24:22 | INFO | train_inner | epoch 006: 1249 / 3002 loss=2.345, ppl=5.08, wps=5816.9, ups=0.09, wpb=64836, bsz=128, num_updates=16165, lr=9.98787e-05, gnorm=1.878, loss_scale=16, train_wall=11, gb_free=2.8, wall=186336 2021-06-20 22:24:33 | INFO | train_inner | epoch 006: 1250 / 3002 loss=2.418, ppl=5.34, wps=5834.8, ups=0.09, wpb=64835, bsz=128, num_updates=16166, lr=9.98787e-05, gnorm=1.895, loss_scale=16, train_wall=11, gb_free=2.8, wall=186347 2021-06-20 22:24:44 | INFO | train_inner | epoch 006: 1251 / 3002 loss=2.521, ppl=5.74, wps=5781.7, ups=0.09, wpb=64830, bsz=128, num_updates=16167, lr=9.98787e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=186359 2021-06-20 22:24:55 | INFO | train_inner | epoch 006: 1252 / 3002 loss=2.382, ppl=5.21, wps=5894.3, ups=0.09, wpb=64898, bsz=128, num_updates=16168, lr=9.98786e-05, gnorm=1.937, loss_scale=16, train_wall=11, gb_free=2.8, wall=186370 2021-06-20 22:25:06 | INFO | train_inner | epoch 006: 1253 / 3002 loss=2.491, ppl=5.62, wps=5983, ups=0.09, wpb=64818, bsz=128, num_updates=16169, lr=9.98786e-05, gnorm=1.948, loss_scale=16, train_wall=10, gb_free=2.8, wall=186380 2021-06-20 22:25:17 | INFO | train_inner | epoch 006: 1254 / 3002 loss=2.523, ppl=5.75, wps=5887.7, ups=0.09, wpb=64867, bsz=128, num_updates=16170, lr=9.98786e-05, gnorm=1.948, loss_scale=16, train_wall=11, gb_free=2.8, wall=186391 2021-06-20 22:25:28 | INFO | train_inner | epoch 006: 1255 / 3002 loss=2.231, ppl=4.69, wps=6011.8, ups=0.09, wpb=64841, bsz=128, num_updates=16171, lr=9.98786e-05, gnorm=1.85, loss_scale=16, train_wall=10, gb_free=2.8, wall=186402 2021-06-20 22:25:39 | INFO | train_inner | epoch 006: 1256 / 3002 loss=2.47, ppl=5.54, wps=5826.4, ups=0.09, wpb=64868, bsz=128, num_updates=16172, lr=9.98786e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=186413 2021-06-20 22:25:50 | INFO | train_inner | epoch 006: 1257 / 3002 loss=2.396, ppl=5.26, wps=5864.3, ups=0.09, wpb=64868, bsz=128, num_updates=16173, lr=9.98786e-05, gnorm=1.836, loss_scale=16, train_wall=11, gb_free=2.8, wall=186424 2021-06-20 22:26:01 | INFO | train_inner | epoch 006: 1258 / 3002 loss=2.436, ppl=5.41, wps=5878.7, ups=0.09, wpb=64820, bsz=128, num_updates=16174, lr=9.98786e-05, gnorm=1.9, loss_scale=16, train_wall=11, gb_free=2.8, wall=186435 2021-06-20 22:26:12 | INFO | train_inner | epoch 006: 1259 / 3002 loss=2.459, ppl=5.5, wps=5794.8, ups=0.09, wpb=64766, bsz=128, num_updates=16175, lr=9.98786e-05, gnorm=2.395, loss_scale=16, train_wall=11, gb_free=2.8, wall=186447 2021-06-20 22:26:23 | INFO | train_inner | epoch 006: 1260 / 3002 loss=2.435, ppl=5.41, wps=5945, ups=0.09, wpb=64851, bsz=128, num_updates=16176, lr=9.98786e-05, gnorm=1.879, loss_scale=16, train_wall=10, gb_free=2.8, wall=186458 2021-06-20 22:26:34 | INFO | train_inner | epoch 006: 1261 / 3002 loss=2.517, ppl=5.72, wps=5905, ups=0.09, wpb=64842, bsz=128, num_updates=16177, lr=9.98786e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=186469 2021-06-20 22:26:45 | INFO | train_inner | epoch 006: 1262 / 3002 loss=2.488, ppl=5.61, wps=5916.7, ups=0.09, wpb=64792, bsz=128, num_updates=16178, lr=9.98786e-05, gnorm=1.986, loss_scale=16, train_wall=11, gb_free=2.8, wall=186479 2021-06-20 22:26:56 | INFO | train_inner | epoch 006: 1263 / 3002 loss=2.481, ppl=5.58, wps=5831.7, ups=0.09, wpb=64863, bsz=128, num_updates=16179, lr=9.98786e-05, gnorm=1.936, loss_scale=16, train_wall=11, gb_free=2.8, wall=186491 2021-06-20 22:27:07 | INFO | train_inner | epoch 006: 1264 / 3002 loss=2.382, ppl=5.21, wps=5834.9, ups=0.09, wpb=64802, bsz=128, num_updates=16180, lr=9.98786e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=186502 2021-06-20 22:27:19 | INFO | train_inner | epoch 006: 1265 / 3002 loss=2.377, ppl=5.2, wps=5794, ups=0.09, wpb=64862, bsz=128, num_updates=16181, lr=9.98785e-05, gnorm=1.836, loss_scale=16, train_wall=11, gb_free=2.8, wall=186513 2021-06-20 22:27:30 | INFO | train_inner | epoch 006: 1266 / 3002 loss=2.38, ppl=5.2, wps=5896.1, ups=0.09, wpb=64911, bsz=128, num_updates=16182, lr=9.98785e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=186524 2021-06-20 22:27:41 | INFO | train_inner | epoch 006: 1267 / 3002 loss=2.401, ppl=5.28, wps=5850.7, ups=0.09, wpb=64844, bsz=128, num_updates=16183, lr=9.98785e-05, gnorm=1.893, loss_scale=16, train_wall=11, gb_free=2.8, wall=186535 2021-06-20 22:27:52 | INFO | train_inner | epoch 006: 1268 / 3002 loss=2.486, ppl=5.6, wps=5939.7, ups=0.09, wpb=64846, bsz=128, num_updates=16184, lr=9.98785e-05, gnorm=1.931, loss_scale=16, train_wall=10, gb_free=2.8, wall=186546 2021-06-20 22:28:03 | INFO | train_inner | epoch 006: 1269 / 3002 loss=2.529, ppl=5.77, wps=5853.1, ups=0.09, wpb=64824, bsz=128, num_updates=16185, lr=9.98785e-05, gnorm=2.523, loss_scale=16, train_wall=11, gb_free=2.8, wall=186557 2021-06-20 22:28:14 | INFO | train_inner | epoch 006: 1270 / 3002 loss=2.556, ppl=5.88, wps=5818.7, ups=0.09, wpb=64804, bsz=128, num_updates=16186, lr=9.98785e-05, gnorm=1.966, loss_scale=16, train_wall=11, gb_free=2.8, wall=186568 2021-06-20 22:28:25 | INFO | train_inner | epoch 006: 1271 / 3002 loss=2.521, ppl=5.74, wps=5751, ups=0.09, wpb=64832, bsz=128, num_updates=16187, lr=9.98785e-05, gnorm=2.032, loss_scale=16, train_wall=11, gb_free=2.8, wall=186579 2021-06-20 22:28:36 | INFO | train_inner | epoch 006: 1272 / 3002 loss=2.514, ppl=5.71, wps=5987.7, ups=0.09, wpb=64873, bsz=128, num_updates=16188, lr=9.98785e-05, gnorm=1.931, loss_scale=16, train_wall=10, gb_free=2.8, wall=186590 2021-06-20 22:28:47 | INFO | train_inner | epoch 006: 1273 / 3002 loss=2.346, ppl=5.08, wps=5890.2, ups=0.09, wpb=64877, bsz=128, num_updates=16189, lr=9.98785e-05, gnorm=1.826, loss_scale=16, train_wall=11, gb_free=2.8, wall=186601 2021-06-20 22:28:58 | INFO | train_inner | epoch 006: 1274 / 3002 loss=2.544, ppl=5.83, wps=5912.3, ups=0.09, wpb=64865, bsz=128, num_updates=16190, lr=9.98785e-05, gnorm=1.892, loss_scale=16, train_wall=11, gb_free=2.8, wall=186612 2021-06-20 22:29:09 | INFO | train_inner | epoch 006: 1275 / 3002 loss=2.306, ppl=4.94, wps=5818.5, ups=0.09, wpb=64806, bsz=128, num_updates=16191, lr=9.98785e-05, gnorm=2.016, loss_scale=16, train_wall=11, gb_free=2.8, wall=186623 2021-06-20 22:29:20 | INFO | train_inner | epoch 006: 1276 / 3002 loss=2.481, ppl=5.58, wps=5946.7, ups=0.09, wpb=64899, bsz=128, num_updates=16192, lr=9.98785e-05, gnorm=2.642, loss_scale=16, train_wall=10, gb_free=2.8, wall=186634 2021-06-20 22:29:31 | INFO | train_inner | epoch 006: 1277 / 3002 loss=2.396, ppl=5.27, wps=5814.1, ups=0.09, wpb=64858, bsz=128, num_updates=16193, lr=9.98784e-05, gnorm=1.966, loss_scale=16, train_wall=11, gb_free=2.8, wall=186645 2021-06-20 22:29:42 | INFO | train_inner | epoch 006: 1278 / 3002 loss=2.393, ppl=5.25, wps=5797.2, ups=0.09, wpb=64825, bsz=128, num_updates=16194, lr=9.98784e-05, gnorm=2.1, loss_scale=16, train_wall=11, gb_free=2.8, wall=186657 2021-06-20 22:29:53 | INFO | train_inner | epoch 006: 1279 / 3002 loss=2.485, ppl=5.6, wps=5892.1, ups=0.09, wpb=64811, bsz=128, num_updates=16195, lr=9.98784e-05, gnorm=1.983, loss_scale=16, train_wall=11, gb_free=2.8, wall=186668 2021-06-20 22:30:04 | INFO | train_inner | epoch 006: 1280 / 3002 loss=2.534, ppl=5.79, wps=5778.5, ups=0.09, wpb=64842, bsz=128, num_updates=16196, lr=9.98784e-05, gnorm=1.972, loss_scale=16, train_wall=11, gb_free=2.8, wall=186679 2021-06-20 22:30:16 | INFO | train_inner | epoch 006: 1281 / 3002 loss=2.577, ppl=5.97, wps=5816.3, ups=0.09, wpb=64811, bsz=128, num_updates=16197, lr=9.98784e-05, gnorm=1.968, loss_scale=16, train_wall=11, gb_free=2.8, wall=186690 2021-06-20 22:30:27 | INFO | train_inner | epoch 006: 1282 / 3002 loss=2.43, ppl=5.39, wps=5860.8, ups=0.09, wpb=64827, bsz=128, num_updates=16198, lr=9.98784e-05, gnorm=1.867, loss_scale=16, train_wall=11, gb_free=2.8, wall=186701 2021-06-20 22:30:38 | INFO | train_inner | epoch 006: 1283 / 3002 loss=2.536, ppl=5.8, wps=5921, ups=0.09, wpb=64902, bsz=128, num_updates=16199, lr=9.98784e-05, gnorm=2.479, loss_scale=16, train_wall=11, gb_free=2.8, wall=186712 2021-06-20 22:30:49 | INFO | train_inner | epoch 006: 1284 / 3002 loss=2.473, ppl=5.55, wps=5901.9, ups=0.09, wpb=64846, bsz=128, num_updates=16200, lr=9.98784e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=186723 2021-06-20 22:31:00 | INFO | train_inner | epoch 006: 1285 / 3002 loss=2.418, ppl=5.35, wps=5831.6, ups=0.09, wpb=64764, bsz=128, num_updates=16201, lr=9.98784e-05, gnorm=1.984, loss_scale=16, train_wall=11, gb_free=2.8, wall=186734 2021-06-20 22:31:11 | INFO | train_inner | epoch 006: 1286 / 3002 loss=2.669, ppl=6.36, wps=5849.3, ups=0.09, wpb=64799, bsz=128, num_updates=16202, lr=9.98784e-05, gnorm=1.926, loss_scale=16, train_wall=11, gb_free=2.8, wall=186745 2021-06-20 22:31:22 | INFO | train_inner | epoch 006: 1287 / 3002 loss=2.521, ppl=5.74, wps=6021.6, ups=0.09, wpb=64882, bsz=128, num_updates=16203, lr=9.98784e-05, gnorm=1.929, loss_scale=16, train_wall=10, gb_free=2.8, wall=186756 2021-06-20 22:31:33 | INFO | train_inner | epoch 006: 1288 / 3002 loss=2.531, ppl=5.78, wps=5887.6, ups=0.09, wpb=64867, bsz=128, num_updates=16204, lr=9.98784e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=186767 2021-06-20 22:31:44 | INFO | train_inner | epoch 006: 1289 / 3002 loss=2.395, ppl=5.26, wps=5754.3, ups=0.09, wpb=64734, bsz=128, num_updates=16205, lr=9.98784e-05, gnorm=2.186, loss_scale=16, train_wall=11, gb_free=2.8, wall=186778 2021-06-20 22:31:55 | INFO | train_inner | epoch 006: 1290 / 3002 loss=2.507, ppl=5.68, wps=5861.9, ups=0.09, wpb=64775, bsz=128, num_updates=16206, lr=9.98783e-05, gnorm=2.033, loss_scale=16, train_wall=11, gb_free=2.8, wall=186789 2021-06-20 22:32:06 | INFO | train_inner | epoch 006: 1291 / 3002 loss=2.391, ppl=5.24, wps=5822.7, ups=0.09, wpb=64809, bsz=128, num_updates=16207, lr=9.98783e-05, gnorm=1.906, loss_scale=16, train_wall=11, gb_free=2.8, wall=186800 2021-06-20 22:32:17 | INFO | train_inner | epoch 006: 1292 / 3002 loss=2.555, ppl=5.88, wps=5894.8, ups=0.09, wpb=64768, bsz=128, num_updates=16208, lr=9.98783e-05, gnorm=1.982, loss_scale=16, train_wall=11, gb_free=2.8, wall=186811 2021-06-20 22:32:28 | INFO | train_inner | epoch 006: 1293 / 3002 loss=2.378, ppl=5.2, wps=5785.8, ups=0.09, wpb=64784, bsz=128, num_updates=16209, lr=9.98783e-05, gnorm=1.899, loss_scale=16, train_wall=11, gb_free=2.8, wall=186823 2021-06-20 22:32:39 | INFO | train_inner | epoch 006: 1294 / 3002 loss=2.464, ppl=5.52, wps=5826.9, ups=0.09, wpb=64832, bsz=128, num_updates=16210, lr=9.98783e-05, gnorm=2.109, loss_scale=16, train_wall=11, gb_free=2.8, wall=186834 2021-06-20 22:32:50 | INFO | train_inner | epoch 006: 1295 / 3002 loss=2.433, ppl=5.4, wps=5901.9, ups=0.09, wpb=64801, bsz=128, num_updates=16211, lr=9.98783e-05, gnorm=1.98, loss_scale=16, train_wall=10, gb_free=2.8, wall=186845 2021-06-20 22:33:02 | INFO | train_inner | epoch 006: 1296 / 3002 loss=2.449, ppl=5.46, wps=5777.4, ups=0.09, wpb=64815, bsz=128, num_updates=16212, lr=9.98783e-05, gnorm=1.986, loss_scale=16, train_wall=11, gb_free=2.8, wall=186856 2021-06-20 22:33:13 | INFO | train_inner | epoch 006: 1297 / 3002 loss=2.525, ppl=5.76, wps=5849.3, ups=0.09, wpb=64787, bsz=128, num_updates=16213, lr=9.98783e-05, gnorm=1.918, loss_scale=16, train_wall=11, gb_free=2.8, wall=186867 2021-06-20 22:33:24 | INFO | train_inner | epoch 006: 1298 / 3002 loss=2.266, ppl=4.81, wps=5957.9, ups=0.09, wpb=64821, bsz=128, num_updates=16214, lr=9.98783e-05, gnorm=1.904, loss_scale=16, train_wall=10, gb_free=2.8, wall=186878 2021-06-20 22:33:34 | INFO | train_inner | epoch 006: 1299 / 3002 loss=2.577, ppl=5.97, wps=5970.1, ups=0.09, wpb=64838, bsz=128, num_updates=16215, lr=9.98783e-05, gnorm=1.976, loss_scale=16, train_wall=10, gb_free=2.8, wall=186889 2021-06-20 22:33:45 | INFO | train_inner | epoch 006: 1300 / 3002 loss=2.48, ppl=5.58, wps=5882.4, ups=0.09, wpb=64899, bsz=128, num_updates=16216, lr=9.98783e-05, gnorm=1.831, loss_scale=16, train_wall=11, gb_free=2.8, wall=186900 2021-06-20 22:33:56 | INFO | train_inner | epoch 006: 1301 / 3002 loss=2.594, ppl=6.04, wps=5868.9, ups=0.09, wpb=64872, bsz=128, num_updates=16217, lr=9.98783e-05, gnorm=1.997, loss_scale=16, train_wall=11, gb_free=2.8, wall=186911 2021-06-20 22:34:08 | INFO | train_inner | epoch 006: 1302 / 3002 loss=2.531, ppl=5.78, wps=5807.9, ups=0.09, wpb=64866, bsz=128, num_updates=16218, lr=9.98782e-05, gnorm=1.942, loss_scale=16, train_wall=11, gb_free=2.8, wall=186922 2021-06-20 22:34:19 | INFO | train_inner | epoch 006: 1303 / 3002 loss=2.508, ppl=5.69, wps=5886.2, ups=0.09, wpb=64822, bsz=128, num_updates=16219, lr=9.98782e-05, gnorm=1.91, loss_scale=16, train_wall=11, gb_free=2.8, wall=186933 2021-06-20 22:34:30 | INFO | train_inner | epoch 006: 1304 / 3002 loss=2.547, ppl=5.85, wps=5757.8, ups=0.09, wpb=64728, bsz=128, num_updates=16220, lr=9.98782e-05, gnorm=2.01, loss_scale=16, train_wall=11, gb_free=2.8, wall=186944 2021-06-20 22:34:41 | INFO | train_inner | epoch 006: 1305 / 3002 loss=2.394, ppl=5.26, wps=5781.6, ups=0.09, wpb=64894, bsz=128, num_updates=16221, lr=9.98782e-05, gnorm=1.918, loss_scale=16, train_wall=11, gb_free=2.8, wall=186955 2021-06-20 22:34:52 | INFO | train_inner | epoch 006: 1306 / 3002 loss=2.482, ppl=5.58, wps=5812.4, ups=0.09, wpb=64852, bsz=128, num_updates=16222, lr=9.98782e-05, gnorm=2.896, loss_scale=16, train_wall=11, gb_free=2.8, wall=186967 2021-06-20 22:35:03 | INFO | train_inner | epoch 006: 1307 / 3002 loss=2.442, ppl=5.43, wps=5857.2, ups=0.09, wpb=64756, bsz=128, num_updates=16223, lr=9.98782e-05, gnorm=1.919, loss_scale=16, train_wall=11, gb_free=2.8, wall=186978 2021-06-20 22:35:14 | INFO | train_inner | epoch 006: 1308 / 3002 loss=2.568, ppl=5.93, wps=5921, ups=0.09, wpb=64791, bsz=128, num_updates=16224, lr=9.98782e-05, gnorm=2.104, loss_scale=16, train_wall=11, gb_free=2.8, wall=186989 2021-06-20 22:35:26 | INFO | train_inner | epoch 006: 1309 / 3002 loss=2.459, ppl=5.5, wps=5732.8, ups=0.09, wpb=64821, bsz=128, num_updates=16225, lr=9.98782e-05, gnorm=1.983, loss_scale=16, train_wall=11, gb_free=2.8, wall=187000 2021-06-20 22:35:36 | INFO | train_inner | epoch 006: 1310 / 3002 loss=2.48, ppl=5.58, wps=5973.3, ups=0.09, wpb=64881, bsz=128, num_updates=16226, lr=9.98782e-05, gnorm=1.94, loss_scale=16, train_wall=10, gb_free=2.8, wall=187011 2021-06-20 22:35:48 | INFO | train_inner | epoch 006: 1311 / 3002 loss=2.386, ppl=5.23, wps=5740.9, ups=0.09, wpb=64881, bsz=128, num_updates=16227, lr=9.98782e-05, gnorm=1.858, loss_scale=16, train_wall=11, gb_free=2.8, wall=187022 2021-06-20 22:35:59 | INFO | train_inner | epoch 006: 1312 / 3002 loss=2.495, ppl=5.64, wps=5989.3, ups=0.09, wpb=64751, bsz=128, num_updates=16228, lr=9.98782e-05, gnorm=2.156, loss_scale=16, train_wall=10, gb_free=2.8, wall=187033 2021-06-20 22:36:10 | INFO | train_inner | epoch 006: 1313 / 3002 loss=2.572, ppl=5.95, wps=5847.5, ups=0.09, wpb=64811, bsz=128, num_updates=16229, lr=9.98782e-05, gnorm=2.825, loss_scale=16, train_wall=11, gb_free=2.8, wall=187044 2021-06-20 22:36:21 | INFO | train_inner | epoch 006: 1314 / 3002 loss=2.359, ppl=5.13, wps=5821.2, ups=0.09, wpb=64831, bsz=128, num_updates=16230, lr=9.98782e-05, gnorm=1.907, loss_scale=16, train_wall=11, gb_free=2.8, wall=187055 2021-06-20 22:36:32 | INFO | train_inner | epoch 006: 1315 / 3002 loss=2.448, ppl=5.46, wps=5762.8, ups=0.09, wpb=64836, bsz=128, num_updates=16231, lr=9.98781e-05, gnorm=2.219, loss_scale=16, train_wall=11, gb_free=2.8, wall=187066 2021-06-20 22:36:43 | INFO | train_inner | epoch 006: 1316 / 3002 loss=2.482, ppl=5.59, wps=5738.1, ups=0.09, wpb=64814, bsz=128, num_updates=16232, lr=9.98781e-05, gnorm=1.931, loss_scale=32, train_wall=11, gb_free=2.8, wall=187078 2021-06-20 22:36:54 | INFO | train_inner | epoch 006: 1317 / 3002 loss=2.473, ppl=5.55, wps=5835.5, ups=0.09, wpb=64838, bsz=128, num_updates=16233, lr=9.98781e-05, gnorm=1.92, loss_scale=32, train_wall=11, gb_free=2.8, wall=187089 2021-06-20 22:37:05 | INFO | train_inner | epoch 006: 1318 / 3002 loss=2.43, ppl=5.39, wps=5892.2, ups=0.09, wpb=64895, bsz=128, num_updates=16234, lr=9.98781e-05, gnorm=1.957, loss_scale=32, train_wall=11, gb_free=2.8, wall=187100 2021-06-20 22:37:17 | INFO | train_inner | epoch 006: 1319 / 3002 loss=2.48, ppl=5.58, wps=5839.6, ups=0.09, wpb=64872, bsz=128, num_updates=16235, lr=9.98781e-05, gnorm=1.914, loss_scale=32, train_wall=11, gb_free=2.8, wall=187111 2021-06-20 22:37:27 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 2021-06-20 22:37:38 | INFO | train_inner | epoch 006: 1321 / 3002 loss=2.543, ppl=5.83, wps=2974.7, ups=0.05, wpb=64737, bsz=128, num_updates=16236, lr=9.98781e-05, gnorm=1.903, loss_scale=16, train_wall=21, gb_free=2.8, wall=187133 2021-06-20 22:37:49 | INFO | train_inner | epoch 006: 1322 / 3002 loss=2.435, ppl=5.41, wps=5908.5, ups=0.09, wpb=64860, bsz=128, num_updates=16237, lr=9.98781e-05, gnorm=1.895, loss_scale=16, train_wall=11, gb_free=2.8, wall=187144 2021-06-20 22:38:00 | INFO | train_inner | epoch 006: 1323 / 3002 loss=2.545, ppl=5.84, wps=5971.3, ups=0.09, wpb=64791, bsz=128, num_updates=16238, lr=9.98781e-05, gnorm=1.988, loss_scale=16, train_wall=10, gb_free=2.8, wall=187154 2021-06-20 22:38:11 | INFO | train_inner | epoch 006: 1324 / 3002 loss=2.395, ppl=5.26, wps=5872.8, ups=0.09, wpb=64829, bsz=128, num_updates=16239, lr=9.98781e-05, gnorm=1.928, loss_scale=16, train_wall=11, gb_free=2.8, wall=187166 2021-06-20 22:38:22 | INFO | train_inner | epoch 006: 1325 / 3002 loss=2.393, ppl=5.25, wps=5823.7, ups=0.09, wpb=64778, bsz=128, num_updates=16240, lr=9.98781e-05, gnorm=2.048, loss_scale=16, train_wall=11, gb_free=2.8, wall=187177 2021-06-20 22:38:33 | INFO | train_inner | epoch 006: 1326 / 3002 loss=2.63, ppl=6.19, wps=5870.3, ups=0.09, wpb=64839, bsz=128, num_updates=16241, lr=9.98781e-05, gnorm=2.042, loss_scale=16, train_wall=11, gb_free=2.8, wall=187188 2021-06-20 22:38:44 | INFO | train_inner | epoch 006: 1327 / 3002 loss=2.478, ppl=5.57, wps=6029.4, ups=0.09, wpb=64836, bsz=128, num_updates=16242, lr=9.98781e-05, gnorm=1.895, loss_scale=16, train_wall=10, gb_free=2.8, wall=187198 2021-06-20 22:38:55 | INFO | train_inner | epoch 006: 1328 / 3002 loss=2.545, ppl=5.84, wps=5794.1, ups=0.09, wpb=64799, bsz=128, num_updates=16243, lr=9.9878e-05, gnorm=1.993, loss_scale=16, train_wall=11, gb_free=2.8, wall=187210 2021-06-20 22:39:06 | INFO | train_inner | epoch 006: 1329 / 3002 loss=2.48, ppl=5.58, wps=5907.7, ups=0.09, wpb=64888, bsz=128, num_updates=16244, lr=9.9878e-05, gnorm=1.932, loss_scale=16, train_wall=11, gb_free=2.8, wall=187221 2021-06-20 22:39:17 | INFO | train_inner | epoch 006: 1330 / 3002 loss=2.336, ppl=5.05, wps=5845.7, ups=0.09, wpb=64840, bsz=128, num_updates=16245, lr=9.9878e-05, gnorm=1.88, loss_scale=16, train_wall=11, gb_free=2.8, wall=187232 2021-06-20 22:39:28 | INFO | train_inner | epoch 006: 1331 / 3002 loss=2.445, ppl=5.45, wps=5821.8, ups=0.09, wpb=64728, bsz=128, num_updates=16246, lr=9.9878e-05, gnorm=2.91, loss_scale=16, train_wall=11, gb_free=2.8, wall=187243 2021-06-20 22:39:40 | INFO | train_inner | epoch 006: 1332 / 3002 loss=2.403, ppl=5.29, wps=5806, ups=0.09, wpb=64835, bsz=128, num_updates=16247, lr=9.9878e-05, gnorm=1.894, loss_scale=16, train_wall=11, gb_free=2.8, wall=187254 2021-06-20 22:39:51 | INFO | train_inner | epoch 006: 1333 / 3002 loss=2.586, ppl=6, wps=5761.8, ups=0.09, wpb=64815, bsz=128, num_updates=16248, lr=9.9878e-05, gnorm=1.949, loss_scale=16, train_wall=11, gb_free=2.8, wall=187265 2021-06-20 22:40:02 | INFO | train_inner | epoch 006: 1334 / 3002 loss=2.458, ppl=5.49, wps=5774.4, ups=0.09, wpb=64836, bsz=128, num_updates=16249, lr=9.9878e-05, gnorm=2.34, loss_scale=16, train_wall=11, gb_free=2.8, wall=187276 2021-06-20 22:40:13 | INFO | train_inner | epoch 006: 1335 / 3002 loss=2.376, ppl=5.19, wps=5765.3, ups=0.09, wpb=64745, bsz=128, num_updates=16250, lr=9.9878e-05, gnorm=2.255, loss_scale=16, train_wall=11, gb_free=2.8, wall=187288 2021-06-20 22:40:24 | INFO | train_inner | epoch 006: 1336 / 3002 loss=2.403, ppl=5.29, wps=5931.5, ups=0.09, wpb=64867, bsz=128, num_updates=16251, lr=9.9878e-05, gnorm=2.649, loss_scale=16, train_wall=10, gb_free=2.8, wall=187299 2021-06-20 22:40:35 | INFO | train_inner | epoch 006: 1337 / 3002 loss=2.573, ppl=5.95, wps=5797.1, ups=0.09, wpb=64814, bsz=128, num_updates=16252, lr=9.9878e-05, gnorm=1.946, loss_scale=16, train_wall=11, gb_free=2.8, wall=187310 2021-06-20 22:40:47 | INFO | train_inner | epoch 006: 1338 / 3002 loss=2.601, ppl=6.07, wps=5827.2, ups=0.09, wpb=64868, bsz=128, num_updates=16253, lr=9.9878e-05, gnorm=1.98, loss_scale=16, train_wall=11, gb_free=2.8, wall=187321 2021-06-20 22:40:57 | INFO | train_inner | epoch 006: 1339 / 3002 loss=2.548, ppl=5.85, wps=6034.3, ups=0.09, wpb=64837, bsz=128, num_updates=16254, lr=9.9878e-05, gnorm=2.022, loss_scale=16, train_wall=10, gb_free=2.8, wall=187332 2021-06-20 22:41:08 | INFO | train_inner | epoch 006: 1340 / 3002 loss=2.395, ppl=5.26, wps=5915.4, ups=0.09, wpb=64852, bsz=128, num_updates=16255, lr=9.9878e-05, gnorm=1.861, loss_scale=16, train_wall=11, gb_free=2.8, wall=187343 2021-06-20 22:41:19 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-20 22:41:30 | INFO | train_inner | epoch 006: 1342 / 3002 loss=2.535, ppl=5.79, wps=2947.2, ups=0.05, wpb=64843, bsz=128, num_updates=16256, lr=9.98779e-05, gnorm=1.924, loss_scale=8, train_wall=21, gb_free=2.8, wall=187365 2021-06-20 22:41:41 | INFO | train_inner | epoch 006: 1343 / 3002 loss=2.445, ppl=5.45, wps=5888.5, ups=0.09, wpb=64841, bsz=128, num_updates=16257, lr=9.98779e-05, gnorm=1.902, loss_scale=8, train_wall=11, gb_free=2.8, wall=187376 2021-06-20 22:41:52 | INFO | train_inner | epoch 006: 1344 / 3002 loss=2.468, ppl=5.53, wps=5917.6, ups=0.09, wpb=64900, bsz=128, num_updates=16258, lr=9.98779e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=187387 2021-06-20 22:42:03 | INFO | train_inner | epoch 006: 1345 / 3002 loss=2.343, ppl=5.08, wps=5936.4, ups=0.09, wpb=64894, bsz=128, num_updates=16259, lr=9.98779e-05, gnorm=1.849, loss_scale=8, train_wall=10, gb_free=2.8, wall=187398 2021-06-20 22:42:14 | INFO | train_inner | epoch 006: 1346 / 3002 loss=2.545, ppl=5.83, wps=5794.1, ups=0.09, wpb=64811, bsz=128, num_updates=16260, lr=9.98779e-05, gnorm=1.952, loss_scale=8, train_wall=11, gb_free=2.8, wall=187409 2021-06-20 22:42:26 | INFO | train_inner | epoch 006: 1347 / 3002 loss=2.297, ppl=4.91, wps=5825.6, ups=0.09, wpb=64813, bsz=128, num_updates=16261, lr=9.98779e-05, gnorm=1.945, loss_scale=8, train_wall=11, gb_free=2.8, wall=187420 2021-06-20 22:42:36 | INFO | train_inner | epoch 006: 1348 / 3002 loss=2.456, ppl=5.49, wps=5925.2, ups=0.09, wpb=64829, bsz=128, num_updates=16262, lr=9.98779e-05, gnorm=1.897, loss_scale=8, train_wall=10, gb_free=2.8, wall=187431 2021-06-20 22:42:48 | INFO | train_inner | epoch 006: 1349 / 3002 loss=2.464, ppl=5.52, wps=5850.4, ups=0.09, wpb=64780, bsz=128, num_updates=16263, lr=9.98779e-05, gnorm=2.031, loss_scale=8, train_wall=11, gb_free=2.8, wall=187442 2021-06-20 22:42:59 | INFO | train_inner | epoch 006: 1350 / 3002 loss=2.456, ppl=5.49, wps=5858, ups=0.09, wpb=64838, bsz=128, num_updates=16264, lr=9.98779e-05, gnorm=2.024, loss_scale=8, train_wall=11, gb_free=2.8, wall=187453 2021-06-20 22:43:09 | INFO | train_inner | epoch 006: 1351 / 3002 loss=2.372, ppl=5.18, wps=6093.9, ups=0.09, wpb=64842, bsz=128, num_updates=16265, lr=9.98779e-05, gnorm=1.927, loss_scale=8, train_wall=10, gb_free=2.8, wall=187464 2021-06-20 22:43:20 | INFO | train_inner | epoch 006: 1352 / 3002 loss=2.464, ppl=5.52, wps=5811.2, ups=0.09, wpb=64864, bsz=128, num_updates=16266, lr=9.98779e-05, gnorm=1.882, loss_scale=8, train_wall=11, gb_free=2.8, wall=187475 2021-06-20 22:43:31 | INFO | train_inner | epoch 006: 1353 / 3002 loss=2.509, ppl=5.69, wps=5880.1, ups=0.09, wpb=64832, bsz=128, num_updates=16267, lr=9.98779e-05, gnorm=2.095, loss_scale=8, train_wall=11, gb_free=2.8, wall=187486 2021-06-20 22:43:43 | INFO | train_inner | epoch 006: 1354 / 3002 loss=2.591, ppl=6.03, wps=5823.1, ups=0.09, wpb=64821, bsz=128, num_updates=16268, lr=9.98778e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=187497 2021-06-20 22:43:54 | INFO | train_inner | epoch 006: 1355 / 3002 loss=2.432, ppl=5.4, wps=5943.5, ups=0.09, wpb=64899, bsz=128, num_updates=16269, lr=9.98778e-05, gnorm=3.677, loss_scale=8, train_wall=10, gb_free=2.8, wall=187508 2021-06-20 22:44:05 | INFO | train_inner | epoch 006: 1356 / 3002 loss=2.365, ppl=5.15, wps=5889.8, ups=0.09, wpb=64867, bsz=128, num_updates=16270, lr=9.98778e-05, gnorm=1.937, loss_scale=8, train_wall=11, gb_free=2.8, wall=187519 2021-06-20 22:44:16 | INFO | train_inner | epoch 006: 1357 / 3002 loss=2.439, ppl=5.42, wps=5854.1, ups=0.09, wpb=64762, bsz=128, num_updates=16271, lr=9.98778e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=187530 2021-06-20 22:44:26 | INFO | train_inner | epoch 006: 1358 / 3002 loss=2.579, ppl=5.98, wps=5947, ups=0.09, wpb=64787, bsz=128, num_updates=16272, lr=9.98778e-05, gnorm=1.931, loss_scale=8, train_wall=10, gb_free=2.8, wall=187541 2021-06-20 22:44:38 | INFO | train_inner | epoch 006: 1359 / 3002 loss=2.439, ppl=5.42, wps=5829.3, ups=0.09, wpb=64866, bsz=128, num_updates=16273, lr=9.98778e-05, gnorm=1.857, loss_scale=8, train_wall=11, gb_free=2.8, wall=187552 2021-06-20 22:44:49 | INFO | train_inner | epoch 006: 1360 / 3002 loss=2.474, ppl=5.55, wps=5924.9, ups=0.09, wpb=64759, bsz=128, num_updates=16274, lr=9.98778e-05, gnorm=2.488, loss_scale=8, train_wall=10, gb_free=2.8, wall=187563 2021-06-20 22:45:00 | INFO | train_inner | epoch 006: 1361 / 3002 loss=2.53, ppl=5.78, wps=5829.1, ups=0.09, wpb=64776, bsz=128, num_updates=16275, lr=9.98778e-05, gnorm=2.018, loss_scale=8, train_wall=11, gb_free=2.8, wall=187574 2021-06-20 22:45:11 | INFO | train_inner | epoch 006: 1362 / 3002 loss=2.468, ppl=5.53, wps=5797.9, ups=0.09, wpb=64837, bsz=128, num_updates=16276, lr=9.98778e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=187585 2021-06-20 22:45:22 | INFO | train_inner | epoch 006: 1363 / 3002 loss=2.277, ppl=4.85, wps=5830.1, ups=0.09, wpb=64857, bsz=128, num_updates=16277, lr=9.98778e-05, gnorm=1.861, loss_scale=8, train_wall=11, gb_free=2.8, wall=187596 2021-06-20 22:45:33 | INFO | train_inner | epoch 006: 1364 / 3002 loss=2.401, ppl=5.28, wps=5900.6, ups=0.09, wpb=64857, bsz=128, num_updates=16278, lr=9.98778e-05, gnorm=1.914, loss_scale=8, train_wall=10, gb_free=2.8, wall=187607 2021-06-20 22:45:44 | INFO | train_inner | epoch 006: 1365 / 3002 loss=2.514, ppl=5.71, wps=5823.2, ups=0.09, wpb=64897, bsz=128, num_updates=16279, lr=9.98778e-05, gnorm=1.994, loss_scale=8, train_wall=11, gb_free=2.8, wall=187618 2021-06-20 22:45:55 | INFO | train_inner | epoch 006: 1366 / 3002 loss=2.317, ppl=4.98, wps=5954.2, ups=0.09, wpb=64871, bsz=128, num_updates=16280, lr=9.98778e-05, gnorm=1.938, loss_scale=8, train_wall=10, gb_free=2.8, wall=187629 2021-06-20 22:46:06 | INFO | train_inner | epoch 006: 1367 / 3002 loss=2.523, ppl=5.75, wps=5865, ups=0.09, wpb=64801, bsz=128, num_updates=16281, lr=9.98777e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=187640 2021-06-20 22:46:17 | INFO | train_inner | epoch 006: 1368 / 3002 loss=2.427, ppl=5.38, wps=5895.7, ups=0.09, wpb=64824, bsz=128, num_updates=16282, lr=9.98777e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=187651 2021-06-20 22:46:28 | INFO | train_inner | epoch 006: 1369 / 3002 loss=2.343, ppl=5.07, wps=5946.4, ups=0.09, wpb=64810, bsz=128, num_updates=16283, lr=9.98777e-05, gnorm=1.918, loss_scale=8, train_wall=10, gb_free=2.8, wall=187662 2021-06-20 22:46:39 | INFO | train_inner | epoch 006: 1370 / 3002 loss=2.434, ppl=5.4, wps=5985.8, ups=0.09, wpb=64806, bsz=128, num_updates=16284, lr=9.98777e-05, gnorm=1.927, loss_scale=8, train_wall=10, gb_free=2.8, wall=187673 2021-06-20 22:46:50 | INFO | train_inner | epoch 006: 1371 / 3002 loss=2.436, ppl=5.41, wps=5898.1, ups=0.09, wpb=64823, bsz=128, num_updates=16285, lr=9.98777e-05, gnorm=2.026, loss_scale=8, train_wall=11, gb_free=2.8, wall=187684 2021-06-20 22:47:01 | INFO | train_inner | epoch 006: 1372 / 3002 loss=2.386, ppl=5.23, wps=5845.8, ups=0.09, wpb=64957, bsz=128, num_updates=16286, lr=9.98777e-05, gnorm=2.092, loss_scale=8, train_wall=11, gb_free=2.8, wall=187695 2021-06-20 22:47:12 | INFO | train_inner | epoch 006: 1373 / 3002 loss=2.389, ppl=5.24, wps=5987.8, ups=0.09, wpb=64830, bsz=128, num_updates=16287, lr=9.98777e-05, gnorm=2.016, loss_scale=8, train_wall=10, gb_free=2.8, wall=187706 2021-06-20 22:47:23 | INFO | train_inner | epoch 006: 1374 / 3002 loss=2.628, ppl=6.18, wps=5875, ups=0.09, wpb=64823, bsz=128, num_updates=16288, lr=9.98777e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=187717 2021-06-20 22:47:34 | INFO | train_inner | epoch 006: 1375 / 3002 loss=2.412, ppl=5.32, wps=5852.5, ups=0.09, wpb=64919, bsz=128, num_updates=16289, lr=9.98777e-05, gnorm=3.893, loss_scale=8, train_wall=11, gb_free=2.8, wall=187728 2021-06-20 22:47:45 | INFO | train_inner | epoch 006: 1376 / 3002 loss=2.598, ppl=6.06, wps=5869.6, ups=0.09, wpb=64782, bsz=128, num_updates=16290, lr=9.98777e-05, gnorm=2.188, loss_scale=8, train_wall=11, gb_free=2.8, wall=187739 2021-06-20 22:47:56 | INFO | train_inner | epoch 006: 1377 / 3002 loss=2.495, ppl=5.64, wps=5940.8, ups=0.09, wpb=64817, bsz=128, num_updates=16291, lr=9.98777e-05, gnorm=1.976, loss_scale=8, train_wall=10, gb_free=2.8, wall=187750 2021-06-20 22:48:07 | INFO | train_inner | epoch 006: 1378 / 3002 loss=2.323, ppl=5.01, wps=5813.2, ups=0.09, wpb=64822, bsz=128, num_updates=16292, lr=9.98777e-05, gnorm=1.867, loss_scale=8, train_wall=11, gb_free=2.8, wall=187761 2021-06-20 22:48:18 | INFO | train_inner | epoch 006: 1379 / 3002 loss=2.406, ppl=5.3, wps=5855.8, ups=0.09, wpb=64832, bsz=128, num_updates=16293, lr=9.98776e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=187772 2021-06-20 22:48:29 | INFO | train_inner | epoch 006: 1380 / 3002 loss=2.446, ppl=5.45, wps=5758.9, ups=0.09, wpb=64834, bsz=128, num_updates=16294, lr=9.98776e-05, gnorm=2.144, loss_scale=8, train_wall=11, gb_free=2.8, wall=187784 2021-06-20 22:48:40 | INFO | train_inner | epoch 006: 1381 / 3002 loss=2.402, ppl=5.28, wps=5972.2, ups=0.09, wpb=64886, bsz=128, num_updates=16295, lr=9.98776e-05, gnorm=1.905, loss_scale=8, train_wall=10, gb_free=2.8, wall=187794 2021-06-20 22:48:51 | INFO | train_inner | epoch 006: 1382 / 3002 loss=2.428, ppl=5.38, wps=5828.9, ups=0.09, wpb=64828, bsz=128, num_updates=16296, lr=9.98776e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=187806 2021-06-20 22:49:03 | INFO | train_inner | epoch 006: 1383 / 3002 loss=2.465, ppl=5.52, wps=5752, ups=0.09, wpb=64839, bsz=128, num_updates=16297, lr=9.98776e-05, gnorm=1.924, loss_scale=8, train_wall=11, gb_free=2.8, wall=187817 2021-06-20 22:49:14 | INFO | train_inner | epoch 006: 1384 / 3002 loss=2.535, ppl=5.79, wps=5867.9, ups=0.09, wpb=64775, bsz=128, num_updates=16298, lr=9.98776e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=187828 2021-06-20 22:49:25 | INFO | train_inner | epoch 006: 1385 / 3002 loss=2.722, ppl=6.6, wps=5826.2, ups=0.09, wpb=64856, bsz=128, num_updates=16299, lr=9.98776e-05, gnorm=2.061, loss_scale=8, train_wall=11, gb_free=2.8, wall=187839 2021-06-20 22:49:36 | INFO | train_inner | epoch 006: 1386 / 3002 loss=2.432, ppl=5.4, wps=5828.2, ups=0.09, wpb=64774, bsz=128, num_updates=16300, lr=9.98776e-05, gnorm=2.174, loss_scale=8, train_wall=11, gb_free=2.8, wall=187850 2021-06-20 22:49:47 | INFO | train_inner | epoch 006: 1387 / 3002 loss=2.442, ppl=5.43, wps=5819.1, ups=0.09, wpb=64804, bsz=128, num_updates=16301, lr=9.98776e-05, gnorm=2.234, loss_scale=8, train_wall=11, gb_free=2.8, wall=187861 2021-06-20 22:49:58 | INFO | train_inner | epoch 006: 1388 / 3002 loss=2.433, ppl=5.4, wps=5744.6, ups=0.09, wpb=64821, bsz=128, num_updates=16302, lr=9.98776e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=187873 2021-06-20 22:50:09 | INFO | train_inner | epoch 006: 1389 / 3002 loss=2.382, ppl=5.21, wps=5753.7, ups=0.09, wpb=64841, bsz=128, num_updates=16303, lr=9.98776e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=187884 2021-06-20 22:50:21 | INFO | train_inner | epoch 006: 1390 / 3002 loss=2.43, ppl=5.39, wps=5750.3, ups=0.09, wpb=64767, bsz=128, num_updates=16304, lr=9.98776e-05, gnorm=2.065, loss_scale=8, train_wall=11, gb_free=2.8, wall=187895 2021-06-20 22:50:31 | INFO | train_inner | epoch 006: 1391 / 3002 loss=2.335, ppl=5.05, wps=6060.2, ups=0.09, wpb=64805, bsz=128, num_updates=16305, lr=9.98776e-05, gnorm=1.936, loss_scale=8, train_wall=10, gb_free=2.8, wall=187906 2021-06-20 22:50:43 | INFO | train_inner | epoch 006: 1392 / 3002 loss=2.489, ppl=5.62, wps=5783, ups=0.09, wpb=64794, bsz=128, num_updates=16306, lr=9.98775e-05, gnorm=4.609, loss_scale=8, train_wall=11, gb_free=2.8, wall=187917 2021-06-20 22:50:54 | INFO | train_inner | epoch 006: 1393 / 3002 loss=2.587, ppl=6.01, wps=5845, ups=0.09, wpb=64737, bsz=128, num_updates=16307, lr=9.98775e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=187928 2021-06-20 22:51:05 | INFO | train_inner | epoch 006: 1394 / 3002 loss=2.432, ppl=5.4, wps=5871.1, ups=0.09, wpb=64826, bsz=128, num_updates=16308, lr=9.98775e-05, gnorm=1.869, loss_scale=8, train_wall=11, gb_free=2.8, wall=187939 2021-06-20 22:51:16 | INFO | train_inner | epoch 006: 1395 / 3002 loss=2.398, ppl=5.27, wps=5887, ups=0.09, wpb=64732, bsz=128, num_updates=16309, lr=9.98775e-05, gnorm=2.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=187950 2021-06-20 22:51:27 | INFO | train_inner | epoch 006: 1396 / 3002 loss=2.47, ppl=5.54, wps=5830.1, ups=0.09, wpb=64758, bsz=128, num_updates=16310, lr=9.98775e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=187961 2021-06-20 22:51:38 | INFO | train_inner | epoch 006: 1397 / 3002 loss=2.533, ppl=5.79, wps=5764.1, ups=0.09, wpb=64745, bsz=128, num_updates=16311, lr=9.98775e-05, gnorm=1.879, loss_scale=8, train_wall=11, gb_free=2.8, wall=187972 2021-06-20 22:51:49 | INFO | train_inner | epoch 006: 1398 / 3002 loss=2.406, ppl=5.3, wps=5843.3, ups=0.09, wpb=64803, bsz=128, num_updates=16312, lr=9.98775e-05, gnorm=1.91, loss_scale=8, train_wall=11, gb_free=2.8, wall=187984 2021-06-20 22:52:00 | INFO | train_inner | epoch 006: 1399 / 3002 loss=2.416, ppl=5.34, wps=5810.8, ups=0.09, wpb=64829, bsz=128, num_updates=16313, lr=9.98775e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=187995 2021-06-20 22:52:11 | INFO | train_inner | epoch 006: 1400 / 3002 loss=2.444, ppl=5.44, wps=5858.8, ups=0.09, wpb=64840, bsz=128, num_updates=16314, lr=9.98775e-05, gnorm=1.995, loss_scale=8, train_wall=11, gb_free=2.8, wall=188006 2021-06-20 22:52:22 | INFO | train_inner | epoch 006: 1401 / 3002 loss=2.49, ppl=5.62, wps=5868.1, ups=0.09, wpb=64915, bsz=128, num_updates=16315, lr=9.98775e-05, gnorm=2.09, loss_scale=8, train_wall=11, gb_free=2.8, wall=188017 2021-06-20 22:52:33 | INFO | train_inner | epoch 006: 1402 / 3002 loss=2.582, ppl=5.99, wps=5941.4, ups=0.09, wpb=64854, bsz=128, num_updates=16316, lr=9.98775e-05, gnorm=2.033, loss_scale=8, train_wall=10, gb_free=2.8, wall=188028 2021-06-20 22:52:44 | INFO | train_inner | epoch 006: 1403 / 3002 loss=2.53, ppl=5.78, wps=5862.3, ups=0.09, wpb=64821, bsz=128, num_updates=16317, lr=9.98775e-05, gnorm=2.004, loss_scale=8, train_wall=11, gb_free=2.8, wall=188039 2021-06-20 22:52:56 | INFO | train_inner | epoch 006: 1404 / 3002 loss=2.41, ppl=5.32, wps=5854.2, ups=0.09, wpb=64841, bsz=128, num_updates=16318, lr=9.98774e-05, gnorm=2.079, loss_scale=8, train_wall=11, gb_free=2.8, wall=188050 2021-06-20 22:53:07 | INFO | train_inner | epoch 006: 1405 / 3002 loss=2.485, ppl=5.6, wps=5879.5, ups=0.09, wpb=64843, bsz=128, num_updates=16319, lr=9.98774e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=188061 2021-06-20 22:53:18 | INFO | train_inner | epoch 006: 1406 / 3002 loss=2.529, ppl=5.77, wps=5823.2, ups=0.09, wpb=64849, bsz=128, num_updates=16320, lr=9.98774e-05, gnorm=2.108, loss_scale=8, train_wall=11, gb_free=2.8, wall=188072 2021-06-20 22:53:29 | INFO | train_inner | epoch 006: 1407 / 3002 loss=2.571, ppl=5.94, wps=5897.1, ups=0.09, wpb=64811, bsz=128, num_updates=16321, lr=9.98774e-05, gnorm=2.01, loss_scale=8, train_wall=11, gb_free=2.8, wall=188083 2021-06-20 22:53:40 | INFO | train_inner | epoch 006: 1408 / 3002 loss=2.414, ppl=5.33, wps=5891.8, ups=0.09, wpb=64846, bsz=128, num_updates=16322, lr=9.98774e-05, gnorm=2.104, loss_scale=8, train_wall=11, gb_free=2.8, wall=188094 2021-06-20 22:53:51 | INFO | train_inner | epoch 006: 1409 / 3002 loss=2.553, ppl=5.87, wps=5907.6, ups=0.09, wpb=64743, bsz=128, num_updates=16323, lr=9.98774e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=188105 2021-06-20 22:54:02 | INFO | train_inner | epoch 006: 1410 / 3002 loss=2.367, ppl=5.16, wps=5870.4, ups=0.09, wpb=64827, bsz=128, num_updates=16324, lr=9.98774e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=188116 2021-06-20 22:54:13 | INFO | train_inner | epoch 006: 1411 / 3002 loss=2.654, ppl=6.29, wps=5891.2, ups=0.09, wpb=64683, bsz=128, num_updates=16325, lr=9.98774e-05, gnorm=2.244, loss_scale=8, train_wall=11, gb_free=2.8, wall=188127 2021-06-20 22:54:24 | INFO | train_inner | epoch 006: 1412 / 3002 loss=2.483, ppl=5.59, wps=5911.7, ups=0.09, wpb=64887, bsz=128, num_updates=16326, lr=9.98774e-05, gnorm=2.001, loss_scale=8, train_wall=10, gb_free=2.8, wall=188138 2021-06-20 22:54:35 | INFO | train_inner | epoch 006: 1413 / 3002 loss=2.351, ppl=5.1, wps=5863, ups=0.09, wpb=64852, bsz=128, num_updates=16327, lr=9.98774e-05, gnorm=5.618, loss_scale=8, train_wall=11, gb_free=2.8, wall=188149 2021-06-20 22:54:46 | INFO | train_inner | epoch 006: 1414 / 3002 loss=2.534, ppl=5.79, wps=5709.3, ups=0.09, wpb=64816, bsz=128, num_updates=16328, lr=9.98774e-05, gnorm=1.968, loss_scale=8, train_wall=11, gb_free=2.8, wall=188160 2021-06-20 22:54:57 | INFO | train_inner | epoch 006: 1415 / 3002 loss=2.476, ppl=5.56, wps=5772, ups=0.09, wpb=64796, bsz=128, num_updates=16329, lr=9.98774e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=188172 2021-06-20 22:55:08 | INFO | train_inner | epoch 006: 1416 / 3002 loss=2.355, ppl=5.12, wps=5887.8, ups=0.09, wpb=64827, bsz=128, num_updates=16330, lr=9.98774e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=188183 2021-06-20 22:55:20 | INFO | train_inner | epoch 006: 1417 / 3002 loss=2.458, ppl=5.5, wps=5769.2, ups=0.09, wpb=64796, bsz=128, num_updates=16331, lr=9.98773e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=188194 2021-06-20 22:55:31 | INFO | train_inner | epoch 006: 1418 / 3002 loss=2.377, ppl=5.19, wps=5869.9, ups=0.09, wpb=64751, bsz=128, num_updates=16332, lr=9.98773e-05, gnorm=1.946, loss_scale=8, train_wall=11, gb_free=2.8, wall=188205 2021-06-20 22:55:42 | INFO | train_inner | epoch 006: 1419 / 3002 loss=2.362, ppl=5.14, wps=5823.9, ups=0.09, wpb=64834, bsz=128, num_updates=16333, lr=9.98773e-05, gnorm=1.889, loss_scale=8, train_wall=11, gb_free=2.8, wall=188216 2021-06-20 22:55:53 | INFO | train_inner | epoch 006: 1420 / 3002 loss=2.423, ppl=5.36, wps=5813.8, ups=0.09, wpb=64911, bsz=128, num_updates=16334, lr=9.98773e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=188227 2021-06-20 22:56:04 | INFO | train_inner | epoch 006: 1421 / 3002 loss=2.522, ppl=5.74, wps=5860.4, ups=0.09, wpb=64791, bsz=128, num_updates=16335, lr=9.98773e-05, gnorm=2.413, loss_scale=8, train_wall=11, gb_free=2.8, wall=188238 2021-06-20 22:56:15 | INFO | train_inner | epoch 006: 1422 / 3002 loss=2.424, ppl=5.37, wps=5897, ups=0.09, wpb=64836, bsz=128, num_updates=16336, lr=9.98773e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=188249 2021-06-20 22:56:26 | INFO | train_inner | epoch 006: 1423 / 3002 loss=2.599, ppl=6.06, wps=5816.6, ups=0.09, wpb=64745, bsz=128, num_updates=16337, lr=9.98773e-05, gnorm=2.228, loss_scale=8, train_wall=11, gb_free=2.8, wall=188260 2021-06-20 22:56:37 | INFO | train_inner | epoch 006: 1424 / 3002 loss=2.591, ppl=6.03, wps=5898.7, ups=0.09, wpb=64843, bsz=128, num_updates=16338, lr=9.98773e-05, gnorm=2.041, loss_scale=8, train_wall=11, gb_free=2.8, wall=188271 2021-06-20 22:56:48 | INFO | train_inner | epoch 006: 1425 / 3002 loss=2.443, ppl=5.44, wps=5986.4, ups=0.09, wpb=64904, bsz=128, num_updates=16339, lr=9.98773e-05, gnorm=1.918, loss_scale=8, train_wall=10, gb_free=2.8, wall=188282 2021-06-20 22:56:59 | INFO | train_inner | epoch 006: 1426 / 3002 loss=2.488, ppl=5.61, wps=5854, ups=0.09, wpb=64744, bsz=128, num_updates=16340, lr=9.98773e-05, gnorm=1.996, loss_scale=8, train_wall=11, gb_free=2.8, wall=188293 2021-06-20 22:57:10 | INFO | train_inner | epoch 006: 1427 / 3002 loss=2.648, ppl=6.27, wps=5754.4, ups=0.09, wpb=64883, bsz=128, num_updates=16341, lr=9.98773e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=188305 2021-06-20 22:57:21 | INFO | train_inner | epoch 006: 1428 / 3002 loss=2.525, ppl=5.76, wps=5920.8, ups=0.09, wpb=64888, bsz=128, num_updates=16342, lr=9.98773e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=188316 2021-06-20 22:57:32 | INFO | train_inner | epoch 006: 1429 / 3002 loss=2.455, ppl=5.48, wps=5845.6, ups=0.09, wpb=64797, bsz=128, num_updates=16343, lr=9.98772e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=188327 2021-06-20 22:57:43 | INFO | train_inner | epoch 006: 1430 / 3002 loss=2.433, ppl=5.4, wps=5870.5, ups=0.09, wpb=64771, bsz=128, num_updates=16344, lr=9.98772e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=188338 2021-06-20 22:57:55 | INFO | train_inner | epoch 006: 1431 / 3002 loss=2.554, ppl=5.87, wps=5753.7, ups=0.09, wpb=64815, bsz=128, num_updates=16345, lr=9.98772e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=188349 2021-06-20 22:58:06 | INFO | train_inner | epoch 006: 1432 / 3002 loss=2.46, ppl=5.5, wps=5875.3, ups=0.09, wpb=64902, bsz=128, num_updates=16346, lr=9.98772e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=188360 2021-06-20 22:58:17 | INFO | train_inner | epoch 006: 1433 / 3002 loss=2.391, ppl=5.25, wps=5873.9, ups=0.09, wpb=64789, bsz=128, num_updates=16347, lr=9.98772e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=188371 2021-06-20 22:58:28 | INFO | train_inner | epoch 006: 1434 / 3002 loss=2.52, ppl=5.73, wps=5837.1, ups=0.09, wpb=64792, bsz=128, num_updates=16348, lr=9.98772e-05, gnorm=2.234, loss_scale=8, train_wall=11, gb_free=2.8, wall=188382 2021-06-20 22:58:39 | INFO | train_inner | epoch 006: 1435 / 3002 loss=2.469, ppl=5.54, wps=5815.7, ups=0.09, wpb=64883, bsz=128, num_updates=16349, lr=9.98772e-05, gnorm=2.125, loss_scale=8, train_wall=11, gb_free=2.8, wall=188393 2021-06-20 22:58:50 | INFO | train_inner | epoch 006: 1436 / 3002 loss=2.471, ppl=5.54, wps=5919.8, ups=0.09, wpb=64801, bsz=128, num_updates=16350, lr=9.98772e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=188404 2021-06-20 22:59:01 | INFO | train_inner | epoch 006: 1437 / 3002 loss=2.601, ppl=6.07, wps=5892.8, ups=0.09, wpb=64889, bsz=128, num_updates=16351, lr=9.98772e-05, gnorm=1.903, loss_scale=8, train_wall=11, gb_free=2.8, wall=188415 2021-06-20 22:59:12 | INFO | train_inner | epoch 006: 1438 / 3002 loss=2.434, ppl=5.4, wps=5953.7, ups=0.09, wpb=64844, bsz=128, num_updates=16352, lr=9.98772e-05, gnorm=1.895, loss_scale=8, train_wall=10, gb_free=2.8, wall=188426 2021-06-20 22:59:23 | INFO | train_inner | epoch 006: 1439 / 3002 loss=2.393, ppl=5.25, wps=5772.5, ups=0.09, wpb=64866, bsz=128, num_updates=16353, lr=9.98772e-05, gnorm=1.896, loss_scale=8, train_wall=11, gb_free=2.8, wall=188437 2021-06-20 22:59:34 | INFO | train_inner | epoch 006: 1440 / 3002 loss=2.451, ppl=5.47, wps=5795.7, ups=0.09, wpb=64746, bsz=128, num_updates=16354, lr=9.98772e-05, gnorm=1.883, loss_scale=8, train_wall=11, gb_free=2.8, wall=188448 2021-06-20 22:59:45 | INFO | train_inner | epoch 006: 1441 / 3002 loss=2.551, ppl=5.86, wps=5817.8, ups=0.09, wpb=64717, bsz=128, num_updates=16355, lr=9.98772e-05, gnorm=1.91, loss_scale=8, train_wall=11, gb_free=2.8, wall=188460 2021-06-20 22:59:57 | INFO | train_inner | epoch 006: 1442 / 3002 loss=2.58, ppl=5.98, wps=5763.9, ups=0.09, wpb=64879, bsz=128, num_updates=16356, lr=9.98771e-05, gnorm=1.954, loss_scale=8, train_wall=11, gb_free=2.8, wall=188471 2021-06-20 23:00:08 | INFO | train_inner | epoch 006: 1443 / 3002 loss=2.435, ppl=5.41, wps=5702.5, ups=0.09, wpb=64860, bsz=128, num_updates=16357, lr=9.98771e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=188482 2021-06-20 23:00:19 | INFO | train_inner | epoch 006: 1444 / 3002 loss=2.417, ppl=5.34, wps=5777.6, ups=0.09, wpb=64829, bsz=128, num_updates=16358, lr=9.98771e-05, gnorm=1.83, loss_scale=8, train_wall=11, gb_free=2.8, wall=188493 2021-06-20 23:00:30 | INFO | train_inner | epoch 006: 1445 / 3002 loss=2.501, ppl=5.66, wps=5900.6, ups=0.09, wpb=64820, bsz=128, num_updates=16359, lr=9.98771e-05, gnorm=1.875, loss_scale=8, train_wall=11, gb_free=2.8, wall=188504 2021-06-20 23:00:41 | INFO | train_inner | epoch 006: 1446 / 3002 loss=2.623, ppl=6.16, wps=5758.1, ups=0.09, wpb=64746, bsz=128, num_updates=16360, lr=9.98771e-05, gnorm=2.077, loss_scale=8, train_wall=11, gb_free=2.8, wall=188516 2021-06-20 23:00:53 | INFO | train_inner | epoch 006: 1447 / 3002 loss=2.431, ppl=5.39, wps=5801.8, ups=0.09, wpb=64824, bsz=128, num_updates=16361, lr=9.98771e-05, gnorm=1.95, loss_scale=8, train_wall=11, gb_free=2.8, wall=188527 2021-06-20 23:01:04 | INFO | train_inner | epoch 006: 1448 / 3002 loss=2.494, ppl=5.63, wps=5835.7, ups=0.09, wpb=64802, bsz=128, num_updates=16362, lr=9.98771e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=188538 2021-06-20 23:01:15 | INFO | train_inner | epoch 006: 1449 / 3002 loss=2.598, ppl=6.06, wps=5829.7, ups=0.09, wpb=64753, bsz=128, num_updates=16363, lr=9.98771e-05, gnorm=1.92, loss_scale=8, train_wall=11, gb_free=2.8, wall=188549 2021-06-20 23:01:26 | INFO | train_inner | epoch 006: 1450 / 3002 loss=2.402, ppl=5.28, wps=5802.2, ups=0.09, wpb=64873, bsz=128, num_updates=16364, lr=9.98771e-05, gnorm=2.073, loss_scale=8, train_wall=11, gb_free=2.8, wall=188560 2021-06-20 23:01:37 | INFO | train_inner | epoch 006: 1451 / 3002 loss=2.592, ppl=6.03, wps=5848.1, ups=0.09, wpb=64906, bsz=128, num_updates=16365, lr=9.98771e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=188571 2021-06-20 23:01:48 | INFO | train_inner | epoch 006: 1452 / 3002 loss=2.56, ppl=5.9, wps=5937.9, ups=0.09, wpb=64854, bsz=128, num_updates=16366, lr=9.98771e-05, gnorm=1.846, loss_scale=8, train_wall=10, gb_free=2.8, wall=188582 2021-06-20 23:01:59 | INFO | train_inner | epoch 006: 1453 / 3002 loss=2.446, ppl=5.45, wps=5857.6, ups=0.09, wpb=64858, bsz=128, num_updates=16367, lr=9.98771e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=188593 2021-06-20 23:02:10 | INFO | train_inner | epoch 006: 1454 / 3002 loss=2.362, ppl=5.14, wps=5808.1, ups=0.09, wpb=64819, bsz=128, num_updates=16368, lr=9.9877e-05, gnorm=1.859, loss_scale=8, train_wall=11, gb_free=2.8, wall=188605 2021-06-20 23:02:21 | INFO | train_inner | epoch 006: 1455 / 3002 loss=2.528, ppl=5.77, wps=5874, ups=0.09, wpb=64781, bsz=128, num_updates=16369, lr=9.9877e-05, gnorm=1.84, loss_scale=8, train_wall=11, gb_free=2.8, wall=188616 2021-06-20 23:02:32 | INFO | train_inner | epoch 006: 1456 / 3002 loss=2.584, ppl=6, wps=5905.7, ups=0.09, wpb=64862, bsz=128, num_updates=16370, lr=9.9877e-05, gnorm=3.726, loss_scale=8, train_wall=11, gb_free=2.8, wall=188627 2021-06-20 23:02:43 | INFO | train_inner | epoch 006: 1457 / 3002 loss=2.482, ppl=5.58, wps=6002.6, ups=0.09, wpb=64922, bsz=128, num_updates=16371, lr=9.9877e-05, gnorm=1.932, loss_scale=8, train_wall=10, gb_free=2.8, wall=188637 2021-06-20 23:02:54 | INFO | train_inner | epoch 006: 1458 / 3002 loss=2.547, ppl=5.84, wps=5818.2, ups=0.09, wpb=64806, bsz=128, num_updates=16372, lr=9.9877e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=188648 2021-06-20 23:03:05 | INFO | train_inner | epoch 006: 1459 / 3002 loss=2.511, ppl=5.7, wps=5880.3, ups=0.09, wpb=64782, bsz=128, num_updates=16373, lr=9.9877e-05, gnorm=1.97, loss_scale=8, train_wall=11, gb_free=2.8, wall=188660 2021-06-20 23:03:16 | INFO | train_inner | epoch 006: 1460 / 3002 loss=2.424, ppl=5.37, wps=5753.4, ups=0.09, wpb=64811, bsz=128, num_updates=16374, lr=9.9877e-05, gnorm=1.868, loss_scale=8, train_wall=11, gb_free=2.8, wall=188671 2021-06-20 23:03:28 | INFO | train_inner | epoch 006: 1461 / 3002 loss=2.516, ppl=5.72, wps=5777.3, ups=0.09, wpb=64787, bsz=128, num_updates=16375, lr=9.9877e-05, gnorm=2.168, loss_scale=8, train_wall=11, gb_free=2.8, wall=188682 2021-06-20 23:03:39 | INFO | train_inner | epoch 006: 1462 / 3002 loss=2.496, ppl=5.64, wps=5873, ups=0.09, wpb=64821, bsz=128, num_updates=16376, lr=9.9877e-05, gnorm=2.501, loss_scale=8, train_wall=11, gb_free=2.8, wall=188693 2021-06-20 23:03:50 | INFO | train_inner | epoch 006: 1463 / 3002 loss=2.492, ppl=5.63, wps=5779.6, ups=0.09, wpb=64870, bsz=128, num_updates=16377, lr=9.9877e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=188704 2021-06-20 23:04:01 | INFO | train_inner | epoch 006: 1464 / 3002 loss=2.636, ppl=6.21, wps=5836.1, ups=0.09, wpb=64777, bsz=128, num_updates=16378, lr=9.9877e-05, gnorm=3.611, loss_scale=8, train_wall=11, gb_free=2.8, wall=188715 2021-06-20 23:04:12 | INFO | train_inner | epoch 006: 1465 / 3002 loss=2.355, ppl=5.12, wps=5860.6, ups=0.09, wpb=64888, bsz=128, num_updates=16379, lr=9.9877e-05, gnorm=1.927, loss_scale=8, train_wall=11, gb_free=2.8, wall=188726 2021-06-20 23:04:23 | INFO | train_inner | epoch 006: 1466 / 3002 loss=2.397, ppl=5.27, wps=5869.6, ups=0.09, wpb=64832, bsz=128, num_updates=16380, lr=9.9877e-05, gnorm=1.918, loss_scale=8, train_wall=11, gb_free=2.8, wall=188737 2021-06-20 23:04:34 | INFO | train_inner | epoch 006: 1467 / 3002 loss=2.537, ppl=5.8, wps=5966.6, ups=0.09, wpb=64840, bsz=128, num_updates=16381, lr=9.98769e-05, gnorm=1.902, loss_scale=8, train_wall=10, gb_free=2.8, wall=188748 2021-06-20 23:04:45 | INFO | train_inner | epoch 006: 1468 / 3002 loss=2.36, ppl=5.13, wps=5838, ups=0.09, wpb=64826, bsz=128, num_updates=16382, lr=9.98769e-05, gnorm=1.907, loss_scale=8, train_wall=11, gb_free=2.8, wall=188759 2021-06-20 23:04:56 | INFO | train_inner | epoch 006: 1469 / 3002 loss=2.339, ppl=5.06, wps=5773.7, ups=0.09, wpb=64824, bsz=128, num_updates=16383, lr=9.98769e-05, gnorm=2.034, loss_scale=16, train_wall=11, gb_free=2.8, wall=188771 2021-06-20 23:05:07 | INFO | train_inner | epoch 006: 1470 / 3002 loss=2.491, ppl=5.62, wps=5867.6, ups=0.09, wpb=64821, bsz=128, num_updates=16384, lr=9.98769e-05, gnorm=1.904, loss_scale=16, train_wall=11, gb_free=2.8, wall=188782 2021-06-20 23:05:18 | INFO | train_inner | epoch 006: 1471 / 3002 loss=2.499, ppl=5.65, wps=5882.9, ups=0.09, wpb=64850, bsz=128, num_updates=16385, lr=9.98769e-05, gnorm=1.958, loss_scale=16, train_wall=11, gb_free=2.8, wall=188793 2021-06-20 23:05:30 | INFO | train_inner | epoch 006: 1472 / 3002 loss=2.432, ppl=5.4, wps=5805.3, ups=0.09, wpb=64787, bsz=128, num_updates=16386, lr=9.98769e-05, gnorm=1.954, loss_scale=16, train_wall=11, gb_free=2.8, wall=188804 2021-06-20 23:05:41 | INFO | train_inner | epoch 006: 1473 / 3002 loss=2.442, ppl=5.44, wps=5716.9, ups=0.09, wpb=64844, bsz=128, num_updates=16387, lr=9.98769e-05, gnorm=1.889, loss_scale=16, train_wall=11, gb_free=2.8, wall=188815 2021-06-20 23:05:52 | INFO | train_inner | epoch 006: 1474 / 3002 loss=2.422, ppl=5.36, wps=5989.2, ups=0.09, wpb=64865, bsz=128, num_updates=16388, lr=9.98769e-05, gnorm=1.892, loss_scale=16, train_wall=10, gb_free=2.8, wall=188826 2021-06-20 23:06:03 | INFO | train_inner | epoch 006: 1475 / 3002 loss=2.475, ppl=5.56, wps=5866.9, ups=0.09, wpb=64808, bsz=128, num_updates=16389, lr=9.98769e-05, gnorm=3.562, loss_scale=16, train_wall=11, gb_free=2.8, wall=188837 2021-06-20 23:06:14 | INFO | train_inner | epoch 006: 1476 / 3002 loss=2.423, ppl=5.36, wps=5836.2, ups=0.09, wpb=64776, bsz=128, num_updates=16390, lr=9.98769e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=188848 2021-06-20 23:06:25 | INFO | train_inner | epoch 006: 1477 / 3002 loss=2.492, ppl=5.62, wps=5808.3, ups=0.09, wpb=64829, bsz=128, num_updates=16391, lr=9.98769e-05, gnorm=1.908, loss_scale=16, train_wall=11, gb_free=2.8, wall=188859 2021-06-20 23:06:36 | INFO | train_inner | epoch 006: 1478 / 3002 loss=2.517, ppl=5.72, wps=5877.9, ups=0.09, wpb=64929, bsz=128, num_updates=16392, lr=9.98769e-05, gnorm=1.893, loss_scale=16, train_wall=11, gb_free=2.8, wall=188870 2021-06-20 23:06:47 | INFO | train_inner | epoch 006: 1479 / 3002 loss=2.448, ppl=5.46, wps=6027.4, ups=0.09, wpb=64872, bsz=128, num_updates=16393, lr=9.98768e-05, gnorm=1.819, loss_scale=16, train_wall=10, gb_free=2.8, wall=188881 2021-06-20 23:06:58 | INFO | train_inner | epoch 006: 1480 / 3002 loss=2.34, ppl=5.06, wps=5849, ups=0.09, wpb=64799, bsz=128, num_updates=16394, lr=9.98768e-05, gnorm=1.848, loss_scale=16, train_wall=11, gb_free=2.8, wall=188892 2021-06-20 23:07:09 | INFO | train_inner | epoch 006: 1481 / 3002 loss=2.568, ppl=5.93, wps=5980.8, ups=0.09, wpb=64866, bsz=128, num_updates=16395, lr=9.98768e-05, gnorm=2.062, loss_scale=16, train_wall=10, gb_free=2.8, wall=188903 2021-06-20 23:07:20 | INFO | train_inner | epoch 006: 1482 / 3002 loss=2.43, ppl=5.39, wps=5941.1, ups=0.09, wpb=64862, bsz=128, num_updates=16396, lr=9.98768e-05, gnorm=3.321, loss_scale=16, train_wall=10, gb_free=2.8, wall=188914 2021-06-20 23:07:31 | INFO | train_inner | epoch 006: 1483 / 3002 loss=2.496, ppl=5.64, wps=5940, ups=0.09, wpb=64776, bsz=128, num_updates=16397, lr=9.98768e-05, gnorm=1.974, loss_scale=16, train_wall=10, gb_free=2.8, wall=188925 2021-06-20 23:07:42 | INFO | train_inner | epoch 006: 1484 / 3002 loss=2.416, ppl=5.34, wps=5872.9, ups=0.09, wpb=64825, bsz=128, num_updates=16398, lr=9.98768e-05, gnorm=1.949, loss_scale=16, train_wall=11, gb_free=2.8, wall=188936 2021-06-20 23:07:53 | INFO | train_inner | epoch 006: 1485 / 3002 loss=2.407, ppl=5.31, wps=5776.6, ups=0.09, wpb=64911, bsz=128, num_updates=16399, lr=9.98768e-05, gnorm=1.918, loss_scale=16, train_wall=11, gb_free=2.8, wall=188947 2021-06-20 23:08:04 | INFO | train_inner | epoch 006: 1486 / 3002 loss=2.612, ppl=6.12, wps=5851, ups=0.09, wpb=64789, bsz=128, num_updates=16400, lr=9.98768e-05, gnorm=2.052, loss_scale=16, train_wall=11, gb_free=2.8, wall=188958 2021-06-20 23:08:15 | INFO | train_inner | epoch 006: 1487 / 3002 loss=2.451, ppl=5.47, wps=5888, ups=0.09, wpb=64899, bsz=128, num_updates=16401, lr=9.98768e-05, gnorm=1.902, loss_scale=16, train_wall=11, gb_free=2.8, wall=188969 2021-06-20 23:08:26 | INFO | train_inner | epoch 006: 1488 / 3002 loss=2.47, ppl=5.54, wps=5756.6, ups=0.09, wpb=64780, bsz=128, num_updates=16402, lr=9.98768e-05, gnorm=1.873, loss_scale=16, train_wall=11, gb_free=2.8, wall=188981 2021-06-20 23:08:37 | INFO | train_inner | epoch 006: 1489 / 3002 loss=2.509, ppl=5.69, wps=5894.5, ups=0.09, wpb=64795, bsz=128, num_updates=16403, lr=9.98768e-05, gnorm=1.904, loss_scale=16, train_wall=11, gb_free=2.8, wall=188992 2021-06-20 23:08:48 | INFO | train_inner | epoch 006: 1490 / 3002 loss=2.459, ppl=5.5, wps=5770.2, ups=0.09, wpb=64822, bsz=128, num_updates=16404, lr=9.98768e-05, gnorm=1.925, loss_scale=16, train_wall=11, gb_free=2.8, wall=189003 2021-06-20 23:09:00 | INFO | train_inner | epoch 006: 1491 / 3002 loss=2.599, ppl=6.06, wps=5822.3, ups=0.09, wpb=64843, bsz=128, num_updates=16405, lr=9.98768e-05, gnorm=1.932, loss_scale=16, train_wall=11, gb_free=2.8, wall=189014 2021-06-20 23:09:10 | INFO | train_inner | epoch 006: 1492 / 3002 loss=2.599, ppl=6.06, wps=5993.3, ups=0.09, wpb=64891, bsz=128, num_updates=16406, lr=9.98767e-05, gnorm=1.956, loss_scale=16, train_wall=10, gb_free=2.8, wall=189025 2021-06-20 23:09:22 | INFO | train_inner | epoch 006: 1493 / 3002 loss=2.498, ppl=5.65, wps=5834.2, ups=0.09, wpb=64819, bsz=128, num_updates=16407, lr=9.98767e-05, gnorm=2.057, loss_scale=16, train_wall=11, gb_free=2.8, wall=189036 2021-06-20 23:09:33 | INFO | train_inner | epoch 006: 1494 / 3002 loss=2.4, ppl=5.28, wps=5773.6, ups=0.09, wpb=64839, bsz=128, num_updates=16408, lr=9.98767e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=189047 2021-06-20 23:09:44 | INFO | train_inner | epoch 006: 1495 / 3002 loss=2.212, ppl=4.63, wps=5918.5, ups=0.09, wpb=64851, bsz=128, num_updates=16409, lr=9.98767e-05, gnorm=1.932, loss_scale=16, train_wall=10, gb_free=2.8, wall=189058 2021-06-20 23:09:55 | INFO | train_inner | epoch 006: 1496 / 3002 loss=2.423, ppl=5.36, wps=5945.1, ups=0.09, wpb=64858, bsz=128, num_updates=16410, lr=9.98767e-05, gnorm=1.974, loss_scale=16, train_wall=10, gb_free=2.8, wall=189069 2021-06-20 23:10:06 | INFO | train_inner | epoch 006: 1497 / 3002 loss=2.521, ppl=5.74, wps=5813.9, ups=0.09, wpb=64830, bsz=128, num_updates=16411, lr=9.98767e-05, gnorm=1.976, loss_scale=16, train_wall=11, gb_free=2.8, wall=189080 2021-06-20 23:10:17 | INFO | train_inner | epoch 006: 1498 / 3002 loss=2.505, ppl=5.68, wps=5878, ups=0.09, wpb=64824, bsz=128, num_updates=16412, lr=9.98767e-05, gnorm=2.139, loss_scale=16, train_wall=11, gb_free=2.8, wall=189091 2021-06-20 23:10:28 | INFO | train_inner | epoch 006: 1499 / 3002 loss=2.502, ppl=5.66, wps=5814.7, ups=0.09, wpb=64758, bsz=128, num_updates=16413, lr=9.98767e-05, gnorm=1.97, loss_scale=16, train_wall=11, gb_free=2.8, wall=189102 2021-06-20 23:10:39 | INFO | train_inner | epoch 006: 1500 / 3002 loss=2.405, ppl=5.3, wps=5762.3, ups=0.09, wpb=64794, bsz=128, num_updates=16414, lr=9.98767e-05, gnorm=1.831, loss_scale=16, train_wall=11, gb_free=2.8, wall=189114 2021-06-20 23:10:50 | INFO | train_inner | epoch 006: 1501 / 3002 loss=2.468, ppl=5.53, wps=5878.2, ups=0.09, wpb=64816, bsz=128, num_updates=16415, lr=9.98767e-05, gnorm=1.921, loss_scale=16, train_wall=11, gb_free=2.8, wall=189125 2021-06-20 23:11:02 | INFO | train_inner | epoch 006: 1502 / 3002 loss=2.466, ppl=5.52, wps=5742.7, ups=0.09, wpb=64823, bsz=128, num_updates=16416, lr=9.98767e-05, gnorm=1.895, loss_scale=16, train_wall=11, gb_free=2.8, wall=189136 2021-06-20 23:11:13 | INFO | train_inner | epoch 006: 1503 / 3002 loss=2.44, ppl=5.43, wps=5724, ups=0.09, wpb=64832, bsz=128, num_updates=16417, lr=9.98767e-05, gnorm=2.457, loss_scale=16, train_wall=11, gb_free=2.8, wall=189147 2021-06-20 23:11:24 | INFO | train_inner | epoch 006: 1504 / 3002 loss=2.564, ppl=5.91, wps=6012.9, ups=0.09, wpb=64778, bsz=128, num_updates=16418, lr=9.98766e-05, gnorm=1.959, loss_scale=16, train_wall=10, gb_free=2.8, wall=189158 2021-06-20 23:11:34 | INFO | train_inner | epoch 006: 1505 / 3002 loss=2.608, ppl=6.1, wps=5989.9, ups=0.09, wpb=64857, bsz=128, num_updates=16419, lr=9.98766e-05, gnorm=1.974, loss_scale=16, train_wall=10, gb_free=2.8, wall=189169 2021-06-20 23:11:46 | INFO | train_inner | epoch 006: 1506 / 3002 loss=2.544, ppl=5.83, wps=5849.8, ups=0.09, wpb=64795, bsz=128, num_updates=16420, lr=9.98766e-05, gnorm=1.974, loss_scale=16, train_wall=11, gb_free=2.8, wall=189180 2021-06-20 23:11:57 | INFO | train_inner | epoch 006: 1507 / 3002 loss=2.601, ppl=6.07, wps=5822.1, ups=0.09, wpb=64858, bsz=128, num_updates=16421, lr=9.98766e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=189191 2021-06-20 23:12:08 | INFO | train_inner | epoch 006: 1508 / 3002 loss=2.381, ppl=5.21, wps=5924.5, ups=0.09, wpb=64837, bsz=128, num_updates=16422, lr=9.98766e-05, gnorm=2, loss_scale=16, train_wall=11, gb_free=2.8, wall=189202 2021-06-20 23:12:18 | INFO | train_inner | epoch 006: 1509 / 3002 loss=2.333, ppl=5.04, wps=5953.6, ups=0.09, wpb=64840, bsz=128, num_updates=16423, lr=9.98766e-05, gnorm=1.841, loss_scale=16, train_wall=10, gb_free=2.8, wall=189213 2021-06-20 23:12:30 | INFO | train_inner | epoch 006: 1510 / 3002 loss=2.45, ppl=5.46, wps=5706.2, ups=0.09, wpb=64856, bsz=128, num_updates=16424, lr=9.98766e-05, gnorm=1.87, loss_scale=16, train_wall=11, gb_free=2.8, wall=189224 2021-06-20 23:12:41 | INFO | train_inner | epoch 006: 1511 / 3002 loss=2.552, ppl=5.86, wps=6058.3, ups=0.09, wpb=64837, bsz=128, num_updates=16425, lr=9.98766e-05, gnorm=1.879, loss_scale=16, train_wall=10, gb_free=2.8, wall=189235 2021-06-20 23:12:51 | INFO | train_inner | epoch 006: 1512 / 3002 loss=2.447, ppl=5.45, wps=5954.4, ups=0.09, wpb=64866, bsz=128, num_updates=16426, lr=9.98766e-05, gnorm=1.885, loss_scale=16, train_wall=10, gb_free=2.8, wall=189246 2021-06-20 23:13:03 | INFO | train_inner | epoch 006: 1513 / 3002 loss=2.582, ppl=5.99, wps=5781.4, ups=0.09, wpb=64879, bsz=128, num_updates=16427, lr=9.98766e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=189257 2021-06-20 23:13:14 | INFO | train_inner | epoch 006: 1514 / 3002 loss=2.506, ppl=5.68, wps=5849.7, ups=0.09, wpb=64751, bsz=128, num_updates=16428, lr=9.98766e-05, gnorm=1.931, loss_scale=16, train_wall=11, gb_free=2.8, wall=189268 2021-06-20 23:13:25 | INFO | train_inner | epoch 006: 1515 / 3002 loss=2.511, ppl=5.7, wps=5876.7, ups=0.09, wpb=64851, bsz=128, num_updates=16429, lr=9.98766e-05, gnorm=1.833, loss_scale=16, train_wall=11, gb_free=2.8, wall=189279 2021-06-20 23:13:36 | INFO | train_inner | epoch 006: 1516 / 3002 loss=2.532, ppl=5.78, wps=5843.4, ups=0.09, wpb=64869, bsz=128, num_updates=16430, lr=9.98766e-05, gnorm=2.041, loss_scale=16, train_wall=11, gb_free=2.8, wall=189290 2021-06-20 23:13:47 | INFO | train_inner | epoch 006: 1517 / 3002 loss=2.519, ppl=5.73, wps=5846.1, ups=0.09, wpb=64880, bsz=128, num_updates=16431, lr=9.98765e-05, gnorm=1.895, loss_scale=16, train_wall=11, gb_free=2.8, wall=189301 2021-06-20 23:13:58 | INFO | train_inner | epoch 006: 1518 / 3002 loss=2.349, ppl=5.1, wps=5933.8, ups=0.09, wpb=64867, bsz=128, num_updates=16432, lr=9.98765e-05, gnorm=2.268, loss_scale=16, train_wall=10, gb_free=2.8, wall=189312 2021-06-20 23:14:09 | INFO | train_inner | epoch 006: 1519 / 3002 loss=2.465, ppl=5.52, wps=5980.8, ups=0.09, wpb=64758, bsz=128, num_updates=16433, lr=9.98765e-05, gnorm=1.944, loss_scale=16, train_wall=10, gb_free=2.8, wall=189323 2021-06-20 23:14:20 | INFO | train_inner | epoch 006: 1520 / 3002 loss=2.534, ppl=5.79, wps=5798.8, ups=0.09, wpb=64901, bsz=128, num_updates=16434, lr=9.98765e-05, gnorm=2.161, loss_scale=16, train_wall=11, gb_free=2.8, wall=189334 2021-06-20 23:14:31 | INFO | train_inner | epoch 006: 1521 / 3002 loss=2.421, ppl=5.35, wps=5827.5, ups=0.09, wpb=64838, bsz=128, num_updates=16435, lr=9.98765e-05, gnorm=1.897, loss_scale=16, train_wall=11, gb_free=2.8, wall=189345 2021-06-20 23:14:42 | INFO | train_inner | epoch 006: 1522 / 3002 loss=2.553, ppl=5.87, wps=5797, ups=0.09, wpb=64815, bsz=128, num_updates=16436, lr=9.98765e-05, gnorm=1.943, loss_scale=16, train_wall=11, gb_free=2.8, wall=189357 2021-06-20 23:14:53 | INFO | train_inner | epoch 006: 1523 / 3002 loss=2.434, ppl=5.4, wps=5893.4, ups=0.09, wpb=64809, bsz=128, num_updates=16437, lr=9.98765e-05, gnorm=2.045, loss_scale=16, train_wall=11, gb_free=2.8, wall=189368 2021-06-20 23:15:04 | INFO | train_inner | epoch 006: 1524 / 3002 loss=2.327, ppl=5.02, wps=5963.9, ups=0.09, wpb=64934, bsz=128, num_updates=16438, lr=9.98765e-05, gnorm=4.552, loss_scale=16, train_wall=10, gb_free=2.8, wall=189378 2021-06-20 23:15:15 | INFO | train_inner | epoch 006: 1525 / 3002 loss=2.37, ppl=5.17, wps=5799.5, ups=0.09, wpb=64852, bsz=128, num_updates=16439, lr=9.98765e-05, gnorm=1.82, loss_scale=16, train_wall=11, gb_free=2.8, wall=189390 2021-06-20 23:15:26 | INFO | train_inner | epoch 006: 1526 / 3002 loss=2.394, ppl=5.25, wps=5867.7, ups=0.09, wpb=64921, bsz=128, num_updates=16440, lr=9.98765e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=189401 2021-06-20 23:15:38 | INFO | train_inner | epoch 006: 1527 / 3002 loss=2.487, ppl=5.61, wps=5793.6, ups=0.09, wpb=64834, bsz=128, num_updates=16441, lr=9.98765e-05, gnorm=1.972, loss_scale=16, train_wall=11, gb_free=2.8, wall=189412 2021-06-20 23:15:48 | INFO | train_inner | epoch 006: 1528 / 3002 loss=2.499, ppl=5.65, wps=5935.2, ups=0.09, wpb=64768, bsz=128, num_updates=16442, lr=9.98765e-05, gnorm=1.985, loss_scale=16, train_wall=10, gb_free=2.8, wall=189423 2021-06-20 23:16:00 | INFO | train_inner | epoch 006: 1529 / 3002 loss=2.449, ppl=5.46, wps=5810.8, ups=0.09, wpb=64860, bsz=128, num_updates=16443, lr=9.98764e-05, gnorm=1.9, loss_scale=16, train_wall=11, gb_free=2.8, wall=189434 2021-06-20 23:16:11 | INFO | train_inner | epoch 006: 1530 / 3002 loss=2.498, ppl=5.65, wps=5809.5, ups=0.09, wpb=64896, bsz=128, num_updates=16444, lr=9.98764e-05, gnorm=1.993, loss_scale=16, train_wall=11, gb_free=2.8, wall=189445 2021-06-20 23:16:22 | INFO | train_inner | epoch 006: 1531 / 3002 loss=2.536, ppl=5.8, wps=5802.3, ups=0.09, wpb=64811, bsz=128, num_updates=16445, lr=9.98764e-05, gnorm=1.933, loss_scale=16, train_wall=11, gb_free=2.8, wall=189456 2021-06-20 23:16:33 | INFO | train_inner | epoch 006: 1532 / 3002 loss=2.534, ppl=5.79, wps=5786.6, ups=0.09, wpb=64799, bsz=128, num_updates=16446, lr=9.98764e-05, gnorm=1.974, loss_scale=16, train_wall=11, gb_free=2.8, wall=189468 2021-06-20 23:16:44 | INFO | train_inner | epoch 006: 1533 / 3002 loss=2.551, ppl=5.86, wps=5869.4, ups=0.09, wpb=64813, bsz=128, num_updates=16447, lr=9.98764e-05, gnorm=2.011, loss_scale=16, train_wall=11, gb_free=2.8, wall=189479 2021-06-20 23:16:55 | INFO | train_inner | epoch 006: 1534 / 3002 loss=2.544, ppl=5.83, wps=5878.3, ups=0.09, wpb=64727, bsz=128, num_updates=16448, lr=9.98764e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=189490 2021-06-20 23:17:06 | INFO | train_inner | epoch 006: 1535 / 3002 loss=2.279, ppl=4.85, wps=5963, ups=0.09, wpb=64829, bsz=128, num_updates=16449, lr=9.98764e-05, gnorm=1.817, loss_scale=16, train_wall=10, gb_free=2.8, wall=189500 2021-06-20 23:17:17 | INFO | train_inner | epoch 006: 1536 / 3002 loss=2.445, ppl=5.44, wps=5802.9, ups=0.09, wpb=64779, bsz=128, num_updates=16450, lr=9.98764e-05, gnorm=1.97, loss_scale=16, train_wall=11, gb_free=2.8, wall=189512 2021-06-20 23:17:28 | INFO | train_inner | epoch 006: 1537 / 3002 loss=2.33, ppl=5.03, wps=5820.6, ups=0.09, wpb=64835, bsz=128, num_updates=16451, lr=9.98764e-05, gnorm=1.991, loss_scale=16, train_wall=11, gb_free=2.8, wall=189523 2021-06-20 23:17:39 | INFO | train_inner | epoch 006: 1538 / 3002 loss=2.471, ppl=5.54, wps=5889.5, ups=0.09, wpb=64768, bsz=128, num_updates=16452, lr=9.98764e-05, gnorm=2.031, loss_scale=16, train_wall=11, gb_free=2.8, wall=189534 2021-06-20 23:17:50 | INFO | train_inner | epoch 006: 1539 / 3002 loss=2.552, ppl=5.86, wps=6028, ups=0.09, wpb=64876, bsz=128, num_updates=16453, lr=9.98764e-05, gnorm=1.916, loss_scale=16, train_wall=10, gb_free=2.8, wall=189544 2021-06-20 23:18:01 | INFO | train_inner | epoch 006: 1540 / 3002 loss=2.632, ppl=6.2, wps=5885.5, ups=0.09, wpb=64857, bsz=128, num_updates=16454, lr=9.98764e-05, gnorm=1.861, loss_scale=16, train_wall=11, gb_free=2.8, wall=189556 2021-06-20 23:18:12 | INFO | train_inner | epoch 006: 1541 / 3002 loss=2.579, ppl=5.97, wps=6000.8, ups=0.09, wpb=64845, bsz=128, num_updates=16455, lr=9.98764e-05, gnorm=2.081, loss_scale=16, train_wall=10, gb_free=2.8, wall=189566 2021-06-20 23:18:23 | INFO | train_inner | epoch 006: 1542 / 3002 loss=2.499, ppl=5.65, wps=5868.7, ups=0.09, wpb=64554, bsz=128, num_updates=16456, lr=9.98763e-05, gnorm=1.906, loss_scale=16, train_wall=11, gb_free=2.8, wall=189577 2021-06-20 23:18:34 | INFO | train_inner | epoch 006: 1543 / 3002 loss=2.523, ppl=5.75, wps=5870.5, ups=0.09, wpb=64869, bsz=128, num_updates=16457, lr=9.98763e-05, gnorm=1.903, loss_scale=16, train_wall=11, gb_free=2.8, wall=189588 2021-06-20 23:18:45 | INFO | train_inner | epoch 006: 1544 / 3002 loss=2.453, ppl=5.47, wps=5905.6, ups=0.09, wpb=64858, bsz=128, num_updates=16458, lr=9.98763e-05, gnorm=1.926, loss_scale=16, train_wall=10, gb_free=2.8, wall=189599 2021-06-20 23:18:56 | INFO | train_inner | epoch 006: 1545 / 3002 loss=2.479, ppl=5.57, wps=5943.2, ups=0.09, wpb=64859, bsz=128, num_updates=16459, lr=9.98763e-05, gnorm=2.002, loss_scale=16, train_wall=10, gb_free=2.8, wall=189610 2021-06-20 23:19:07 | INFO | train_inner | epoch 006: 1546 / 3002 loss=2.39, ppl=5.24, wps=5889, ups=0.09, wpb=64873, bsz=128, num_updates=16460, lr=9.98763e-05, gnorm=1.95, loss_scale=16, train_wall=11, gb_free=2.8, wall=189621 2021-06-20 23:19:18 | INFO | train_inner | epoch 006: 1547 / 3002 loss=2.388, ppl=5.24, wps=5958.6, ups=0.09, wpb=64892, bsz=128, num_updates=16461, lr=9.98763e-05, gnorm=1.923, loss_scale=16, train_wall=10, gb_free=2.8, wall=189632 2021-06-20 23:19:29 | INFO | train_inner | epoch 006: 1548 / 3002 loss=2.527, ppl=5.76, wps=5922.5, ups=0.09, wpb=64858, bsz=128, num_updates=16462, lr=9.98763e-05, gnorm=2.105, loss_scale=16, train_wall=10, gb_free=2.8, wall=189643 2021-06-20 23:19:40 | INFO | train_inner | epoch 006: 1549 / 3002 loss=2.504, ppl=5.67, wps=5858.7, ups=0.09, wpb=64816, bsz=128, num_updates=16463, lr=9.98763e-05, gnorm=1.879, loss_scale=16, train_wall=11, gb_free=2.8, wall=189654 2021-06-20 23:19:51 | INFO | train_inner | epoch 006: 1550 / 3002 loss=2.582, ppl=5.99, wps=5829.4, ups=0.09, wpb=64910, bsz=128, num_updates=16464, lr=9.98763e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=189665 2021-06-20 23:20:02 | INFO | train_inner | epoch 006: 1551 / 3002 loss=2.387, ppl=5.23, wps=5946, ups=0.09, wpb=64776, bsz=128, num_updates=16465, lr=9.98763e-05, gnorm=2.376, loss_scale=16, train_wall=10, gb_free=2.8, wall=189676 2021-06-20 23:20:13 | INFO | train_inner | epoch 006: 1552 / 3002 loss=2.361, ppl=5.14, wps=5985.4, ups=0.09, wpb=64822, bsz=128, num_updates=16466, lr=9.98763e-05, gnorm=1.895, loss_scale=16, train_wall=10, gb_free=2.8, wall=189687 2021-06-20 23:20:24 | INFO | train_inner | epoch 006: 1553 / 3002 loss=2.608, ppl=6.1, wps=5882.6, ups=0.09, wpb=64851, bsz=128, num_updates=16467, lr=9.98763e-05, gnorm=2.175, loss_scale=16, train_wall=11, gb_free=2.8, wall=189698 2021-06-20 23:20:35 | INFO | train_inner | epoch 006: 1554 / 3002 loss=2.587, ppl=6.01, wps=5719, ups=0.09, wpb=64864, bsz=128, num_updates=16468, lr=9.98762e-05, gnorm=1.921, loss_scale=16, train_wall=11, gb_free=2.8, wall=189709 2021-06-20 23:20:46 | INFO | train_inner | epoch 006: 1555 / 3002 loss=2.616, ppl=6.13, wps=6022.1, ups=0.09, wpb=64727, bsz=128, num_updates=16469, lr=9.98762e-05, gnorm=2.045, loss_scale=16, train_wall=10, gb_free=2.8, wall=189720 2021-06-20 23:20:57 | INFO | train_inner | epoch 006: 1556 / 3002 loss=2.403, ppl=5.29, wps=5756, ups=0.09, wpb=64882, bsz=128, num_updates=16470, lr=9.98762e-05, gnorm=1.863, loss_scale=16, train_wall=11, gb_free=2.8, wall=189731 2021-06-20 23:21:08 | INFO | train_inner | epoch 006: 1557 / 3002 loss=2.435, ppl=5.41, wps=5970, ups=0.09, wpb=64898, bsz=128, num_updates=16471, lr=9.98762e-05, gnorm=1.856, loss_scale=16, train_wall=10, gb_free=2.8, wall=189742 2021-06-20 23:21:19 | INFO | train_inner | epoch 006: 1558 / 3002 loss=2.459, ppl=5.5, wps=5777.3, ups=0.09, wpb=64814, bsz=128, num_updates=16472, lr=9.98762e-05, gnorm=1.98, loss_scale=16, train_wall=11, gb_free=2.8, wall=189754 2021-06-20 23:21:30 | INFO | train_inner | epoch 006: 1559 / 3002 loss=2.58, ppl=5.98, wps=5736.5, ups=0.09, wpb=64801, bsz=128, num_updates=16473, lr=9.98762e-05, gnorm=1.887, loss_scale=16, train_wall=11, gb_free=2.8, wall=189765 2021-06-20 23:21:42 | INFO | train_inner | epoch 006: 1560 / 3002 loss=2.568, ppl=5.93, wps=5807, ups=0.09, wpb=64716, bsz=128, num_updates=16474, lr=9.98762e-05, gnorm=1.894, loss_scale=16, train_wall=11, gb_free=2.8, wall=189776 2021-06-20 23:21:53 | INFO | train_inner | epoch 006: 1561 / 3002 loss=2.484, ppl=5.6, wps=5902.8, ups=0.09, wpb=64882, bsz=128, num_updates=16475, lr=9.98762e-05, gnorm=1.788, loss_scale=16, train_wall=11, gb_free=2.8, wall=189787 2021-06-20 23:22:04 | INFO | train_inner | epoch 006: 1562 / 3002 loss=2.571, ppl=5.94, wps=5863.2, ups=0.09, wpb=64812, bsz=128, num_updates=16476, lr=9.98762e-05, gnorm=2.941, loss_scale=16, train_wall=11, gb_free=2.8, wall=189798 2021-06-20 23:22:15 | INFO | train_inner | epoch 006: 1563 / 3002 loss=2.463, ppl=5.51, wps=5825.6, ups=0.09, wpb=64801, bsz=128, num_updates=16477, lr=9.98762e-05, gnorm=1.866, loss_scale=16, train_wall=11, gb_free=2.8, wall=189809 2021-06-20 23:22:26 | INFO | train_inner | epoch 006: 1564 / 3002 loss=2.512, ppl=5.7, wps=5933.3, ups=0.09, wpb=64889, bsz=128, num_updates=16478, lr=9.98762e-05, gnorm=1.965, loss_scale=16, train_wall=10, gb_free=2.8, wall=189820 2021-06-20 23:22:37 | INFO | train_inner | epoch 006: 1565 / 3002 loss=2.406, ppl=5.3, wps=5812.9, ups=0.09, wpb=64830, bsz=128, num_updates=16479, lr=9.98762e-05, gnorm=1.874, loss_scale=16, train_wall=11, gb_free=2.8, wall=189831 2021-06-20 23:22:48 | INFO | train_inner | epoch 006: 1566 / 3002 loss=2.524, ppl=5.75, wps=5909.6, ups=0.09, wpb=64813, bsz=128, num_updates=16480, lr=9.98762e-05, gnorm=1.914, loss_scale=16, train_wall=11, gb_free=2.8, wall=189842 2021-06-20 23:22:59 | INFO | train_inner | epoch 006: 1567 / 3002 loss=2.469, ppl=5.54, wps=5781.3, ups=0.09, wpb=64834, bsz=128, num_updates=16481, lr=9.98761e-05, gnorm=1.931, loss_scale=16, train_wall=11, gb_free=2.8, wall=189853 2021-06-20 23:23:10 | INFO | train_inner | epoch 006: 1568 / 3002 loss=2.431, ppl=5.39, wps=5887.4, ups=0.09, wpb=64826, bsz=128, num_updates=16482, lr=9.98761e-05, gnorm=2.544, loss_scale=16, train_wall=11, gb_free=2.8, wall=189864 2021-06-20 23:23:21 | INFO | train_inner | epoch 006: 1569 / 3002 loss=2.525, ppl=5.75, wps=5871.2, ups=0.09, wpb=64794, bsz=128, num_updates=16483, lr=9.98761e-05, gnorm=1.916, loss_scale=16, train_wall=11, gb_free=2.8, wall=189875 2021-06-20 23:23:32 | INFO | train_inner | epoch 006: 1570 / 3002 loss=2.559, ppl=5.89, wps=5900.4, ups=0.09, wpb=64813, bsz=128, num_updates=16484, lr=9.98761e-05, gnorm=1.871, loss_scale=16, train_wall=11, gb_free=2.8, wall=189886 2021-06-20 23:23:43 | INFO | train_inner | epoch 006: 1571 / 3002 loss=2.48, ppl=5.58, wps=5957.1, ups=0.09, wpb=64956, bsz=128, num_updates=16485, lr=9.98761e-05, gnorm=1.917, loss_scale=16, train_wall=10, gb_free=2.8, wall=189897 2021-06-20 23:23:54 | INFO | train_inner | epoch 006: 1572 / 3002 loss=2.493, ppl=5.63, wps=5789.4, ups=0.09, wpb=64839, bsz=128, num_updates=16486, lr=9.98761e-05, gnorm=1.984, loss_scale=16, train_wall=11, gb_free=2.8, wall=189909 2021-06-20 23:24:05 | INFO | train_inner | epoch 006: 1573 / 3002 loss=2.367, ppl=5.16, wps=5848.8, ups=0.09, wpb=64809, bsz=128, num_updates=16487, lr=9.98761e-05, gnorm=1.942, loss_scale=16, train_wall=11, gb_free=2.8, wall=189920 2021-06-20 23:24:16 | INFO | train_inner | epoch 006: 1574 / 3002 loss=2.362, ppl=5.14, wps=5793, ups=0.09, wpb=64886, bsz=128, num_updates=16488, lr=9.98761e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=189931 2021-06-20 23:24:28 | INFO | train_inner | epoch 006: 1575 / 3002 loss=2.362, ppl=5.14, wps=5791.7, ups=0.09, wpb=64885, bsz=128, num_updates=16489, lr=9.98761e-05, gnorm=1.833, loss_scale=16, train_wall=11, gb_free=2.8, wall=189942 2021-06-20 23:24:39 | INFO | train_inner | epoch 006: 1576 / 3002 loss=2.481, ppl=5.58, wps=5899.9, ups=0.09, wpb=64883, bsz=128, num_updates=16490, lr=9.98761e-05, gnorm=1.927, loss_scale=16, train_wall=11, gb_free=2.8, wall=189953 2021-06-20 23:24:50 | INFO | train_inner | epoch 006: 1577 / 3002 loss=2.471, ppl=5.54, wps=5841.1, ups=0.09, wpb=64873, bsz=128, num_updates=16491, lr=9.98761e-05, gnorm=1.939, loss_scale=16, train_wall=11, gb_free=2.8, wall=189964 2021-06-20 23:25:01 | INFO | train_inner | epoch 006: 1578 / 3002 loss=2.509, ppl=5.69, wps=5919.6, ups=0.09, wpb=64865, bsz=128, num_updates=16492, lr=9.98761e-05, gnorm=1.952, loss_scale=16, train_wall=10, gb_free=2.8, wall=189975 2021-06-20 23:25:12 | INFO | train_inner | epoch 006: 1579 / 3002 loss=2.624, ppl=6.16, wps=5780.6, ups=0.09, wpb=64792, bsz=128, num_updates=16493, lr=9.9876e-05, gnorm=1.944, loss_scale=16, train_wall=11, gb_free=2.8, wall=189986 2021-06-20 23:25:23 | INFO | train_inner | epoch 006: 1580 / 3002 loss=2.483, ppl=5.59, wps=5909.3, ups=0.09, wpb=64796, bsz=128, num_updates=16494, lr=9.9876e-05, gnorm=1.853, loss_scale=16, train_wall=11, gb_free=2.8, wall=189997 2021-06-20 23:25:34 | INFO | train_inner | epoch 006: 1581 / 3002 loss=2.348, ppl=5.09, wps=5867.9, ups=0.09, wpb=64829, bsz=128, num_updates=16495, lr=9.9876e-05, gnorm=1.898, loss_scale=16, train_wall=11, gb_free=2.8, wall=190008 2021-06-20 23:25:45 | INFO | train_inner | epoch 006: 1582 / 3002 loss=2.357, ppl=5.12, wps=5858.9, ups=0.09, wpb=64828, bsz=128, num_updates=16496, lr=9.9876e-05, gnorm=1.874, loss_scale=16, train_wall=11, gb_free=2.8, wall=190019 2021-06-20 23:25:56 | INFO | train_inner | epoch 006: 1583 / 3002 loss=2.446, ppl=5.45, wps=5731.6, ups=0.09, wpb=64617, bsz=128, num_updates=16497, lr=9.9876e-05, gnorm=1.834, loss_scale=16, train_wall=11, gb_free=2.8, wall=190031 2021-06-20 23:26:07 | INFO | train_inner | epoch 006: 1584 / 3002 loss=2.463, ppl=5.51, wps=5915.9, ups=0.09, wpb=64842, bsz=128, num_updates=16498, lr=9.9876e-05, gnorm=1.931, loss_scale=16, train_wall=10, gb_free=2.8, wall=190042 2021-06-20 23:26:18 | INFO | train_inner | epoch 006: 1585 / 3002 loss=2.443, ppl=5.44, wps=5831.5, ups=0.09, wpb=64818, bsz=128, num_updates=16499, lr=9.9876e-05, gnorm=2.721, loss_scale=16, train_wall=11, gb_free=2.8, wall=190053 2021-06-20 23:26:30 | INFO | train_inner | epoch 006: 1586 / 3002 loss=2.608, ppl=6.1, wps=5812, ups=0.09, wpb=64728, bsz=128, num_updates=16500, lr=9.9876e-05, gnorm=1.899, loss_scale=16, train_wall=11, gb_free=2.8, wall=190064 2021-06-20 23:26:41 | INFO | train_inner | epoch 006: 1587 / 3002 loss=2.355, ppl=5.12, wps=5796.7, ups=0.09, wpb=64770, bsz=128, num_updates=16501, lr=9.9876e-05, gnorm=1.886, loss_scale=16, train_wall=11, gb_free=2.8, wall=190075 2021-06-20 23:26:52 | INFO | train_inner | epoch 006: 1588 / 3002 loss=2.611, ppl=6.11, wps=5848.4, ups=0.09, wpb=64800, bsz=128, num_updates=16502, lr=9.9876e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=190086 2021-06-20 23:27:03 | INFO | train_inner | epoch 006: 1589 / 3002 loss=2.47, ppl=5.54, wps=5945.5, ups=0.09, wpb=64809, bsz=128, num_updates=16503, lr=9.9876e-05, gnorm=1.899, loss_scale=16, train_wall=10, gb_free=2.8, wall=190097 2021-06-20 23:27:14 | INFO | train_inner | epoch 006: 1590 / 3002 loss=2.502, ppl=5.67, wps=5915.9, ups=0.09, wpb=64918, bsz=128, num_updates=16504, lr=9.9876e-05, gnorm=2.005, loss_scale=16, train_wall=10, gb_free=2.8, wall=190108 2021-06-20 23:27:25 | INFO | train_inner | epoch 006: 1591 / 3002 loss=2.546, ppl=5.84, wps=5717.1, ups=0.09, wpb=64784, bsz=128, num_updates=16505, lr=9.9876e-05, gnorm=1.933, loss_scale=16, train_wall=11, gb_free=2.8, wall=190119 2021-06-20 23:27:36 | INFO | train_inner | epoch 006: 1592 / 3002 loss=2.373, ppl=5.18, wps=5868, ups=0.09, wpb=64872, bsz=128, num_updates=16506, lr=9.98759e-05, gnorm=2.279, loss_scale=16, train_wall=11, gb_free=2.8, wall=190130 2021-06-20 23:27:47 | INFO | train_inner | epoch 006: 1593 / 3002 loss=2.445, ppl=5.45, wps=5833.5, ups=0.09, wpb=64658, bsz=128, num_updates=16507, lr=9.98759e-05, gnorm=1.908, loss_scale=16, train_wall=11, gb_free=2.8, wall=190141 2021-06-20 23:27:58 | INFO | train_inner | epoch 006: 1594 / 3002 loss=2.529, ppl=5.77, wps=5878.6, ups=0.09, wpb=64845, bsz=128, num_updates=16508, lr=9.98759e-05, gnorm=1.978, loss_scale=16, train_wall=11, gb_free=2.8, wall=190153 2021-06-20 23:28:09 | INFO | train_inner | epoch 006: 1595 / 3002 loss=2.486, ppl=5.6, wps=5792.1, ups=0.09, wpb=64807, bsz=128, num_updates=16509, lr=9.98759e-05, gnorm=2.938, loss_scale=16, train_wall=11, gb_free=2.8, wall=190164 2021-06-20 23:28:20 | INFO | train_inner | epoch 006: 1596 / 3002 loss=2.492, ppl=5.62, wps=5875.2, ups=0.09, wpb=64826, bsz=128, num_updates=16510, lr=9.98759e-05, gnorm=2.019, loss_scale=16, train_wall=11, gb_free=2.8, wall=190175 2021-06-20 23:28:32 | INFO | train_inner | epoch 006: 1597 / 3002 loss=2.414, ppl=5.33, wps=5827.1, ups=0.09, wpb=64795, bsz=128, num_updates=16511, lr=9.98759e-05, gnorm=1.912, loss_scale=32, train_wall=11, gb_free=2.8, wall=190186 2021-06-20 23:28:42 | INFO | train_inner | epoch 006: 1598 / 3002 loss=2.467, ppl=5.53, wps=5917.9, ups=0.09, wpb=64826, bsz=128, num_updates=16512, lr=9.98759e-05, gnorm=2.093, loss_scale=32, train_wall=11, gb_free=2.8, wall=190197 2021-06-20 23:28:53 | INFO | train_inner | epoch 006: 1599 / 3002 loss=2.511, ppl=5.7, wps=5908.9, ups=0.09, wpb=64912, bsz=128, num_updates=16513, lr=9.98759e-05, gnorm=1.954, loss_scale=32, train_wall=11, gb_free=2.8, wall=190208 2021-06-20 23:29:05 | INFO | train_inner | epoch 006: 1600 / 3002 loss=2.523, ppl=5.75, wps=5715.8, ups=0.09, wpb=64769, bsz=128, num_updates=16514, lr=9.98759e-05, gnorm=1.933, loss_scale=32, train_wall=11, gb_free=2.8, wall=190219 2021-06-20 23:29:16 | INFO | train_inner | epoch 006: 1601 / 3002 loss=2.535, ppl=5.8, wps=5849.6, ups=0.09, wpb=64921, bsz=128, num_updates=16515, lr=9.98759e-05, gnorm=1.896, loss_scale=32, train_wall=11, gb_free=2.8, wall=190230 2021-06-20 23:29:27 | INFO | train_inner | epoch 006: 1602 / 3002 loss=2.617, ppl=6.14, wps=5846.1, ups=0.09, wpb=64839, bsz=128, num_updates=16516, lr=9.98759e-05, gnorm=1.896, loss_scale=32, train_wall=11, gb_free=2.8, wall=190241 2021-06-20 23:29:38 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 2021-06-20 23:29:49 | INFO | train_inner | epoch 006: 1604 / 3002 loss=2.328, ppl=5.02, wps=2928.2, ups=0.05, wpb=64830, bsz=128, num_updates=16517, lr=9.98759e-05, gnorm=1.899, loss_scale=16, train_wall=21, gb_free=2.8, wall=190263 2021-06-20 23:30:00 | INFO | train_inner | epoch 006: 1605 / 3002 loss=2.659, ppl=6.32, wps=5889, ups=0.09, wpb=64765, bsz=128, num_updates=16518, lr=9.98758e-05, gnorm=1.938, loss_scale=16, train_wall=11, gb_free=2.8, wall=190274 2021-06-20 23:30:11 | INFO | train_inner | epoch 006: 1606 / 3002 loss=2.503, ppl=5.67, wps=5862.2, ups=0.09, wpb=64803, bsz=128, num_updates=16519, lr=9.98758e-05, gnorm=1.9, loss_scale=16, train_wall=11, gb_free=2.8, wall=190286 2021-06-20 23:30:22 | INFO | train_inner | epoch 006: 1607 / 3002 loss=2.37, ppl=5.17, wps=5912.1, ups=0.09, wpb=64894, bsz=128, num_updates=16520, lr=9.98758e-05, gnorm=1.901, loss_scale=16, train_wall=11, gb_free=2.8, wall=190296 2021-06-20 23:30:33 | INFO | train_inner | epoch 006: 1608 / 3002 loss=2.331, ppl=5.03, wps=5817.5, ups=0.09, wpb=64838, bsz=128, num_updates=16521, lr=9.98758e-05, gnorm=1.963, loss_scale=16, train_wall=11, gb_free=2.8, wall=190308 2021-06-20 23:30:44 | INFO | train_inner | epoch 006: 1609 / 3002 loss=2.555, ppl=5.88, wps=5797.1, ups=0.09, wpb=64781, bsz=128, num_updates=16522, lr=9.98758e-05, gnorm=2.128, loss_scale=16, train_wall=11, gb_free=2.8, wall=190319 2021-06-20 23:30:56 | INFO | train_inner | epoch 006: 1610 / 3002 loss=2.521, ppl=5.74, wps=5801.4, ups=0.09, wpb=64741, bsz=128, num_updates=16523, lr=9.98758e-05, gnorm=1.908, loss_scale=16, train_wall=11, gb_free=2.8, wall=190330 2021-06-20 23:31:07 | INFO | train_inner | epoch 006: 1611 / 3002 loss=2.433, ppl=5.4, wps=5911.3, ups=0.09, wpb=64871, bsz=128, num_updates=16524, lr=9.98758e-05, gnorm=1.904, loss_scale=16, train_wall=11, gb_free=2.8, wall=190341 2021-06-20 23:31:18 | INFO | train_inner | epoch 006: 1612 / 3002 loss=2.505, ppl=5.68, wps=5856.9, ups=0.09, wpb=64839, bsz=128, num_updates=16525, lr=9.98758e-05, gnorm=1.919, loss_scale=16, train_wall=11, gb_free=2.8, wall=190352 2021-06-20 23:31:29 | INFO | train_inner | epoch 006: 1613 / 3002 loss=2.416, ppl=5.34, wps=5811.5, ups=0.09, wpb=64910, bsz=128, num_updates=16526, lr=9.98758e-05, gnorm=1.866, loss_scale=16, train_wall=11, gb_free=2.8, wall=190363 2021-06-20 23:31:40 | INFO | train_inner | epoch 006: 1614 / 3002 loss=2.452, ppl=5.47, wps=5959.8, ups=0.09, wpb=64866, bsz=128, num_updates=16527, lr=9.98758e-05, gnorm=1.866, loss_scale=16, train_wall=10, gb_free=2.8, wall=190374 2021-06-20 23:31:51 | INFO | train_inner | epoch 006: 1615 / 3002 loss=2.403, ppl=5.29, wps=5836, ups=0.09, wpb=64848, bsz=128, num_updates=16528, lr=9.98758e-05, gnorm=1.897, loss_scale=16, train_wall=11, gb_free=2.8, wall=190385 2021-06-20 23:32:02 | INFO | train_inner | epoch 006: 1616 / 3002 loss=2.574, ppl=5.95, wps=5753.7, ups=0.09, wpb=64821, bsz=128, num_updates=16529, lr=9.98758e-05, gnorm=1.83, loss_scale=16, train_wall=11, gb_free=2.8, wall=190396 2021-06-20 23:32:13 | INFO | train_inner | epoch 006: 1617 / 3002 loss=2.368, ppl=5.16, wps=5843.5, ups=0.09, wpb=64862, bsz=128, num_updates=16530, lr=9.98758e-05, gnorm=2.016, loss_scale=16, train_wall=11, gb_free=2.8, wall=190408 2021-06-20 23:32:24 | INFO | train_inner | epoch 006: 1618 / 3002 loss=2.349, ppl=5.1, wps=5875.6, ups=0.09, wpb=64780, bsz=128, num_updates=16531, lr=9.98757e-05, gnorm=1.868, loss_scale=16, train_wall=11, gb_free=2.8, wall=190419 2021-06-20 23:32:35 | INFO | train_inner | epoch 006: 1619 / 3002 loss=2.42, ppl=5.35, wps=5828.1, ups=0.09, wpb=64845, bsz=128, num_updates=16532, lr=9.98757e-05, gnorm=1.875, loss_scale=16, train_wall=11, gb_free=2.8, wall=190430 2021-06-20 23:32:46 | INFO | train_inner | epoch 006: 1620 / 3002 loss=2.393, ppl=5.25, wps=5940.9, ups=0.09, wpb=64881, bsz=128, num_updates=16533, lr=9.98757e-05, gnorm=1.906, loss_scale=16, train_wall=10, gb_free=2.8, wall=190441 2021-06-20 23:32:57 | INFO | train_inner | epoch 006: 1621 / 3002 loss=2.468, ppl=5.53, wps=5860.2, ups=0.09, wpb=64880, bsz=128, num_updates=16534, lr=9.98757e-05, gnorm=2.076, loss_scale=16, train_wall=11, gb_free=2.8, wall=190452 2021-06-20 23:33:09 | INFO | train_inner | epoch 006: 1622 / 3002 loss=2.507, ppl=5.69, wps=5792.3, ups=0.09, wpb=64867, bsz=128, num_updates=16535, lr=9.98757e-05, gnorm=2.05, loss_scale=16, train_wall=11, gb_free=2.8, wall=190463 2021-06-20 23:33:20 | INFO | train_inner | epoch 006: 1623 / 3002 loss=2.386, ppl=5.23, wps=5739.8, ups=0.09, wpb=64799, bsz=128, num_updates=16536, lr=9.98757e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=190474 2021-06-20 23:33:31 | INFO | train_inner | epoch 006: 1624 / 3002 loss=2.561, ppl=5.9, wps=5811.5, ups=0.09, wpb=64746, bsz=128, num_updates=16537, lr=9.98757e-05, gnorm=1.9, loss_scale=16, train_wall=11, gb_free=2.8, wall=190485 2021-06-20 23:33:42 | INFO | train_inner | epoch 006: 1625 / 3002 loss=2.401, ppl=5.28, wps=5729.5, ups=0.09, wpb=64756, bsz=128, num_updates=16538, lr=9.98757e-05, gnorm=3.747, loss_scale=16, train_wall=11, gb_free=2.8, wall=190497 2021-06-20 23:33:54 | INFO | train_inner | epoch 006: 1626 / 3002 loss=2.402, ppl=5.29, wps=5737.9, ups=0.09, wpb=64743, bsz=128, num_updates=16539, lr=9.98757e-05, gnorm=2.048, loss_scale=16, train_wall=11, gb_free=2.8, wall=190508 2021-06-20 23:34:05 | INFO | train_inner | epoch 006: 1627 / 3002 loss=2.405, ppl=5.3, wps=5825, ups=0.09, wpb=64803, bsz=128, num_updates=16540, lr=9.98757e-05, gnorm=1.904, loss_scale=16, train_wall=11, gb_free=2.8, wall=190519 2021-06-20 23:34:16 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 2021-06-20 23:34:27 | INFO | train_inner | epoch 006: 1629 / 3002 loss=2.576, ppl=5.96, wps=2897.3, ups=0.04, wpb=64877, bsz=128, num_updates=16541, lr=9.98757e-05, gnorm=1.955, loss_scale=8, train_wall=21, gb_free=2.8, wall=190541 2021-06-20 23:34:38 | INFO | train_inner | epoch 006: 1630 / 3002 loss=2.416, ppl=5.34, wps=5850.6, ups=0.09, wpb=64866, bsz=128, num_updates=16542, lr=9.98757e-05, gnorm=1.891, loss_scale=8, train_wall=11, gb_free=2.8, wall=190553 2021-06-20 23:34:49 | INFO | train_inner | epoch 006: 1631 / 3002 loss=2.413, ppl=5.33, wps=5888.2, ups=0.09, wpb=64870, bsz=128, num_updates=16543, lr=9.98756e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=190564 2021-06-20 23:35:00 | INFO | train_inner | epoch 006: 1632 / 3002 loss=2.435, ppl=5.41, wps=5801.5, ups=0.09, wpb=64836, bsz=128, num_updates=16544, lr=9.98756e-05, gnorm=1.861, loss_scale=8, train_wall=11, gb_free=2.8, wall=190575 2021-06-20 23:35:12 | INFO | train_inner | epoch 006: 1633 / 3002 loss=2.583, ppl=5.99, wps=5794.3, ups=0.09, wpb=64845, bsz=128, num_updates=16545, lr=9.98756e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=190586 2021-06-20 23:35:23 | INFO | train_inner | epoch 006: 1634 / 3002 loss=2.532, ppl=5.78, wps=5801.7, ups=0.09, wpb=64793, bsz=128, num_updates=16546, lr=9.98756e-05, gnorm=1.858, loss_scale=8, train_wall=11, gb_free=2.8, wall=190597 2021-06-20 23:35:34 | INFO | train_inner | epoch 006: 1635 / 3002 loss=2.49, ppl=5.62, wps=5900.5, ups=0.09, wpb=64808, bsz=128, num_updates=16547, lr=9.98756e-05, gnorm=1.946, loss_scale=8, train_wall=11, gb_free=2.8, wall=190608 2021-06-20 23:35:45 | INFO | train_inner | epoch 006: 1636 / 3002 loss=2.506, ppl=5.68, wps=5876, ups=0.09, wpb=64841, bsz=128, num_updates=16548, lr=9.98756e-05, gnorm=2.004, loss_scale=8, train_wall=11, gb_free=2.8, wall=190619 2021-06-20 23:35:56 | INFO | train_inner | epoch 006: 1637 / 3002 loss=2.543, ppl=5.83, wps=5783.3, ups=0.09, wpb=64840, bsz=128, num_updates=16549, lr=9.98756e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=190630 2021-06-20 23:36:07 | INFO | train_inner | epoch 006: 1638 / 3002 loss=2.459, ppl=5.5, wps=5997.5, ups=0.09, wpb=64835, bsz=128, num_updates=16550, lr=9.98756e-05, gnorm=1.96, loss_scale=8, train_wall=10, gb_free=2.8, wall=190641 2021-06-20 23:36:18 | INFO | train_inner | epoch 006: 1639 / 3002 loss=2.489, ppl=5.61, wps=5815.9, ups=0.09, wpb=64815, bsz=128, num_updates=16551, lr=9.98756e-05, gnorm=2.031, loss_scale=8, train_wall=11, gb_free=2.8, wall=190652 2021-06-20 23:36:29 | INFO | train_inner | epoch 006: 1640 / 3002 loss=2.539, ppl=5.81, wps=5902.9, ups=0.09, wpb=64849, bsz=128, num_updates=16552, lr=9.98756e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=190663 2021-06-20 23:36:40 | INFO | train_inner | epoch 006: 1641 / 3002 loss=2.46, ppl=5.5, wps=5728.4, ups=0.09, wpb=64801, bsz=128, num_updates=16553, lr=9.98756e-05, gnorm=1.989, loss_scale=8, train_wall=11, gb_free=2.8, wall=190675 2021-06-20 23:36:51 | INFO | train_inner | epoch 006: 1642 / 3002 loss=2.567, ppl=5.93, wps=5911.8, ups=0.09, wpb=64874, bsz=128, num_updates=16554, lr=9.98756e-05, gnorm=1.954, loss_scale=8, train_wall=11, gb_free=2.8, wall=190686 2021-06-20 23:37:02 | INFO | train_inner | epoch 006: 1643 / 3002 loss=2.314, ppl=4.97, wps=5746, ups=0.09, wpb=64775, bsz=128, num_updates=16555, lr=9.98756e-05, gnorm=1.869, loss_scale=8, train_wall=11, gb_free=2.8, wall=190697 2021-06-20 23:37:14 | INFO | train_inner | epoch 006: 1644 / 3002 loss=2.496, ppl=5.64, wps=5771.8, ups=0.09, wpb=64853, bsz=128, num_updates=16556, lr=9.98755e-05, gnorm=1.831, loss_scale=8, train_wall=11, gb_free=2.8, wall=190708 2021-06-20 23:37:25 | INFO | train_inner | epoch 006: 1645 / 3002 loss=2.517, ppl=5.72, wps=5886.6, ups=0.09, wpb=64873, bsz=128, num_updates=16557, lr=9.98755e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=190719 2021-06-20 23:37:36 | INFO | train_inner | epoch 006: 1646 / 3002 loss=2.434, ppl=5.4, wps=5848.6, ups=0.09, wpb=64793, bsz=128, num_updates=16558, lr=9.98755e-05, gnorm=1.859, loss_scale=8, train_wall=11, gb_free=2.8, wall=190730 2021-06-20 23:37:47 | INFO | train_inner | epoch 006: 1647 / 3002 loss=2.585, ppl=6, wps=5840.7, ups=0.09, wpb=64783, bsz=128, num_updates=16559, lr=9.98755e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=190741 2021-06-20 23:37:58 | INFO | train_inner | epoch 006: 1648 / 3002 loss=2.377, ppl=5.2, wps=5812.6, ups=0.09, wpb=64806, bsz=128, num_updates=16560, lr=9.98755e-05, gnorm=1.919, loss_scale=8, train_wall=11, gb_free=2.8, wall=190752 2021-06-20 23:38:09 | INFO | train_inner | epoch 006: 1649 / 3002 loss=2.468, ppl=5.53, wps=5891.8, ups=0.09, wpb=64820, bsz=128, num_updates=16561, lr=9.98755e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=190763 2021-06-20 23:38:20 | INFO | train_inner | epoch 006: 1650 / 3002 loss=2.394, ppl=5.26, wps=5907.4, ups=0.09, wpb=64860, bsz=128, num_updates=16562, lr=9.98755e-05, gnorm=2.038, loss_scale=8, train_wall=11, gb_free=2.8, wall=190774 2021-06-20 23:38:31 | INFO | train_inner | epoch 006: 1651 / 3002 loss=2.5, ppl=5.65, wps=5873.2, ups=0.09, wpb=64818, bsz=128, num_updates=16563, lr=9.98755e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=190785 2021-06-20 23:38:42 | INFO | train_inner | epoch 006: 1652 / 3002 loss=2.487, ppl=5.61, wps=5898.3, ups=0.09, wpb=64791, bsz=128, num_updates=16564, lr=9.98755e-05, gnorm=1.919, loss_scale=8, train_wall=11, gb_free=2.8, wall=190796 2021-06-20 23:38:53 | INFO | train_inner | epoch 006: 1653 / 3002 loss=2.538, ppl=5.81, wps=5801.4, ups=0.09, wpb=64786, bsz=128, num_updates=16565, lr=9.98755e-05, gnorm=1.937, loss_scale=8, train_wall=11, gb_free=2.8, wall=190808 2021-06-20 23:39:04 | INFO | train_inner | epoch 006: 1654 / 3002 loss=2.408, ppl=5.31, wps=5853.4, ups=0.09, wpb=64750, bsz=128, num_updates=16566, lr=9.98755e-05, gnorm=2.032, loss_scale=8, train_wall=11, gb_free=2.8, wall=190819 2021-06-20 23:39:16 | INFO | train_inner | epoch 006: 1655 / 3002 loss=2.559, ppl=5.89, wps=5724.8, ups=0.09, wpb=64770, bsz=128, num_updates=16567, lr=9.98755e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=190830 2021-06-20 23:39:27 | INFO | train_inner | epoch 006: 1656 / 3002 loss=2.358, ppl=5.13, wps=5822, ups=0.09, wpb=64830, bsz=128, num_updates=16568, lr=9.98754e-05, gnorm=1.924, loss_scale=8, train_wall=11, gb_free=2.8, wall=190841 2021-06-20 23:39:38 | INFO | train_inner | epoch 006: 1657 / 3002 loss=2.453, ppl=5.48, wps=5869.7, ups=0.09, wpb=64821, bsz=128, num_updates=16569, lr=9.98754e-05, gnorm=2.083, loss_scale=8, train_wall=11, gb_free=2.8, wall=190852 2021-06-20 23:39:49 | INFO | train_inner | epoch 006: 1658 / 3002 loss=2.445, ppl=5.44, wps=5811.6, ups=0.09, wpb=64828, bsz=128, num_updates=16570, lr=9.98754e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=190863 2021-06-20 23:40:00 | INFO | train_inner | epoch 006: 1659 / 3002 loss=2.511, ppl=5.7, wps=5911.6, ups=0.09, wpb=64770, bsz=128, num_updates=16571, lr=9.98754e-05, gnorm=1.923, loss_scale=8, train_wall=10, gb_free=2.8, wall=190874 2021-06-20 23:40:11 | INFO | train_inner | epoch 006: 1660 / 3002 loss=2.491, ppl=5.62, wps=5935, ups=0.09, wpb=64843, bsz=128, num_updates=16572, lr=9.98754e-05, gnorm=1.984, loss_scale=8, train_wall=10, gb_free=2.8, wall=190885 2021-06-20 23:40:22 | INFO | train_inner | epoch 006: 1661 / 3002 loss=2.36, ppl=5.13, wps=5935.7, ups=0.09, wpb=64804, bsz=128, num_updates=16573, lr=9.98754e-05, gnorm=1.892, loss_scale=8, train_wall=10, gb_free=2.8, wall=190896 2021-06-20 23:40:33 | INFO | train_inner | epoch 006: 1662 / 3002 loss=2.466, ppl=5.53, wps=5790.6, ups=0.09, wpb=64848, bsz=128, num_updates=16574, lr=9.98754e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=190907 2021-06-20 23:40:44 | INFO | train_inner | epoch 006: 1663 / 3002 loss=2.502, ppl=5.67, wps=6018.1, ups=0.09, wpb=64884, bsz=128, num_updates=16575, lr=9.98754e-05, gnorm=1.935, loss_scale=8, train_wall=10, gb_free=2.8, wall=190918 2021-06-20 23:40:55 | INFO | train_inner | epoch 006: 1664 / 3002 loss=2.442, ppl=5.43, wps=5981.4, ups=0.09, wpb=64829, bsz=128, num_updates=16576, lr=9.98754e-05, gnorm=1.93, loss_scale=8, train_wall=10, gb_free=2.8, wall=190929 2021-06-20 23:41:06 | INFO | train_inner | epoch 006: 1665 / 3002 loss=2.355, ppl=5.11, wps=5857, ups=0.09, wpb=64766, bsz=128, num_updates=16577, lr=9.98754e-05, gnorm=1.918, loss_scale=8, train_wall=11, gb_free=2.8, wall=190940 2021-06-20 23:41:17 | INFO | train_inner | epoch 006: 1666 / 3002 loss=2.367, ppl=5.16, wps=5916.7, ups=0.09, wpb=64887, bsz=128, num_updates=16578, lr=9.98754e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=190951 2021-06-20 23:41:27 | INFO | train_inner | epoch 006: 1667 / 3002 loss=2.566, ppl=5.92, wps=5943.4, ups=0.09, wpb=64825, bsz=128, num_updates=16579, lr=9.98754e-05, gnorm=1.953, loss_scale=8, train_wall=10, gb_free=2.8, wall=190962 2021-06-20 23:41:39 | INFO | train_inner | epoch 006: 1668 / 3002 loss=2.693, ppl=6.47, wps=5855.9, ups=0.09, wpb=64830, bsz=128, num_updates=16580, lr=9.98754e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=190973 2021-06-20 23:41:50 | INFO | train_inner | epoch 006: 1669 / 3002 loss=2.568, ppl=5.93, wps=5731.2, ups=0.09, wpb=64807, bsz=128, num_updates=16581, lr=9.98753e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=190984 2021-06-20 23:42:01 | INFO | train_inner | epoch 006: 1670 / 3002 loss=2.432, ppl=5.4, wps=5732.7, ups=0.09, wpb=64770, bsz=128, num_updates=16582, lr=9.98753e-05, gnorm=2.43, loss_scale=8, train_wall=11, gb_free=2.8, wall=190995 2021-06-20 23:42:12 | INFO | train_inner | epoch 006: 1671 / 3002 loss=2.431, ppl=5.39, wps=5893.5, ups=0.09, wpb=64785, bsz=128, num_updates=16583, lr=9.98753e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=191006 2021-06-20 23:42:23 | INFO | train_inner | epoch 006: 1672 / 3002 loss=2.712, ppl=6.55, wps=5837.1, ups=0.09, wpb=64782, bsz=128, num_updates=16584, lr=9.98753e-05, gnorm=2.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=191018 2021-06-20 23:42:35 | INFO | train_inner | epoch 006: 1673 / 3002 loss=2.459, ppl=5.5, wps=5721.6, ups=0.09, wpb=64901, bsz=128, num_updates=16585, lr=9.98753e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=191029 2021-06-20 23:42:45 | INFO | train_inner | epoch 006: 1674 / 3002 loss=2.431, ppl=5.39, wps=6029.9, ups=0.09, wpb=64891, bsz=128, num_updates=16586, lr=9.98753e-05, gnorm=1.95, loss_scale=8, train_wall=10, gb_free=2.8, wall=191040 2021-06-20 23:42:56 | INFO | train_inner | epoch 006: 1675 / 3002 loss=2.432, ppl=5.4, wps=5890.1, ups=0.09, wpb=64821, bsz=128, num_updates=16587, lr=9.98753e-05, gnorm=2.032, loss_scale=8, train_wall=11, gb_free=2.8, wall=191051 2021-06-20 23:43:07 | INFO | train_inner | epoch 006: 1676 / 3002 loss=2.444, ppl=5.44, wps=5857.4, ups=0.09, wpb=64881, bsz=128, num_updates=16588, lr=9.98753e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=191062 2021-06-20 23:43:19 | INFO | train_inner | epoch 006: 1677 / 3002 loss=2.446, ppl=5.45, wps=5713.7, ups=0.09, wpb=64847, bsz=128, num_updates=16589, lr=9.98753e-05, gnorm=1.915, loss_scale=8, train_wall=11, gb_free=2.8, wall=191073 2021-06-20 23:43:30 | INFO | train_inner | epoch 006: 1678 / 3002 loss=2.521, ppl=5.74, wps=5823.7, ups=0.09, wpb=64867, bsz=128, num_updates=16590, lr=9.98753e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=191084 2021-06-20 23:43:41 | INFO | train_inner | epoch 006: 1679 / 3002 loss=2.523, ppl=5.75, wps=5798.7, ups=0.09, wpb=64754, bsz=128, num_updates=16591, lr=9.98753e-05, gnorm=1.932, loss_scale=8, train_wall=11, gb_free=2.8, wall=191095 2021-06-20 23:43:52 | INFO | train_inner | epoch 006: 1680 / 3002 loss=2.629, ppl=6.19, wps=5824.1, ups=0.09, wpb=64854, bsz=128, num_updates=16592, lr=9.98753e-05, gnorm=2.058, loss_scale=8, train_wall=11, gb_free=2.8, wall=191107 2021-06-20 23:44:03 | INFO | train_inner | epoch 006: 1681 / 3002 loss=2.521, ppl=5.74, wps=5810.2, ups=0.09, wpb=64916, bsz=128, num_updates=16593, lr=9.98752e-05, gnorm=1.857, loss_scale=8, train_wall=11, gb_free=2.8, wall=191118 2021-06-20 23:44:15 | INFO | train_inner | epoch 006: 1682 / 3002 loss=2.615, ppl=6.13, wps=5784.5, ups=0.09, wpb=64784, bsz=128, num_updates=16594, lr=9.98752e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=191129 2021-06-20 23:44:25 | INFO | train_inner | epoch 006: 1683 / 3002 loss=2.558, ppl=5.89, wps=5951.2, ups=0.09, wpb=64808, bsz=128, num_updates=16595, lr=9.98752e-05, gnorm=1.927, loss_scale=8, train_wall=10, gb_free=2.8, wall=191140 2021-06-20 23:44:36 | INFO | train_inner | epoch 006: 1684 / 3002 loss=2.415, ppl=5.33, wps=5908.3, ups=0.09, wpb=64933, bsz=128, num_updates=16596, lr=9.98752e-05, gnorm=1.877, loss_scale=8, train_wall=11, gb_free=2.8, wall=191151 2021-06-20 23:44:47 | INFO | train_inner | epoch 006: 1685 / 3002 loss=2.496, ppl=5.64, wps=5895.8, ups=0.09, wpb=64826, bsz=128, num_updates=16597, lr=9.98752e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=191162 2021-06-20 23:44:58 | INFO | train_inner | epoch 006: 1686 / 3002 loss=2.32, ppl=4.99, wps=5915.8, ups=0.09, wpb=64907, bsz=128, num_updates=16598, lr=9.98752e-05, gnorm=1.813, loss_scale=8, train_wall=11, gb_free=2.8, wall=191173 2021-06-20 23:45:09 | INFO | train_inner | epoch 006: 1687 / 3002 loss=2.478, ppl=5.57, wps=5929.2, ups=0.09, wpb=64858, bsz=128, num_updates=16599, lr=9.98752e-05, gnorm=1.853, loss_scale=8, train_wall=10, gb_free=2.8, wall=191184 2021-06-20 23:45:21 | INFO | train_inner | epoch 006: 1688 / 3002 loss=2.524, ppl=5.75, wps=5806.3, ups=0.09, wpb=64844, bsz=128, num_updates=16600, lr=9.98752e-05, gnorm=2.192, loss_scale=8, train_wall=11, gb_free=2.8, wall=191195 2021-06-20 23:45:32 | INFO | train_inner | epoch 006: 1689 / 3002 loss=2.532, ppl=5.78, wps=5840.4, ups=0.09, wpb=64708, bsz=128, num_updates=16601, lr=9.98752e-05, gnorm=1.887, loss_scale=8, train_wall=11, gb_free=2.8, wall=191206 2021-06-20 23:45:43 | INFO | train_inner | epoch 006: 1690 / 3002 loss=2.429, ppl=5.38, wps=5819.5, ups=0.09, wpb=64840, bsz=128, num_updates=16602, lr=9.98752e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=191217 2021-06-20 23:45:54 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 2021-06-20 23:46:05 | INFO | train_inner | epoch 006: 1692 / 3002 loss=2.463, ppl=5.51, wps=2899.6, ups=0.04, wpb=64815, bsz=128, num_updates=16603, lr=9.98752e-05, gnorm=1.922, loss_scale=4, train_wall=21, gb_free=2.8, wall=191239 2021-06-20 23:46:16 | INFO | train_inner | epoch 006: 1693 / 3002 loss=2.538, ppl=5.81, wps=5808.4, ups=0.09, wpb=64861, bsz=128, num_updates=16604, lr=9.98752e-05, gnorm=1.986, loss_scale=4, train_wall=11, gb_free=2.8, wall=191251 2021-06-20 23:46:27 | INFO | train_inner | epoch 006: 1694 / 3002 loss=2.323, ppl=5, wps=5826.4, ups=0.09, wpb=64868, bsz=128, num_updates=16605, lr=9.98752e-05, gnorm=1.972, loss_scale=4, train_wall=11, gb_free=2.8, wall=191262 2021-06-20 23:46:38 | INFO | train_inner | epoch 006: 1695 / 3002 loss=2.492, ppl=5.63, wps=5850.2, ups=0.09, wpb=64760, bsz=128, num_updates=16606, lr=9.98751e-05, gnorm=1.956, loss_scale=4, train_wall=11, gb_free=2.8, wall=191273 2021-06-20 23:46:50 | INFO | train_inner | epoch 006: 1696 / 3002 loss=2.597, ppl=6.05, wps=5824, ups=0.09, wpb=64831, bsz=128, num_updates=16607, lr=9.98751e-05, gnorm=1.943, loss_scale=4, train_wall=11, gb_free=2.8, wall=191284 2021-06-20 23:47:01 | INFO | train_inner | epoch 006: 1697 / 3002 loss=2.435, ppl=5.41, wps=5823.6, ups=0.09, wpb=64852, bsz=128, num_updates=16608, lr=9.98751e-05, gnorm=1.992, loss_scale=4, train_wall=11, gb_free=2.8, wall=191295 2021-06-20 23:47:12 | INFO | train_inner | epoch 006: 1698 / 3002 loss=2.525, ppl=5.75, wps=5936.2, ups=0.09, wpb=64817, bsz=128, num_updates=16609, lr=9.98751e-05, gnorm=1.979, loss_scale=4, train_wall=10, gb_free=2.8, wall=191306 2021-06-20 23:47:23 | INFO | train_inner | epoch 006: 1699 / 3002 loss=2.502, ppl=5.66, wps=5870.7, ups=0.09, wpb=64821, bsz=128, num_updates=16610, lr=9.98751e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=191317 2021-06-20 23:47:34 | INFO | train_inner | epoch 006: 1700 / 3002 loss=2.593, ppl=6.04, wps=5823.8, ups=0.09, wpb=64809, bsz=128, num_updates=16611, lr=9.98751e-05, gnorm=5.179, loss_scale=4, train_wall=11, gb_free=2.8, wall=191328 2021-06-20 23:47:45 | INFO | train_inner | epoch 006: 1701 / 3002 loss=2.573, ppl=5.95, wps=5844.1, ups=0.09, wpb=64841, bsz=128, num_updates=16612, lr=9.98751e-05, gnorm=2.013, loss_scale=4, train_wall=11, gb_free=2.8, wall=191339 2021-06-20 23:47:56 | INFO | train_inner | epoch 006: 1702 / 3002 loss=2.429, ppl=5.39, wps=5986.9, ups=0.09, wpb=64907, bsz=128, num_updates=16613, lr=9.98751e-05, gnorm=1.917, loss_scale=4, train_wall=10, gb_free=2.8, wall=191350 2021-06-20 23:48:07 | INFO | train_inner | epoch 006: 1703 / 3002 loss=2.567, ppl=5.93, wps=5799.2, ups=0.09, wpb=64803, bsz=128, num_updates=16614, lr=9.98751e-05, gnorm=2.041, loss_scale=4, train_wall=11, gb_free=2.8, wall=191361 2021-06-20 23:48:18 | INFO | train_inner | epoch 006: 1704 / 3002 loss=2.579, ppl=5.98, wps=5942.4, ups=0.09, wpb=64844, bsz=128, num_updates=16615, lr=9.98751e-05, gnorm=2.044, loss_scale=4, train_wall=10, gb_free=2.8, wall=191372 2021-06-20 23:48:29 | INFO | train_inner | epoch 006: 1705 / 3002 loss=2.471, ppl=5.55, wps=5816.4, ups=0.09, wpb=64771, bsz=128, num_updates=16616, lr=9.98751e-05, gnorm=1.842, loss_scale=4, train_wall=11, gb_free=2.8, wall=191383 2021-06-20 23:48:40 | INFO | train_inner | epoch 006: 1706 / 3002 loss=2.422, ppl=5.36, wps=5868.7, ups=0.09, wpb=64864, bsz=128, num_updates=16617, lr=9.98751e-05, gnorm=1.886, loss_scale=4, train_wall=11, gb_free=2.8, wall=191394 2021-06-20 23:48:51 | INFO | train_inner | epoch 006: 1707 / 3002 loss=2.484, ppl=5.59, wps=6008.6, ups=0.09, wpb=64895, bsz=128, num_updates=16618, lr=9.9875e-05, gnorm=2.017, loss_scale=4, train_wall=10, gb_free=2.8, wall=191405 2021-06-20 23:49:02 | INFO | train_inner | epoch 006: 1708 / 3002 loss=2.338, ppl=5.06, wps=5899.4, ups=0.09, wpb=64878, bsz=128, num_updates=16619, lr=9.9875e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=191416 2021-06-20 23:49:13 | INFO | train_inner | epoch 006: 1709 / 3002 loss=2.387, ppl=5.23, wps=5812, ups=0.09, wpb=64814, bsz=128, num_updates=16620, lr=9.9875e-05, gnorm=2.49, loss_scale=4, train_wall=11, gb_free=2.8, wall=191427 2021-06-20 23:49:24 | INFO | train_inner | epoch 006: 1710 / 3002 loss=2.21, ppl=4.63, wps=5898.8, ups=0.09, wpb=64838, bsz=128, num_updates=16621, lr=9.9875e-05, gnorm=1.904, loss_scale=4, train_wall=11, gb_free=2.8, wall=191438 2021-06-20 23:49:35 | INFO | train_inner | epoch 006: 1711 / 3002 loss=2.507, ppl=5.69, wps=5918.2, ups=0.09, wpb=64870, bsz=128, num_updates=16622, lr=9.9875e-05, gnorm=1.965, loss_scale=4, train_wall=10, gb_free=2.8, wall=191449 2021-06-20 23:49:46 | INFO | train_inner | epoch 006: 1712 / 3002 loss=2.28, ppl=4.86, wps=5893.7, ups=0.09, wpb=64845, bsz=128, num_updates=16623, lr=9.9875e-05, gnorm=1.929, loss_scale=4, train_wall=11, gb_free=2.8, wall=191460 2021-06-20 23:49:57 | INFO | train_inner | epoch 006: 1713 / 3002 loss=2.456, ppl=5.49, wps=5721.8, ups=0.09, wpb=64870, bsz=128, num_updates=16624, lr=9.9875e-05, gnorm=2.021, loss_scale=4, train_wall=11, gb_free=2.8, wall=191472 2021-06-20 23:50:08 | INFO | train_inner | epoch 006: 1714 / 3002 loss=2.496, ppl=5.64, wps=5836.3, ups=0.09, wpb=64807, bsz=128, num_updates=16625, lr=9.9875e-05, gnorm=1.94, loss_scale=4, train_wall=11, gb_free=2.8, wall=191483 2021-06-20 23:50:20 | INFO | train_inner | epoch 006: 1715 / 3002 loss=2.423, ppl=5.36, wps=5791.6, ups=0.09, wpb=64811, bsz=128, num_updates=16626, lr=9.9875e-05, gnorm=1.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=191494 2021-06-20 23:50:31 | INFO | train_inner | epoch 006: 1716 / 3002 loss=2.463, ppl=5.52, wps=5815.4, ups=0.09, wpb=64829, bsz=128, num_updates=16627, lr=9.9875e-05, gnorm=1.992, loss_scale=4, train_wall=11, gb_free=2.8, wall=191505 2021-06-20 23:50:42 | INFO | train_inner | epoch 006: 1717 / 3002 loss=2.48, ppl=5.58, wps=5916.7, ups=0.09, wpb=64878, bsz=128, num_updates=16628, lr=9.9875e-05, gnorm=1.918, loss_scale=4, train_wall=10, gb_free=2.8, wall=191516 2021-06-20 23:50:53 | INFO | train_inner | epoch 006: 1718 / 3002 loss=2.489, ppl=5.61, wps=5843.8, ups=0.09, wpb=64838, bsz=128, num_updates=16629, lr=9.9875e-05, gnorm=1.928, loss_scale=4, train_wall=11, gb_free=2.8, wall=191527 2021-06-20 23:51:04 | INFO | train_inner | epoch 006: 1719 / 3002 loss=2.537, ppl=5.8, wps=5760.9, ups=0.09, wpb=64779, bsz=128, num_updates=16630, lr=9.98749e-05, gnorm=2.356, loss_scale=4, train_wall=11, gb_free=2.8, wall=191538 2021-06-20 23:51:15 | INFO | train_inner | epoch 006: 1720 / 3002 loss=2.407, ppl=5.3, wps=5908.9, ups=0.09, wpb=64911, bsz=128, num_updates=16631, lr=9.98749e-05, gnorm=1.891, loss_scale=4, train_wall=11, gb_free=2.8, wall=191549 2021-06-20 23:51:26 | INFO | train_inner | epoch 006: 1721 / 3002 loss=2.539, ppl=5.81, wps=5823.3, ups=0.09, wpb=64837, bsz=128, num_updates=16632, lr=9.98749e-05, gnorm=1.903, loss_scale=4, train_wall=11, gb_free=2.8, wall=191561 2021-06-20 23:51:37 | INFO | train_inner | epoch 006: 1722 / 3002 loss=2.356, ppl=5.12, wps=5923.9, ups=0.09, wpb=64875, bsz=128, num_updates=16633, lr=9.98749e-05, gnorm=1.891, loss_scale=4, train_wall=11, gb_free=2.8, wall=191571 2021-06-20 23:51:48 | INFO | train_inner | epoch 006: 1723 / 3002 loss=2.517, ppl=5.72, wps=5812.7, ups=0.09, wpb=64824, bsz=128, num_updates=16634, lr=9.98749e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=191583 2021-06-20 23:52:00 | INFO | train_inner | epoch 006: 1724 / 3002 loss=2.407, ppl=5.3, wps=5770.9, ups=0.09, wpb=64803, bsz=128, num_updates=16635, lr=9.98749e-05, gnorm=1.859, loss_scale=4, train_wall=11, gb_free=2.8, wall=191594 2021-06-20 23:52:11 | INFO | train_inner | epoch 006: 1725 / 3002 loss=2.44, ppl=5.43, wps=5800, ups=0.09, wpb=64809, bsz=128, num_updates=16636, lr=9.98749e-05, gnorm=1.898, loss_scale=4, train_wall=11, gb_free=2.8, wall=191605 2021-06-20 23:52:22 | INFO | train_inner | epoch 006: 1726 / 3002 loss=2.486, ppl=5.6, wps=5793, ups=0.09, wpb=64859, bsz=128, num_updates=16637, lr=9.98749e-05, gnorm=1.886, loss_scale=4, train_wall=11, gb_free=2.8, wall=191616 2021-06-20 23:52:33 | INFO | train_inner | epoch 006: 1727 / 3002 loss=2.427, ppl=5.38, wps=5800.7, ups=0.09, wpb=64912, bsz=128, num_updates=16638, lr=9.98749e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=191627 2021-06-20 23:52:44 | INFO | train_inner | epoch 006: 1728 / 3002 loss=2.497, ppl=5.64, wps=5854.1, ups=0.09, wpb=64815, bsz=128, num_updates=16639, lr=9.98749e-05, gnorm=1.907, loss_scale=4, train_wall=11, gb_free=2.8, wall=191638 2021-06-20 23:52:55 | INFO | train_inner | epoch 006: 1729 / 3002 loss=2.394, ppl=5.26, wps=5913.9, ups=0.09, wpb=64787, bsz=128, num_updates=16640, lr=9.98749e-05, gnorm=1.925, loss_scale=4, train_wall=11, gb_free=2.8, wall=191649 2021-06-20 23:53:06 | INFO | train_inner | epoch 006: 1730 / 3002 loss=2.473, ppl=5.55, wps=5811.9, ups=0.09, wpb=64871, bsz=128, num_updates=16641, lr=9.98749e-05, gnorm=1.911, loss_scale=4, train_wall=11, gb_free=2.8, wall=191661 2021-06-20 23:53:17 | INFO | train_inner | epoch 006: 1731 / 3002 loss=2.451, ppl=5.47, wps=5836, ups=0.09, wpb=64884, bsz=128, num_updates=16642, lr=9.98749e-05, gnorm=1.882, loss_scale=4, train_wall=11, gb_free=2.8, wall=191672 2021-06-20 23:53:29 | INFO | train_inner | epoch 006: 1732 / 3002 loss=2.409, ppl=5.31, wps=5789.8, ups=0.09, wpb=64893, bsz=128, num_updates=16643, lr=9.98748e-05, gnorm=1.938, loss_scale=4, train_wall=11, gb_free=2.8, wall=191683 2021-06-20 23:53:40 | INFO | train_inner | epoch 006: 1733 / 3002 loss=2.501, ppl=5.66, wps=5925.1, ups=0.09, wpb=64837, bsz=128, num_updates=16644, lr=9.98748e-05, gnorm=1.923, loss_scale=4, train_wall=10, gb_free=2.8, wall=191694 2021-06-20 23:53:51 | INFO | train_inner | epoch 006: 1734 / 3002 loss=2.324, ppl=5.01, wps=5879.1, ups=0.09, wpb=64904, bsz=128, num_updates=16645, lr=9.98748e-05, gnorm=1.914, loss_scale=4, train_wall=11, gb_free=2.8, wall=191705 2021-06-20 23:54:02 | INFO | train_inner | epoch 006: 1735 / 3002 loss=2.361, ppl=5.14, wps=5871.6, ups=0.09, wpb=64810, bsz=128, num_updates=16646, lr=9.98748e-05, gnorm=1.869, loss_scale=4, train_wall=11, gb_free=2.8, wall=191716 2021-06-20 23:54:13 | INFO | train_inner | epoch 006: 1736 / 3002 loss=2.522, ppl=5.74, wps=5863.8, ups=0.09, wpb=64833, bsz=128, num_updates=16647, lr=9.98748e-05, gnorm=1.93, loss_scale=4, train_wall=11, gb_free=2.8, wall=191727 2021-06-20 23:54:24 | INFO | train_inner | epoch 006: 1737 / 3002 loss=2.441, ppl=5.43, wps=5820.6, ups=0.09, wpb=64794, bsz=128, num_updates=16648, lr=9.98748e-05, gnorm=1.961, loss_scale=4, train_wall=11, gb_free=2.8, wall=191738 2021-06-20 23:54:35 | INFO | train_inner | epoch 006: 1738 / 3002 loss=2.441, ppl=5.43, wps=5956.5, ups=0.09, wpb=64807, bsz=128, num_updates=16649, lr=9.98748e-05, gnorm=1.918, loss_scale=4, train_wall=10, gb_free=2.8, wall=191749 2021-06-20 23:54:46 | INFO | train_inner | epoch 006: 1739 / 3002 loss=2.404, ppl=5.29, wps=5787.9, ups=0.09, wpb=64806, bsz=128, num_updates=16650, lr=9.98748e-05, gnorm=1.832, loss_scale=4, train_wall=11, gb_free=2.8, wall=191760 2021-06-20 23:54:57 | INFO | train_inner | epoch 006: 1740 / 3002 loss=2.497, ppl=5.65, wps=5929.6, ups=0.09, wpb=64770, bsz=128, num_updates=16651, lr=9.98748e-05, gnorm=1.974, loss_scale=4, train_wall=10, gb_free=2.8, wall=191771 2021-06-20 23:55:08 | INFO | train_inner | epoch 006: 1741 / 3002 loss=2.342, ppl=5.07, wps=5899.5, ups=0.09, wpb=64890, bsz=128, num_updates=16652, lr=9.98748e-05, gnorm=1.923, loss_scale=4, train_wall=11, gb_free=2.8, wall=191782 2021-06-20 23:55:19 | INFO | train_inner | epoch 006: 1742 / 3002 loss=2.556, ppl=5.88, wps=5831.3, ups=0.09, wpb=64824, bsz=128, num_updates=16653, lr=9.98748e-05, gnorm=1.92, loss_scale=4, train_wall=11, gb_free=2.8, wall=191793 2021-06-20 23:55:30 | INFO | train_inner | epoch 006: 1743 / 3002 loss=2.516, ppl=5.72, wps=6009.4, ups=0.09, wpb=64901, bsz=128, num_updates=16654, lr=9.98748e-05, gnorm=1.92, loss_scale=4, train_wall=10, gb_free=2.8, wall=191804 2021-06-20 23:55:41 | INFO | train_inner | epoch 006: 1744 / 3002 loss=2.507, ppl=5.69, wps=5857.6, ups=0.09, wpb=64840, bsz=128, num_updates=16655, lr=9.98747e-05, gnorm=2.028, loss_scale=4, train_wall=11, gb_free=2.8, wall=191815 2021-06-20 23:55:52 | INFO | train_inner | epoch 006: 1745 / 3002 loss=2.513, ppl=5.71, wps=5854.9, ups=0.09, wpb=64828, bsz=128, num_updates=16656, lr=9.98747e-05, gnorm=1.86, loss_scale=4, train_wall=11, gb_free=2.8, wall=191826 2021-06-20 23:56:03 | INFO | train_inner | epoch 006: 1746 / 3002 loss=2.535, ppl=5.79, wps=5760, ups=0.09, wpb=64895, bsz=128, num_updates=16657, lr=9.98747e-05, gnorm=1.918, loss_scale=4, train_wall=11, gb_free=2.8, wall=191837 2021-06-20 23:56:14 | INFO | train_inner | epoch 006: 1747 / 3002 loss=2.519, ppl=5.73, wps=5867.1, ups=0.09, wpb=64816, bsz=128, num_updates=16658, lr=9.98747e-05, gnorm=1.918, loss_scale=4, train_wall=11, gb_free=2.8, wall=191849 2021-06-20 23:56:25 | INFO | train_inner | epoch 006: 1748 / 3002 loss=2.51, ppl=5.69, wps=5973.7, ups=0.09, wpb=64861, bsz=128, num_updates=16659, lr=9.98747e-05, gnorm=1.875, loss_scale=4, train_wall=10, gb_free=2.8, wall=191859 2021-06-20 23:56:36 | INFO | train_inner | epoch 006: 1749 / 3002 loss=2.445, ppl=5.44, wps=5943.7, ups=0.09, wpb=64832, bsz=128, num_updates=16660, lr=9.98747e-05, gnorm=1.837, loss_scale=4, train_wall=10, gb_free=2.8, wall=191870 2021-06-20 23:56:47 | INFO | train_inner | epoch 006: 1750 / 3002 loss=2.372, ppl=5.18, wps=5857.8, ups=0.09, wpb=64788, bsz=128, num_updates=16661, lr=9.98747e-05, gnorm=1.857, loss_scale=4, train_wall=11, gb_free=2.8, wall=191881 2021-06-20 23:56:58 | INFO | train_inner | epoch 006: 1751 / 3002 loss=2.451, ppl=5.47, wps=5825.6, ups=0.09, wpb=64855, bsz=128, num_updates=16662, lr=9.98747e-05, gnorm=1.985, loss_scale=4, train_wall=11, gb_free=2.8, wall=191892 2021-06-20 23:57:09 | INFO | train_inner | epoch 006: 1752 / 3002 loss=2.629, ppl=6.18, wps=5811.9, ups=0.09, wpb=64931, bsz=128, num_updates=16663, lr=9.98747e-05, gnorm=1.891, loss_scale=4, train_wall=11, gb_free=2.8, wall=191904 2021-06-20 23:57:20 | INFO | train_inner | epoch 006: 1753 / 3002 loss=2.398, ppl=5.27, wps=5899.8, ups=0.09, wpb=64801, bsz=128, num_updates=16664, lr=9.98747e-05, gnorm=1.885, loss_scale=4, train_wall=11, gb_free=2.8, wall=191915 2021-06-20 23:57:31 | INFO | train_inner | epoch 006: 1754 / 3002 loss=2.545, ppl=5.84, wps=5891.1, ups=0.09, wpb=64870, bsz=128, num_updates=16665, lr=9.98747e-05, gnorm=1.899, loss_scale=4, train_wall=11, gb_free=2.8, wall=191926 2021-06-20 23:57:42 | INFO | train_inner | epoch 006: 1755 / 3002 loss=2.494, ppl=5.63, wps=5879.3, ups=0.09, wpb=64756, bsz=128, num_updates=16666, lr=9.98747e-05, gnorm=2.018, loss_scale=4, train_wall=11, gb_free=2.8, wall=191937 2021-06-20 23:57:54 | INFO | train_inner | epoch 006: 1756 / 3002 loss=2.486, ppl=5.6, wps=5747.9, ups=0.09, wpb=64806, bsz=128, num_updates=16667, lr=9.98747e-05, gnorm=1.877, loss_scale=4, train_wall=11, gb_free=2.8, wall=191948 2021-06-20 23:58:05 | INFO | train_inner | epoch 006: 1757 / 3002 loss=2.468, ppl=5.53, wps=5895.3, ups=0.09, wpb=64847, bsz=128, num_updates=16668, lr=9.98746e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=191959 2021-06-20 23:58:16 | INFO | train_inner | epoch 006: 1758 / 3002 loss=2.63, ppl=6.19, wps=5923.8, ups=0.09, wpb=64791, bsz=128, num_updates=16669, lr=9.98746e-05, gnorm=1.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=191970 2021-06-20 23:58:27 | INFO | train_inner | epoch 006: 1759 / 3002 loss=2.402, ppl=5.29, wps=5780.7, ups=0.09, wpb=64771, bsz=128, num_updates=16670, lr=9.98746e-05, gnorm=1.897, loss_scale=4, train_wall=11, gb_free=2.8, wall=191981 2021-06-20 23:58:38 | INFO | train_inner | epoch 006: 1760 / 3002 loss=2.284, ppl=4.87, wps=5815.7, ups=0.09, wpb=64750, bsz=128, num_updates=16671, lr=9.98746e-05, gnorm=1.836, loss_scale=4, train_wall=11, gb_free=2.8, wall=191992 2021-06-20 23:58:49 | INFO | train_inner | epoch 006: 1761 / 3002 loss=2.536, ppl=5.8, wps=5792.7, ups=0.09, wpb=64839, bsz=128, num_updates=16672, lr=9.98746e-05, gnorm=1.861, loss_scale=4, train_wall=11, gb_free=2.8, wall=192003 2021-06-20 23:59:00 | INFO | train_inner | epoch 006: 1762 / 3002 loss=2.501, ppl=5.66, wps=5889, ups=0.09, wpb=64822, bsz=128, num_updates=16673, lr=9.98746e-05, gnorm=2.006, loss_scale=4, train_wall=11, gb_free=2.8, wall=192014 2021-06-20 23:59:11 | INFO | train_inner | epoch 006: 1763 / 3002 loss=2.482, ppl=5.59, wps=5789.2, ups=0.09, wpb=64840, bsz=128, num_updates=16674, lr=9.98746e-05, gnorm=1.98, loss_scale=4, train_wall=11, gb_free=2.8, wall=192026 2021-06-20 23:59:22 | INFO | train_inner | epoch 006: 1764 / 3002 loss=2.502, ppl=5.67, wps=5884.6, ups=0.09, wpb=64782, bsz=128, num_updates=16675, lr=9.98746e-05, gnorm=1.952, loss_scale=4, train_wall=11, gb_free=2.8, wall=192037 2021-06-20 23:59:33 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-20 23:59:44 | INFO | train_inner | epoch 006: 1766 / 3002 loss=2.506, ppl=5.68, wps=2937.9, ups=0.05, wpb=64833, bsz=128, num_updates=16676, lr=9.98746e-05, gnorm=2.041, loss_scale=2, train_wall=21, gb_free=2.8, wall=192059 2021-06-20 23:59:55 | INFO | train_inner | epoch 006: 1767 / 3002 loss=2.366, ppl=5.16, wps=5826.7, ups=0.09, wpb=64893, bsz=128, num_updates=16677, lr=9.98746e-05, gnorm=1.869, loss_scale=2, train_wall=11, gb_free=2.8, wall=192070 2021-06-21 00:00:07 | INFO | train_inner | epoch 006: 1768 / 3002 loss=2.505, ppl=5.68, wps=5788, ups=0.09, wpb=64733, bsz=128, num_updates=16678, lr=9.98746e-05, gnorm=1.851, loss_scale=2, train_wall=11, gb_free=2.8, wall=192081 2021-06-21 00:00:18 | INFO | train_inner | epoch 006: 1769 / 3002 loss=2.443, ppl=5.44, wps=5835.3, ups=0.09, wpb=64864, bsz=128, num_updates=16679, lr=9.98746e-05, gnorm=1.833, loss_scale=2, train_wall=11, gb_free=2.8, wall=192092 2021-06-21 00:00:29 | INFO | train_inner | epoch 006: 1770 / 3002 loss=2.381, ppl=5.21, wps=5843.9, ups=0.09, wpb=64831, bsz=128, num_updates=16680, lr=9.98745e-05, gnorm=1.864, loss_scale=2, train_wall=11, gb_free=2.8, wall=192103 2021-06-21 00:00:40 | INFO | train_inner | epoch 006: 1771 / 3002 loss=2.516, ppl=5.72, wps=6028.2, ups=0.09, wpb=64815, bsz=128, num_updates=16681, lr=9.98745e-05, gnorm=1.968, loss_scale=2, train_wall=10, gb_free=2.8, wall=192114 2021-06-21 00:00:51 | INFO | train_inner | epoch 006: 1772 / 3002 loss=2.374, ppl=5.18, wps=5790.1, ups=0.09, wpb=64776, bsz=128, num_updates=16682, lr=9.98745e-05, gnorm=1.856, loss_scale=2, train_wall=11, gb_free=2.8, wall=192125 2021-06-21 00:01:02 | INFO | train_inner | epoch 006: 1773 / 3002 loss=2.504, ppl=5.67, wps=5796.8, ups=0.09, wpb=64815, bsz=128, num_updates=16683, lr=9.98745e-05, gnorm=1.941, loss_scale=2, train_wall=11, gb_free=2.8, wall=192136 2021-06-21 00:01:13 | INFO | train_inner | epoch 006: 1774 / 3002 loss=2.465, ppl=5.52, wps=5911.5, ups=0.09, wpb=64860, bsz=128, num_updates=16684, lr=9.98745e-05, gnorm=1.923, loss_scale=2, train_wall=10, gb_free=2.8, wall=192147 2021-06-21 00:01:24 | INFO | train_inner | epoch 006: 1775 / 3002 loss=2.45, ppl=5.47, wps=5818.8, ups=0.09, wpb=64850, bsz=128, num_updates=16685, lr=9.98745e-05, gnorm=1.917, loss_scale=2, train_wall=11, gb_free=2.8, wall=192158 2021-06-21 00:01:35 | INFO | train_inner | epoch 006: 1776 / 3002 loss=2.437, ppl=5.41, wps=5944.6, ups=0.09, wpb=64765, bsz=128, num_updates=16686, lr=9.98745e-05, gnorm=2.002, loss_scale=2, train_wall=10, gb_free=2.8, wall=192169 2021-06-21 00:01:46 | INFO | train_inner | epoch 006: 1777 / 3002 loss=2.5, ppl=5.66, wps=5872, ups=0.09, wpb=64768, bsz=128, num_updates=16687, lr=9.98745e-05, gnorm=1.919, loss_scale=2, train_wall=11, gb_free=2.8, wall=192180 2021-06-21 00:01:57 | INFO | train_inner | epoch 006: 1778 / 3002 loss=2.443, ppl=5.44, wps=5903, ups=0.09, wpb=64856, bsz=128, num_updates=16688, lr=9.98745e-05, gnorm=1.985, loss_scale=2, train_wall=11, gb_free=2.8, wall=192191 2021-06-21 00:02:08 | INFO | train_inner | epoch 006: 1779 / 3002 loss=2.449, ppl=5.46, wps=5887.3, ups=0.09, wpb=64854, bsz=128, num_updates=16689, lr=9.98745e-05, gnorm=1.998, loss_scale=2, train_wall=11, gb_free=2.8, wall=192202 2021-06-21 00:02:19 | INFO | train_inner | epoch 006: 1780 / 3002 loss=2.413, ppl=5.33, wps=5822.8, ups=0.09, wpb=64888, bsz=128, num_updates=16690, lr=9.98745e-05, gnorm=1.954, loss_scale=2, train_wall=11, gb_free=2.8, wall=192214 2021-06-21 00:02:30 | INFO | train_inner | epoch 006: 1781 / 3002 loss=2.314, ppl=4.97, wps=5943, ups=0.09, wpb=64843, bsz=128, num_updates=16691, lr=9.98745e-05, gnorm=1.912, loss_scale=2, train_wall=10, gb_free=2.8, wall=192224 2021-06-21 00:02:41 | INFO | train_inner | epoch 006: 1782 / 3002 loss=2.528, ppl=5.77, wps=5873.9, ups=0.09, wpb=64838, bsz=128, num_updates=16692, lr=9.98745e-05, gnorm=2.017, loss_scale=2, train_wall=11, gb_free=2.8, wall=192235 2021-06-21 00:02:52 | INFO | train_inner | epoch 006: 1783 / 3002 loss=2.453, ppl=5.48, wps=5936.9, ups=0.09, wpb=64796, bsz=128, num_updates=16693, lr=9.98744e-05, gnorm=1.908, loss_scale=2, train_wall=10, gb_free=2.8, wall=192246 2021-06-21 00:03:03 | INFO | train_inner | epoch 006: 1784 / 3002 loss=2.281, ppl=4.86, wps=5844.7, ups=0.09, wpb=64858, bsz=128, num_updates=16694, lr=9.98744e-05, gnorm=1.97, loss_scale=2, train_wall=11, gb_free=2.8, wall=192257 2021-06-21 00:03:14 | INFO | train_inner | epoch 006: 1785 / 3002 loss=2.398, ppl=5.27, wps=5796.9, ups=0.09, wpb=64784, bsz=128, num_updates=16695, lr=9.98744e-05, gnorm=1.898, loss_scale=2, train_wall=11, gb_free=2.8, wall=192269 2021-06-21 00:03:25 | INFO | train_inner | epoch 006: 1786 / 3002 loss=2.419, ppl=5.35, wps=5935.7, ups=0.09, wpb=64871, bsz=128, num_updates=16696, lr=9.98744e-05, gnorm=1.909, loss_scale=2, train_wall=10, gb_free=2.8, wall=192280 2021-06-21 00:03:37 | INFO | train_inner | epoch 006: 1787 / 3002 loss=2.429, ppl=5.38, wps=5733.9, ups=0.09, wpb=64826, bsz=128, num_updates=16697, lr=9.98744e-05, gnorm=1.851, loss_scale=2, train_wall=11, gb_free=2.8, wall=192291 2021-06-21 00:03:48 | INFO | train_inner | epoch 006: 1788 / 3002 loss=2.559, ppl=5.89, wps=5889.1, ups=0.09, wpb=64823, bsz=128, num_updates=16698, lr=9.98744e-05, gnorm=2.132, loss_scale=2, train_wall=11, gb_free=2.8, wall=192302 2021-06-21 00:03:58 | INFO | train_inner | epoch 006: 1789 / 3002 loss=2.436, ppl=5.41, wps=6025.1, ups=0.09, wpb=64822, bsz=128, num_updates=16699, lr=9.98744e-05, gnorm=1.834, loss_scale=2, train_wall=10, gb_free=2.8, wall=192313 2021-06-21 00:04:09 | INFO | train_inner | epoch 006: 1790 / 3002 loss=2.334, ppl=5.04, wps=5839.4, ups=0.09, wpb=64811, bsz=128, num_updates=16700, lr=9.98744e-05, gnorm=1.942, loss_scale=2, train_wall=11, gb_free=2.8, wall=192324 2021-06-21 00:04:20 | INFO | train_inner | epoch 006: 1791 / 3002 loss=2.305, ppl=4.94, wps=5905.6, ups=0.09, wpb=64831, bsz=128, num_updates=16701, lr=9.98744e-05, gnorm=1.889, loss_scale=2, train_wall=11, gb_free=2.8, wall=192335 2021-06-21 00:04:32 | INFO | train_inner | epoch 006: 1792 / 3002 loss=2.561, ppl=5.9, wps=5762.7, ups=0.09, wpb=64851, bsz=128, num_updates=16702, lr=9.98744e-05, gnorm=1.958, loss_scale=2, train_wall=11, gb_free=2.8, wall=192346 2021-06-21 00:04:43 | INFO | train_inner | epoch 006: 1793 / 3002 loss=2.47, ppl=5.54, wps=5784.1, ups=0.09, wpb=64833, bsz=128, num_updates=16703, lr=9.98744e-05, gnorm=2.048, loss_scale=2, train_wall=11, gb_free=2.8, wall=192357 2021-06-21 00:04:54 | INFO | train_inner | epoch 006: 1794 / 3002 loss=2.395, ppl=5.26, wps=5836.2, ups=0.09, wpb=64840, bsz=128, num_updates=16704, lr=9.98744e-05, gnorm=1.88, loss_scale=2, train_wall=11, gb_free=2.8, wall=192368 2021-06-21 00:05:05 | INFO | train_inner | epoch 006: 1795 / 3002 loss=2.345, ppl=5.08, wps=5952.7, ups=0.09, wpb=64839, bsz=128, num_updates=16705, lr=9.98743e-05, gnorm=1.889, loss_scale=2, train_wall=10, gb_free=2.8, wall=192379 2021-06-21 00:05:16 | INFO | train_inner | epoch 006: 1796 / 3002 loss=2.37, ppl=5.17, wps=5734.7, ups=0.09, wpb=64894, bsz=128, num_updates=16706, lr=9.98743e-05, gnorm=1.965, loss_scale=2, train_wall=11, gb_free=2.8, wall=192391 2021-06-21 00:05:27 | INFO | train_inner | epoch 006: 1797 / 3002 loss=2.469, ppl=5.54, wps=5765.8, ups=0.09, wpb=64901, bsz=128, num_updates=16707, lr=9.98743e-05, gnorm=1.907, loss_scale=2, train_wall=11, gb_free=2.8, wall=192402 2021-06-21 00:05:39 | INFO | train_inner | epoch 006: 1798 / 3002 loss=2.402, ppl=5.29, wps=5802.4, ups=0.09, wpb=64835, bsz=128, num_updates=16708, lr=9.98743e-05, gnorm=1.915, loss_scale=2, train_wall=11, gb_free=2.8, wall=192413 2021-06-21 00:05:50 | INFO | train_inner | epoch 006: 1799 / 3002 loss=2.464, ppl=5.52, wps=5887.5, ups=0.09, wpb=64793, bsz=128, num_updates=16709, lr=9.98743e-05, gnorm=3.214, loss_scale=2, train_wall=11, gb_free=2.8, wall=192424 2021-06-21 00:06:01 | INFO | train_inner | epoch 006: 1800 / 3002 loss=2.369, ppl=5.17, wps=5800, ups=0.09, wpb=64813, bsz=128, num_updates=16710, lr=9.98743e-05, gnorm=1.877, loss_scale=2, train_wall=11, gb_free=2.8, wall=192435 2021-06-21 00:06:12 | INFO | train_inner | epoch 006: 1801 / 3002 loss=2.536, ppl=5.8, wps=5813.3, ups=0.09, wpb=64783, bsz=128, num_updates=16711, lr=9.98743e-05, gnorm=1.851, loss_scale=2, train_wall=11, gb_free=2.8, wall=192446 2021-06-21 00:06:23 | INFO | train_inner | epoch 006: 1802 / 3002 loss=2.509, ppl=5.69, wps=5818.2, ups=0.09, wpb=64811, bsz=128, num_updates=16712, lr=9.98743e-05, gnorm=1.892, loss_scale=2, train_wall=11, gb_free=2.8, wall=192457 2021-06-21 00:06:34 | INFO | train_inner | epoch 006: 1803 / 3002 loss=2.474, ppl=5.55, wps=5849.6, ups=0.09, wpb=64794, bsz=128, num_updates=16713, lr=9.98743e-05, gnorm=1.92, loss_scale=2, train_wall=11, gb_free=2.8, wall=192468 2021-06-21 00:06:45 | INFO | train_inner | epoch 006: 1804 / 3002 loss=2.284, ppl=4.87, wps=5941.9, ups=0.09, wpb=64865, bsz=128, num_updates=16714, lr=9.98743e-05, gnorm=1.944, loss_scale=2, train_wall=10, gb_free=2.8, wall=192479 2021-06-21 00:06:56 | INFO | train_inner | epoch 006: 1805 / 3002 loss=2.494, ppl=5.63, wps=5885.1, ups=0.09, wpb=64821, bsz=128, num_updates=16715, lr=9.98743e-05, gnorm=4.336, loss_scale=2, train_wall=11, gb_free=2.8, wall=192490 2021-06-21 00:07:07 | INFO | train_inner | epoch 006: 1806 / 3002 loss=2.441, ppl=5.43, wps=5834.5, ups=0.09, wpb=64814, bsz=128, num_updates=16716, lr=9.98743e-05, gnorm=1.968, loss_scale=2, train_wall=11, gb_free=2.8, wall=192502 2021-06-21 00:07:18 | INFO | train_inner | epoch 006: 1807 / 3002 loss=2.493, ppl=5.63, wps=5852.4, ups=0.09, wpb=64819, bsz=128, num_updates=16717, lr=9.98743e-05, gnorm=1.941, loss_scale=2, train_wall=11, gb_free=2.8, wall=192513 2021-06-21 00:07:29 | INFO | train_inner | epoch 006: 1808 / 3002 loss=2.472, ppl=5.55, wps=5917.8, ups=0.09, wpb=64778, bsz=128, num_updates=16718, lr=9.98742e-05, gnorm=1.966, loss_scale=2, train_wall=10, gb_free=2.8, wall=192524 2021-06-21 00:07:40 | INFO | train_inner | epoch 006: 1809 / 3002 loss=2.385, ppl=5.22, wps=5801.7, ups=0.09, wpb=64834, bsz=128, num_updates=16719, lr=9.98742e-05, gnorm=1.851, loss_scale=2, train_wall=11, gb_free=2.8, wall=192535 2021-06-21 00:07:52 | INFO | train_inner | epoch 006: 1810 / 3002 loss=2.379, ppl=5.2, wps=5828.8, ups=0.09, wpb=64862, bsz=128, num_updates=16720, lr=9.98742e-05, gnorm=1.869, loss_scale=2, train_wall=11, gb_free=2.8, wall=192546 2021-06-21 00:08:03 | INFO | train_inner | epoch 006: 1811 / 3002 loss=2.522, ppl=5.75, wps=5791.5, ups=0.09, wpb=64844, bsz=128, num_updates=16721, lr=9.98742e-05, gnorm=1.932, loss_scale=2, train_wall=11, gb_free=2.8, wall=192557 2021-06-21 00:08:14 | INFO | train_inner | epoch 006: 1812 / 3002 loss=2.51, ppl=5.7, wps=5821, ups=0.09, wpb=64830, bsz=128, num_updates=16722, lr=9.98742e-05, gnorm=1.926, loss_scale=2, train_wall=11, gb_free=2.8, wall=192568 2021-06-21 00:08:25 | INFO | train_inner | epoch 006: 1813 / 3002 loss=2.573, ppl=5.95, wps=5813.7, ups=0.09, wpb=64811, bsz=128, num_updates=16723, lr=9.98742e-05, gnorm=1.911, loss_scale=2, train_wall=11, gb_free=2.8, wall=192579 2021-06-21 00:08:36 | INFO | train_inner | epoch 006: 1814 / 3002 loss=2.367, ppl=5.16, wps=6078.3, ups=0.09, wpb=64799, bsz=128, num_updates=16724, lr=9.98742e-05, gnorm=1.832, loss_scale=2, train_wall=10, gb_free=2.8, wall=192590 2021-06-21 00:08:47 | INFO | train_inner | epoch 006: 1815 / 3002 loss=2.566, ppl=5.92, wps=5874.2, ups=0.09, wpb=64852, bsz=128, num_updates=16725, lr=9.98742e-05, gnorm=1.926, loss_scale=2, train_wall=11, gb_free=2.8, wall=192601 2021-06-21 00:08:58 | INFO | train_inner | epoch 006: 1816 / 3002 loss=2.698, ppl=6.49, wps=5876.5, ups=0.09, wpb=64791, bsz=128, num_updates=16726, lr=9.98742e-05, gnorm=1.916, loss_scale=2, train_wall=11, gb_free=2.8, wall=192612 2021-06-21 00:09:09 | INFO | train_inner | epoch 006: 1817 / 3002 loss=2.495, ppl=5.64, wps=5751.7, ups=0.09, wpb=64914, bsz=128, num_updates=16727, lr=9.98742e-05, gnorm=1.891, loss_scale=2, train_wall=11, gb_free=2.8, wall=192623 2021-06-21 00:09:20 | INFO | train_inner | epoch 006: 1818 / 3002 loss=2.427, ppl=5.38, wps=5799, ups=0.09, wpb=64805, bsz=128, num_updates=16728, lr=9.98742e-05, gnorm=1.903, loss_scale=2, train_wall=11, gb_free=2.8, wall=192635 2021-06-21 00:09:31 | INFO | train_inner | epoch 006: 1819 / 3002 loss=2.637, ppl=6.22, wps=5894, ups=0.09, wpb=64897, bsz=128, num_updates=16729, lr=9.98742e-05, gnorm=1.993, loss_scale=2, train_wall=11, gb_free=2.8, wall=192646 2021-06-21 00:09:42 | INFO | train_inner | epoch 006: 1820 / 3002 loss=2.366, ppl=5.15, wps=5783.7, ups=0.09, wpb=64891, bsz=128, num_updates=16730, lr=9.98741e-05, gnorm=1.869, loss_scale=2, train_wall=11, gb_free=2.8, wall=192657 2021-06-21 00:09:54 | INFO | train_inner | epoch 006: 1821 / 3002 loss=2.581, ppl=5.98, wps=5832.6, ups=0.09, wpb=64757, bsz=128, num_updates=16731, lr=9.98741e-05, gnorm=2.492, loss_scale=2, train_wall=11, gb_free=2.8, wall=192668 2021-06-21 00:10:04 | INFO | train_inner | epoch 006: 1822 / 3002 loss=2.435, ppl=5.41, wps=5915.8, ups=0.09, wpb=64805, bsz=128, num_updates=16732, lr=9.98741e-05, gnorm=1.989, loss_scale=2, train_wall=11, gb_free=2.8, wall=192679 2021-06-21 00:10:16 | INFO | train_inner | epoch 006: 1823 / 3002 loss=2.223, ppl=4.67, wps=5796.3, ups=0.09, wpb=64831, bsz=128, num_updates=16733, lr=9.98741e-05, gnorm=1.903, loss_scale=2, train_wall=11, gb_free=2.8, wall=192690 2021-06-21 00:10:27 | INFO | train_inner | epoch 006: 1824 / 3002 loss=2.421, ppl=5.36, wps=5963.9, ups=0.09, wpb=64893, bsz=128, num_updates=16734, lr=9.98741e-05, gnorm=2.145, loss_scale=2, train_wall=10, gb_free=2.8, wall=192701 2021-06-21 00:10:38 | INFO | train_inner | epoch 006: 1825 / 3002 loss=2.45, ppl=5.46, wps=5892.5, ups=0.09, wpb=64865, bsz=128, num_updates=16735, lr=9.98741e-05, gnorm=1.968, loss_scale=2, train_wall=11, gb_free=2.8, wall=192712 2021-06-21 00:10:49 | INFO | train_inner | epoch 006: 1826 / 3002 loss=2.489, ppl=5.61, wps=5781.3, ups=0.09, wpb=64875, bsz=128, num_updates=16736, lr=9.98741e-05, gnorm=2.016, loss_scale=2, train_wall=11, gb_free=2.8, wall=192723 2021-06-21 00:11:00 | INFO | train_inner | epoch 006: 1827 / 3002 loss=2.539, ppl=5.81, wps=5947.4, ups=0.09, wpb=64840, bsz=128, num_updates=16737, lr=9.98741e-05, gnorm=2.02, loss_scale=2, train_wall=10, gb_free=2.8, wall=192734 2021-06-21 00:11:11 | INFO | train_inner | epoch 006: 1828 / 3002 loss=2.375, ppl=5.19, wps=5713.2, ups=0.09, wpb=64816, bsz=128, num_updates=16738, lr=9.98741e-05, gnorm=1.843, loss_scale=2, train_wall=11, gb_free=2.8, wall=192745 2021-06-21 00:11:22 | INFO | train_inner | epoch 006: 1829 / 3002 loss=2.469, ppl=5.54, wps=5950.8, ups=0.09, wpb=64758, bsz=128, num_updates=16739, lr=9.98741e-05, gnorm=2.035, loss_scale=2, train_wall=10, gb_free=2.8, wall=192756 2021-06-21 00:11:33 | INFO | train_inner | epoch 006: 1830 / 3002 loss=2.425, ppl=5.37, wps=5962, ups=0.09, wpb=64810, bsz=128, num_updates=16740, lr=9.98741e-05, gnorm=1.894, loss_scale=2, train_wall=10, gb_free=2.8, wall=192767 2021-06-21 00:11:44 | INFO | train_inner | epoch 006: 1831 / 3002 loss=2.528, ppl=5.77, wps=5874.5, ups=0.09, wpb=64827, bsz=128, num_updates=16741, lr=9.98741e-05, gnorm=1.863, loss_scale=2, train_wall=11, gb_free=2.8, wall=192778 2021-06-21 00:11:55 | INFO | train_inner | epoch 006: 1832 / 3002 loss=2.546, ppl=5.84, wps=5850, ups=0.09, wpb=64743, bsz=128, num_updates=16742, lr=9.98741e-05, gnorm=2.61, loss_scale=2, train_wall=11, gb_free=2.8, wall=192789 2021-06-21 00:12:06 | INFO | train_inner | epoch 006: 1833 / 3002 loss=2.497, ppl=5.65, wps=5841, ups=0.09, wpb=64815, bsz=128, num_updates=16743, lr=9.9874e-05, gnorm=1.91, loss_scale=2, train_wall=11, gb_free=2.8, wall=192800 2021-06-21 00:12:17 | INFO | train_inner | epoch 006: 1834 / 3002 loss=2.547, ppl=5.84, wps=5807.9, ups=0.09, wpb=64821, bsz=128, num_updates=16744, lr=9.9874e-05, gnorm=1.867, loss_scale=2, train_wall=11, gb_free=2.8, wall=192811 2021-06-21 00:12:28 | INFO | train_inner | epoch 006: 1835 / 3002 loss=2.471, ppl=5.54, wps=5843.6, ups=0.09, wpb=64779, bsz=128, num_updates=16745, lr=9.9874e-05, gnorm=1.827, loss_scale=2, train_wall=11, gb_free=2.8, wall=192823 2021-06-21 00:12:39 | INFO | train_inner | epoch 006: 1836 / 3002 loss=2.362, ppl=5.14, wps=5821.6, ups=0.09, wpb=64831, bsz=128, num_updates=16746, lr=9.9874e-05, gnorm=1.904, loss_scale=2, train_wall=11, gb_free=2.8, wall=192834 2021-06-21 00:12:50 | INFO | train_inner | epoch 006: 1837 / 3002 loss=2.512, ppl=5.7, wps=5899.2, ups=0.09, wpb=64838, bsz=128, num_updates=16747, lr=9.9874e-05, gnorm=2.095, loss_scale=2, train_wall=11, gb_free=2.8, wall=192845 2021-06-21 00:13:01 | INFO | train_inner | epoch 006: 1838 / 3002 loss=2.563, ppl=5.91, wps=5886.8, ups=0.09, wpb=64774, bsz=128, num_updates=16748, lr=9.9874e-05, gnorm=1.98, loss_scale=2, train_wall=11, gb_free=2.8, wall=192856 2021-06-21 00:13:13 | INFO | train_inner | epoch 006: 1839 / 3002 loss=2.348, ppl=5.09, wps=5773.6, ups=0.09, wpb=64867, bsz=128, num_updates=16749, lr=9.9874e-05, gnorm=1.967, loss_scale=2, train_wall=11, gb_free=2.8, wall=192867 2021-06-21 00:13:24 | INFO | train_inner | epoch 006: 1840 / 3002 loss=2.448, ppl=5.46, wps=5880.1, ups=0.09, wpb=64830, bsz=128, num_updates=16750, lr=9.9874e-05, gnorm=7.481, loss_scale=2, train_wall=11, gb_free=2.8, wall=192878 2021-06-21 00:13:34 | INFO | train_inner | epoch 006: 1841 / 3002 loss=2.437, ppl=5.42, wps=5958.2, ups=0.09, wpb=64790, bsz=128, num_updates=16751, lr=9.9874e-05, gnorm=1.942, loss_scale=2, train_wall=10, gb_free=2.8, wall=192889 2021-06-21 00:13:45 | INFO | train_inner | epoch 006: 1842 / 3002 loss=2.508, ppl=5.69, wps=5927.2, ups=0.09, wpb=64818, bsz=128, num_updates=16752, lr=9.9874e-05, gnorm=2.358, loss_scale=2, train_wall=10, gb_free=2.8, wall=192900 2021-06-21 00:13:57 | INFO | train_inner | epoch 006: 1843 / 3002 loss=2.421, ppl=5.35, wps=5797.5, ups=0.09, wpb=64904, bsz=128, num_updates=16753, lr=9.9874e-05, gnorm=1.88, loss_scale=2, train_wall=11, gb_free=2.8, wall=192911 2021-06-21 00:14:08 | INFO | train_inner | epoch 006: 1844 / 3002 loss=2.497, ppl=5.64, wps=5842.5, ups=0.09, wpb=64760, bsz=128, num_updates=16754, lr=9.9874e-05, gnorm=3.611, loss_scale=2, train_wall=11, gb_free=2.8, wall=192922 2021-06-21 00:14:19 | INFO | train_inner | epoch 006: 1845 / 3002 loss=2.361, ppl=5.14, wps=5775.4, ups=0.09, wpb=64826, bsz=128, num_updates=16755, lr=9.98739e-05, gnorm=2.044, loss_scale=2, train_wall=11, gb_free=2.8, wall=192933 2021-06-21 00:14:30 | INFO | train_inner | epoch 006: 1846 / 3002 loss=2.535, ppl=5.8, wps=5796.1, ups=0.09, wpb=64714, bsz=128, num_updates=16756, lr=9.98739e-05, gnorm=2.033, loss_scale=2, train_wall=11, gb_free=2.8, wall=192944 2021-06-21 00:14:41 | INFO | train_inner | epoch 006: 1847 / 3002 loss=2.461, ppl=5.5, wps=5759.3, ups=0.09, wpb=64761, bsz=128, num_updates=16757, lr=9.98739e-05, gnorm=2.305, loss_scale=2, train_wall=11, gb_free=2.8, wall=192956 2021-06-21 00:14:53 | INFO | train_inner | epoch 006: 1848 / 3002 loss=2.493, ppl=5.63, wps=5791.3, ups=0.09, wpb=64857, bsz=128, num_updates=16758, lr=9.98739e-05, gnorm=1.963, loss_scale=2, train_wall=11, gb_free=2.8, wall=192967 2021-06-21 00:15:04 | INFO | train_inner | epoch 006: 1849 / 3002 loss=2.454, ppl=5.48, wps=5866.9, ups=0.09, wpb=64832, bsz=128, num_updates=16759, lr=9.98739e-05, gnorm=1.889, loss_scale=2, train_wall=11, gb_free=2.8, wall=192978 2021-06-21 00:15:15 | INFO | train_inner | epoch 006: 1850 / 3002 loss=2.459, ppl=5.5, wps=5891, ups=0.09, wpb=64805, bsz=128, num_updates=16760, lr=9.98739e-05, gnorm=2.241, loss_scale=2, train_wall=11, gb_free=2.8, wall=192989 2021-06-21 00:15:26 | INFO | train_inner | epoch 006: 1851 / 3002 loss=2.428, ppl=5.38, wps=5885.6, ups=0.09, wpb=64861, bsz=128, num_updates=16761, lr=9.98739e-05, gnorm=1.867, loss_scale=2, train_wall=11, gb_free=2.8, wall=193000 2021-06-21 00:15:37 | INFO | train_inner | epoch 006: 1852 / 3002 loss=2.425, ppl=5.37, wps=5902.7, ups=0.09, wpb=64761, bsz=128, num_updates=16762, lr=9.98739e-05, gnorm=1.902, loss_scale=2, train_wall=11, gb_free=2.8, wall=193011 2021-06-21 00:15:48 | INFO | train_inner | epoch 006: 1853 / 3002 loss=2.388, ppl=5.23, wps=5883.8, ups=0.09, wpb=64857, bsz=128, num_updates=16763, lr=9.98739e-05, gnorm=1.869, loss_scale=2, train_wall=11, gb_free=2.8, wall=193022 2021-06-21 00:15:59 | INFO | train_inner | epoch 006: 1854 / 3002 loss=2.549, ppl=5.85, wps=5790.6, ups=0.09, wpb=64802, bsz=128, num_updates=16764, lr=9.98739e-05, gnorm=1.911, loss_scale=2, train_wall=11, gb_free=2.8, wall=193033 2021-06-21 00:16:10 | INFO | train_inner | epoch 006: 1855 / 3002 loss=2.495, ppl=5.64, wps=5783.6, ups=0.09, wpb=64857, bsz=128, num_updates=16765, lr=9.98739e-05, gnorm=1.962, loss_scale=2, train_wall=11, gb_free=2.8, wall=193044 2021-06-21 00:16:21 | INFO | train_inner | epoch 006: 1856 / 3002 loss=2.403, ppl=5.29, wps=5880.9, ups=0.09, wpb=64921, bsz=128, num_updates=16766, lr=9.98739e-05, gnorm=1.99, loss_scale=2, train_wall=11, gb_free=2.8, wall=193055 2021-06-21 00:16:32 | INFO | train_inner | epoch 006: 1857 / 3002 loss=2.49, ppl=5.62, wps=5862.2, ups=0.09, wpb=64916, bsz=128, num_updates=16767, lr=9.98739e-05, gnorm=2.027, loss_scale=2, train_wall=11, gb_free=2.8, wall=193066 2021-06-21 00:16:43 | INFO | train_inner | epoch 006: 1858 / 3002 loss=2.412, ppl=5.32, wps=5905.6, ups=0.09, wpb=64894, bsz=128, num_updates=16768, lr=9.98738e-05, gnorm=2.419, loss_scale=2, train_wall=11, gb_free=2.8, wall=193077 2021-06-21 00:16:54 | INFO | train_inner | epoch 006: 1859 / 3002 loss=2.478, ppl=5.57, wps=5814.8, ups=0.09, wpb=64837, bsz=128, num_updates=16769, lr=9.98738e-05, gnorm=1.88, loss_scale=2, train_wall=11, gb_free=2.8, wall=193089 2021-06-21 00:17:06 | INFO | train_inner | epoch 006: 1860 / 3002 loss=2.437, ppl=5.41, wps=5762.8, ups=0.09, wpb=64826, bsz=128, num_updates=16770, lr=9.98738e-05, gnorm=1.857, loss_scale=2, train_wall=11, gb_free=2.8, wall=193100 2021-06-21 00:17:17 | INFO | train_inner | epoch 006: 1861 / 3002 loss=2.476, ppl=5.56, wps=5795.5, ups=0.09, wpb=64772, bsz=128, num_updates=16771, lr=9.98738e-05, gnorm=1.969, loss_scale=2, train_wall=11, gb_free=2.8, wall=193111 2021-06-21 00:17:28 | INFO | train_inner | epoch 006: 1862 / 3002 loss=2.235, ppl=4.71, wps=5912.3, ups=0.09, wpb=64844, bsz=128, num_updates=16772, lr=9.98738e-05, gnorm=1.866, loss_scale=2, train_wall=11, gb_free=2.8, wall=193122 2021-06-21 00:17:39 | INFO | train_inner | epoch 006: 1863 / 3002 loss=2.495, ppl=5.64, wps=5792.2, ups=0.09, wpb=64699, bsz=128, num_updates=16773, lr=9.98738e-05, gnorm=1.943, loss_scale=2, train_wall=11, gb_free=2.8, wall=193133 2021-06-21 00:17:50 | INFO | train_inner | epoch 006: 1864 / 3002 loss=2.541, ppl=5.82, wps=5855.4, ups=0.09, wpb=64826, bsz=128, num_updates=16774, lr=9.98738e-05, gnorm=1.906, loss_scale=2, train_wall=11, gb_free=2.8, wall=193144 2021-06-21 00:18:01 | INFO | train_inner | epoch 006: 1865 / 3002 loss=2.541, ppl=5.82, wps=5774.5, ups=0.09, wpb=64863, bsz=128, num_updates=16775, lr=9.98738e-05, gnorm=1.982, loss_scale=2, train_wall=11, gb_free=2.8, wall=193155 2021-06-21 00:18:12 | INFO | train_inner | epoch 006: 1866 / 3002 loss=2.421, ppl=5.35, wps=5828.7, ups=0.09, wpb=64877, bsz=128, num_updates=16776, lr=9.98738e-05, gnorm=2.02, loss_scale=2, train_wall=11, gb_free=2.8, wall=193167 2021-06-21 00:18:23 | INFO | train_inner | epoch 006: 1867 / 3002 loss=2.467, ppl=5.53, wps=5818.8, ups=0.09, wpb=64813, bsz=128, num_updates=16777, lr=9.98738e-05, gnorm=3.704, loss_scale=2, train_wall=11, gb_free=2.8, wall=193178 2021-06-21 00:18:34 | INFO | train_inner | epoch 006: 1868 / 3002 loss=2.388, ppl=5.23, wps=5892.2, ups=0.09, wpb=64903, bsz=128, num_updates=16778, lr=9.98738e-05, gnorm=1.92, loss_scale=2, train_wall=11, gb_free=2.8, wall=193189 2021-06-21 00:18:45 | INFO | train_inner | epoch 006: 1869 / 3002 loss=2.404, ppl=5.29, wps=5900.1, ups=0.09, wpb=64855, bsz=128, num_updates=16779, lr=9.98738e-05, gnorm=2.006, loss_scale=2, train_wall=11, gb_free=2.8, wall=193200 2021-06-21 00:18:57 | INFO | train_inner | epoch 006: 1870 / 3002 loss=2.469, ppl=5.54, wps=5793, ups=0.09, wpb=64808, bsz=128, num_updates=16780, lr=9.98737e-05, gnorm=1.894, loss_scale=2, train_wall=11, gb_free=2.8, wall=193211 2021-06-21 00:19:08 | INFO | train_inner | epoch 006: 1871 / 3002 loss=2.515, ppl=5.72, wps=5826.9, ups=0.09, wpb=64801, bsz=128, num_updates=16781, lr=9.98737e-05, gnorm=4.936, loss_scale=2, train_wall=11, gb_free=2.8, wall=193222 2021-06-21 00:19:19 | INFO | train_inner | epoch 006: 1872 / 3002 loss=2.481, ppl=5.58, wps=5874.2, ups=0.09, wpb=64848, bsz=128, num_updates=16782, lr=9.98737e-05, gnorm=1.922, loss_scale=2, train_wall=11, gb_free=2.8, wall=193233 2021-06-21 00:19:30 | INFO | train_inner | epoch 006: 1873 / 3002 loss=2.506, ppl=5.68, wps=5852.6, ups=0.09, wpb=64832, bsz=128, num_updates=16783, lr=9.98737e-05, gnorm=2.613, loss_scale=2, train_wall=11, gb_free=2.8, wall=193244 2021-06-21 00:19:41 | INFO | train_inner | epoch 006: 1874 / 3002 loss=2.379, ppl=5.2, wps=5960.1, ups=0.09, wpb=64871, bsz=128, num_updates=16784, lr=9.98737e-05, gnorm=2.122, loss_scale=2, train_wall=10, gb_free=2.8, wall=193255 2021-06-21 00:19:52 | INFO | train_inner | epoch 006: 1875 / 3002 loss=2.483, ppl=5.59, wps=6010.8, ups=0.09, wpb=64789, bsz=128, num_updates=16785, lr=9.98737e-05, gnorm=1.904, loss_scale=2, train_wall=10, gb_free=2.8, wall=193266 2021-06-21 00:20:02 | INFO | train_inner | epoch 006: 1876 / 3002 loss=2.538, ppl=5.81, wps=5953.8, ups=0.09, wpb=64766, bsz=128, num_updates=16786, lr=9.98737e-05, gnorm=3.252, loss_scale=2, train_wall=10, gb_free=2.8, wall=193277 2021-06-21 00:20:13 | INFO | train_inner | epoch 006: 1877 / 3002 loss=2.446, ppl=5.45, wps=5875.8, ups=0.09, wpb=64832, bsz=128, num_updates=16787, lr=9.98737e-05, gnorm=2.02, loss_scale=2, train_wall=11, gb_free=2.8, wall=193288 2021-06-21 00:20:25 | INFO | train_inner | epoch 006: 1878 / 3002 loss=2.284, ppl=4.87, wps=5810.8, ups=0.09, wpb=64796, bsz=128, num_updates=16788, lr=9.98737e-05, gnorm=1.872, loss_scale=2, train_wall=11, gb_free=2.8, wall=193299 2021-06-21 00:20:36 | INFO | train_inner | epoch 006: 1879 / 3002 loss=2.55, ppl=5.86, wps=5846.1, ups=0.09, wpb=64803, bsz=128, num_updates=16789, lr=9.98737e-05, gnorm=2.161, loss_scale=2, train_wall=11, gb_free=2.8, wall=193310 2021-06-21 00:20:47 | INFO | train_inner | epoch 006: 1880 / 3002 loss=2.451, ppl=5.47, wps=5832.2, ups=0.09, wpb=64782, bsz=128, num_updates=16790, lr=9.98737e-05, gnorm=1.918, loss_scale=2, train_wall=11, gb_free=2.8, wall=193321 2021-06-21 00:20:58 | INFO | train_inner | epoch 006: 1881 / 3002 loss=2.415, ppl=5.33, wps=5861.5, ups=0.09, wpb=64826, bsz=128, num_updates=16791, lr=9.98737e-05, gnorm=1.968, loss_scale=2, train_wall=11, gb_free=2.8, wall=193332 2021-06-21 00:21:09 | INFO | train_inner | epoch 006: 1882 / 3002 loss=2.552, ppl=5.87, wps=5799.9, ups=0.09, wpb=64729, bsz=128, num_updates=16792, lr=9.98737e-05, gnorm=2.019, loss_scale=2, train_wall=11, gb_free=2.8, wall=193343 2021-06-21 00:21:20 | INFO | train_inner | epoch 006: 1883 / 3002 loss=2.417, ppl=5.34, wps=5852.4, ups=0.09, wpb=64887, bsz=128, num_updates=16793, lr=9.98736e-05, gnorm=1.882, loss_scale=2, train_wall=11, gb_free=2.8, wall=193354 2021-06-21 00:21:31 | INFO | train_inner | epoch 006: 1884 / 3002 loss=2.431, ppl=5.39, wps=5984.1, ups=0.09, wpb=64861, bsz=128, num_updates=16794, lr=9.98736e-05, gnorm=1.944, loss_scale=2, train_wall=10, gb_free=2.8, wall=193365 2021-06-21 00:21:42 | INFO | train_inner | epoch 006: 1885 / 3002 loss=2.506, ppl=5.68, wps=5875.8, ups=0.09, wpb=64940, bsz=128, num_updates=16795, lr=9.98736e-05, gnorm=1.904, loss_scale=2, train_wall=11, gb_free=2.8, wall=193376 2021-06-21 00:21:53 | INFO | train_inner | epoch 006: 1886 / 3002 loss=2.595, ppl=6.04, wps=5818.3, ups=0.09, wpb=64844, bsz=128, num_updates=16796, lr=9.98736e-05, gnorm=2.011, loss_scale=2, train_wall=11, gb_free=2.8, wall=193387 2021-06-21 00:22:04 | INFO | train_inner | epoch 006: 1887 / 3002 loss=2.435, ppl=5.41, wps=5841.1, ups=0.09, wpb=64786, bsz=128, num_updates=16797, lr=9.98736e-05, gnorm=1.948, loss_scale=2, train_wall=11, gb_free=2.8, wall=193399 2021-06-21 00:22:15 | INFO | train_inner | epoch 006: 1888 / 3002 loss=2.525, ppl=5.75, wps=5787.3, ups=0.09, wpb=64827, bsz=128, num_updates=16798, lr=9.98736e-05, gnorm=1.891, loss_scale=2, train_wall=11, gb_free=2.8, wall=193410 2021-06-21 00:22:26 | INFO | train_inner | epoch 006: 1889 / 3002 loss=2.64, ppl=6.23, wps=5885.9, ups=0.09, wpb=64832, bsz=128, num_updates=16799, lr=9.98736e-05, gnorm=1.95, loss_scale=2, train_wall=11, gb_free=2.8, wall=193421 2021-06-21 00:22:37 | INFO | train_inner | epoch 006: 1890 / 3002 loss=2.434, ppl=5.4, wps=5909.8, ups=0.09, wpb=64867, bsz=128, num_updates=16800, lr=9.98736e-05, gnorm=1.894, loss_scale=2, train_wall=11, gb_free=2.8, wall=193432 2021-06-21 00:22:49 | INFO | train_inner | epoch 006: 1891 / 3002 loss=2.43, ppl=5.39, wps=5818.3, ups=0.09, wpb=64911, bsz=128, num_updates=16801, lr=9.98736e-05, gnorm=1.914, loss_scale=2, train_wall=11, gb_free=2.8, wall=193443 2021-06-21 00:22:59 | INFO | train_inner | epoch 006: 1892 / 3002 loss=2.339, ppl=5.06, wps=5935.8, ups=0.09, wpb=64834, bsz=128, num_updates=16802, lr=9.98736e-05, gnorm=1.857, loss_scale=2, train_wall=10, gb_free=2.8, wall=193454 2021-06-21 00:23:11 | INFO | train_inner | epoch 006: 1893 / 3002 loss=2.518, ppl=5.73, wps=5855.3, ups=0.09, wpb=64863, bsz=128, num_updates=16803, lr=9.98736e-05, gnorm=2.043, loss_scale=4, train_wall=11, gb_free=2.8, wall=193465 2021-06-21 00:23:22 | INFO | train_inner | epoch 006: 1894 / 3002 loss=2.478, ppl=5.57, wps=5884.1, ups=0.09, wpb=64863, bsz=128, num_updates=16804, lr=9.98736e-05, gnorm=3.896, loss_scale=4, train_wall=11, gb_free=2.8, wall=193476 2021-06-21 00:23:33 | INFO | train_inner | epoch 006: 1895 / 3002 loss=2.46, ppl=5.5, wps=5880.2, ups=0.09, wpb=64882, bsz=128, num_updates=16805, lr=9.98735e-05, gnorm=1.884, loss_scale=4, train_wall=11, gb_free=2.8, wall=193487 2021-06-21 00:23:43 | INFO | train_inner | epoch 006: 1896 / 3002 loss=2.296, ppl=4.91, wps=5959.9, ups=0.09, wpb=64779, bsz=128, num_updates=16806, lr=9.98735e-05, gnorm=1.888, loss_scale=4, train_wall=10, gb_free=2.8, wall=193498 2021-06-21 00:23:55 | INFO | train_inner | epoch 006: 1897 / 3002 loss=2.441, ppl=5.43, wps=5869.1, ups=0.09, wpb=64828, bsz=128, num_updates=16807, lr=9.98735e-05, gnorm=1.89, loss_scale=4, train_wall=11, gb_free=2.8, wall=193509 2021-06-21 00:24:05 | INFO | train_inner | epoch 006: 1898 / 3002 loss=2.536, ppl=5.8, wps=5937.2, ups=0.09, wpb=64828, bsz=128, num_updates=16808, lr=9.98735e-05, gnorm=2.003, loss_scale=4, train_wall=10, gb_free=2.8, wall=193520 2021-06-21 00:24:16 | INFO | train_inner | epoch 006: 1899 / 3002 loss=2.464, ppl=5.52, wps=5964.3, ups=0.09, wpb=64916, bsz=128, num_updates=16809, lr=9.98735e-05, gnorm=1.932, loss_scale=4, train_wall=10, gb_free=2.8, wall=193531 2021-06-21 00:24:27 | INFO | train_inner | epoch 006: 1900 / 3002 loss=2.53, ppl=5.78, wps=5895.1, ups=0.09, wpb=64855, bsz=128, num_updates=16810, lr=9.98735e-05, gnorm=1.879, loss_scale=4, train_wall=11, gb_free=2.8, wall=193542 2021-06-21 00:24:38 | INFO | train_inner | epoch 006: 1901 / 3002 loss=2.427, ppl=5.38, wps=5880.3, ups=0.09, wpb=64870, bsz=128, num_updates=16811, lr=9.98735e-05, gnorm=1.967, loss_scale=4, train_wall=11, gb_free=2.8, wall=193553 2021-06-21 00:24:50 | INFO | train_inner | epoch 006: 1902 / 3002 loss=2.403, ppl=5.29, wps=5733.9, ups=0.09, wpb=64794, bsz=128, num_updates=16812, lr=9.98735e-05, gnorm=2.035, loss_scale=4, train_wall=11, gb_free=2.8, wall=193564 2021-06-21 00:25:01 | INFO | train_inner | epoch 006: 1903 / 3002 loss=2.588, ppl=6.01, wps=5840.5, ups=0.09, wpb=64763, bsz=128, num_updates=16813, lr=9.98735e-05, gnorm=2, loss_scale=4, train_wall=11, gb_free=2.8, wall=193575 2021-06-21 00:25:12 | INFO | train_inner | epoch 006: 1904 / 3002 loss=2.337, ppl=5.05, wps=5904.5, ups=0.09, wpb=64837, bsz=128, num_updates=16814, lr=9.98735e-05, gnorm=1.94, loss_scale=4, train_wall=11, gb_free=2.8, wall=193586 2021-06-21 00:25:23 | INFO | train_inner | epoch 006: 1905 / 3002 loss=2.556, ppl=5.88, wps=5869.8, ups=0.09, wpb=64788, bsz=128, num_updates=16815, lr=9.98735e-05, gnorm=1.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=193597 2021-06-21 00:25:34 | INFO | train_inner | epoch 006: 1906 / 3002 loss=2.426, ppl=5.37, wps=5825, ups=0.09, wpb=64850, bsz=128, num_updates=16816, lr=9.98735e-05, gnorm=2, loss_scale=4, train_wall=11, gb_free=2.8, wall=193608 2021-06-21 00:25:45 | INFO | train_inner | epoch 006: 1907 / 3002 loss=2.5, ppl=5.66, wps=5792.2, ups=0.09, wpb=64744, bsz=128, num_updates=16817, lr=9.98735e-05, gnorm=1.907, loss_scale=4, train_wall=11, gb_free=2.8, wall=193619 2021-06-21 00:25:56 | INFO | train_inner | epoch 006: 1908 / 3002 loss=2.485, ppl=5.6, wps=5911.1, ups=0.09, wpb=64837, bsz=128, num_updates=16818, lr=9.98734e-05, gnorm=1.94, loss_scale=4, train_wall=10, gb_free=2.8, wall=193630 2021-06-21 00:26:07 | INFO | train_inner | epoch 006: 1909 / 3002 loss=2.482, ppl=5.59, wps=5931.9, ups=0.09, wpb=64894, bsz=128, num_updates=16819, lr=9.98734e-05, gnorm=1.942, loss_scale=4, train_wall=10, gb_free=2.8, wall=193641 2021-06-21 00:26:18 | INFO | train_inner | epoch 006: 1910 / 3002 loss=2.414, ppl=5.33, wps=6039.3, ups=0.09, wpb=64814, bsz=128, num_updates=16820, lr=9.98734e-05, gnorm=1.965, loss_scale=4, train_wall=10, gb_free=2.8, wall=193652 2021-06-21 00:26:29 | INFO | train_inner | epoch 006: 1911 / 3002 loss=2.45, ppl=5.46, wps=5939.1, ups=0.09, wpb=64808, bsz=128, num_updates=16821, lr=9.98734e-05, gnorm=2.103, loss_scale=4, train_wall=10, gb_free=2.8, wall=193663 2021-06-21 00:26:40 | INFO | train_inner | epoch 006: 1912 / 3002 loss=2.38, ppl=5.2, wps=5816.3, ups=0.09, wpb=64806, bsz=128, num_updates=16822, lr=9.98734e-05, gnorm=2.572, loss_scale=4, train_wall=11, gb_free=2.8, wall=193674 2021-06-21 00:26:51 | INFO | train_inner | epoch 006: 1913 / 3002 loss=2.438, ppl=5.42, wps=5914.9, ups=0.09, wpb=64809, bsz=128, num_updates=16823, lr=9.98734e-05, gnorm=1.86, loss_scale=4, train_wall=10, gb_free=2.8, wall=193685 2021-06-21 00:27:02 | INFO | train_inner | epoch 006: 1914 / 3002 loss=2.525, ppl=5.76, wps=5839.1, ups=0.09, wpb=64858, bsz=128, num_updates=16824, lr=9.98734e-05, gnorm=2.132, loss_scale=4, train_wall=11, gb_free=2.8, wall=193696 2021-06-21 00:27:13 | INFO | train_inner | epoch 006: 1915 / 3002 loss=2.39, ppl=5.24, wps=5970.5, ups=0.09, wpb=64883, bsz=128, num_updates=16825, lr=9.98734e-05, gnorm=1.972, loss_scale=4, train_wall=10, gb_free=2.8, wall=193707 2021-06-21 00:27:24 | INFO | train_inner | epoch 006: 1916 / 3002 loss=2.49, ppl=5.62, wps=5944.8, ups=0.09, wpb=64877, bsz=128, num_updates=16826, lr=9.98734e-05, gnorm=2.074, loss_scale=4, train_wall=10, gb_free=2.8, wall=193718 2021-06-21 00:27:35 | INFO | train_inner | epoch 006: 1917 / 3002 loss=2.406, ppl=5.3, wps=5894.2, ups=0.09, wpb=64769, bsz=128, num_updates=16827, lr=9.98734e-05, gnorm=1.95, loss_scale=4, train_wall=11, gb_free=2.8, wall=193729 2021-06-21 00:27:46 | INFO | train_inner | epoch 006: 1918 / 3002 loss=2.561, ppl=5.9, wps=5918.1, ups=0.09, wpb=64819, bsz=128, num_updates=16828, lr=9.98734e-05, gnorm=1.908, loss_scale=4, train_wall=10, gb_free=2.8, wall=193740 2021-06-21 00:27:57 | INFO | train_inner | epoch 006: 1919 / 3002 loss=2.415, ppl=5.33, wps=5822.3, ups=0.09, wpb=64815, bsz=128, num_updates=16829, lr=9.98734e-05, gnorm=1.959, loss_scale=4, train_wall=11, gb_free=2.8, wall=193751 2021-06-21 00:28:08 | INFO | train_inner | epoch 006: 1920 / 3002 loss=2.422, ppl=5.36, wps=5808.1, ups=0.09, wpb=64832, bsz=128, num_updates=16830, lr=9.98733e-05, gnorm=1.95, loss_scale=4, train_wall=11, gb_free=2.8, wall=193762 2021-06-21 00:28:19 | INFO | train_inner | epoch 006: 1921 / 3002 loss=2.416, ppl=5.34, wps=5857.5, ups=0.09, wpb=64895, bsz=128, num_updates=16831, lr=9.98733e-05, gnorm=3.794, loss_scale=4, train_wall=11, gb_free=2.8, wall=193773 2021-06-21 00:28:30 | INFO | train_inner | epoch 006: 1922 / 3002 loss=2.448, ppl=5.46, wps=5845.5, ups=0.09, wpb=64829, bsz=128, num_updates=16832, lr=9.98733e-05, gnorm=1.893, loss_scale=4, train_wall=11, gb_free=2.8, wall=193784 2021-06-21 00:28:41 | INFO | train_inner | epoch 006: 1923 / 3002 loss=2.435, ppl=5.41, wps=5967.4, ups=0.09, wpb=64861, bsz=128, num_updates=16833, lr=9.98733e-05, gnorm=1.997, loss_scale=4, train_wall=10, gb_free=2.8, wall=193795 2021-06-21 00:28:52 | INFO | train_inner | epoch 006: 1924 / 3002 loss=2.473, ppl=5.55, wps=5820.7, ups=0.09, wpb=64843, bsz=128, num_updates=16834, lr=9.98733e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=193806 2021-06-21 00:29:03 | INFO | train_inner | epoch 006: 1925 / 3002 loss=2.508, ppl=5.69, wps=5885, ups=0.09, wpb=64771, bsz=128, num_updates=16835, lr=9.98733e-05, gnorm=1.99, loss_scale=4, train_wall=11, gb_free=2.8, wall=193817 2021-06-21 00:29:14 | INFO | train_inner | epoch 006: 1926 / 3002 loss=2.471, ppl=5.55, wps=5884.6, ups=0.09, wpb=64835, bsz=128, num_updates=16836, lr=9.98733e-05, gnorm=1.837, loss_scale=4, train_wall=11, gb_free=2.8, wall=193828 2021-06-21 00:29:25 | INFO | train_inner | epoch 006: 1927 / 3002 loss=2.614, ppl=6.12, wps=5803.7, ups=0.09, wpb=64804, bsz=128, num_updates=16837, lr=9.98733e-05, gnorm=1.966, loss_scale=4, train_wall=11, gb_free=2.8, wall=193840 2021-06-21 00:29:36 | INFO | train_inner | epoch 006: 1928 / 3002 loss=2.477, ppl=5.57, wps=5908.4, ups=0.09, wpb=64927, bsz=128, num_updates=16838, lr=9.98733e-05, gnorm=1.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=193851 2021-06-21 00:29:47 | INFO | train_inner | epoch 006: 1929 / 3002 loss=2.529, ppl=5.77, wps=5780.8, ups=0.09, wpb=64848, bsz=128, num_updates=16839, lr=9.98733e-05, gnorm=2.053, loss_scale=4, train_wall=11, gb_free=2.8, wall=193862 2021-06-21 00:29:58 | INFO | train_inner | epoch 006: 1930 / 3002 loss=2.392, ppl=5.25, wps=5905.5, ups=0.09, wpb=64729, bsz=128, num_updates=16840, lr=9.98733e-05, gnorm=1.89, loss_scale=4, train_wall=10, gb_free=2.8, wall=193873 2021-06-21 00:30:10 | INFO | train_inner | epoch 006: 1931 / 3002 loss=2.348, ppl=5.09, wps=5786.1, ups=0.09, wpb=64848, bsz=128, num_updates=16841, lr=9.98733e-05, gnorm=2.747, loss_scale=4, train_wall=11, gb_free=2.8, wall=193884 2021-06-21 00:30:21 | INFO | train_inner | epoch 006: 1932 / 3002 loss=2.351, ppl=5.1, wps=5718.4, ups=0.09, wpb=64813, bsz=128, num_updates=16842, lr=9.98733e-05, gnorm=1.948, loss_scale=4, train_wall=11, gb_free=2.8, wall=193895 2021-06-21 00:30:32 | INFO | train_inner | epoch 006: 1933 / 3002 loss=2.564, ppl=5.91, wps=5836, ups=0.09, wpb=64899, bsz=128, num_updates=16843, lr=9.98732e-05, gnorm=1.906, loss_scale=4, train_wall=11, gb_free=2.8, wall=193906 2021-06-21 00:30:43 | INFO | train_inner | epoch 006: 1934 / 3002 loss=2.424, ppl=5.37, wps=5903.5, ups=0.09, wpb=64861, bsz=128, num_updates=16844, lr=9.98732e-05, gnorm=5.873, loss_scale=4, train_wall=11, gb_free=2.8, wall=193917 2021-06-21 00:30:54 | INFO | train_inner | epoch 006: 1935 / 3002 loss=2.35, ppl=5.1, wps=5871.1, ups=0.09, wpb=64867, bsz=128, num_updates=16845, lr=9.98732e-05, gnorm=1.92, loss_scale=4, train_wall=11, gb_free=2.8, wall=193928 2021-06-21 00:31:05 | INFO | train_inner | epoch 006: 1936 / 3002 loss=2.527, ppl=5.76, wps=5860.1, ups=0.09, wpb=64875, bsz=128, num_updates=16846, lr=9.98732e-05, gnorm=1.947, loss_scale=4, train_wall=11, gb_free=2.8, wall=193940 2021-06-21 00:31:16 | INFO | train_inner | epoch 006: 1937 / 3002 loss=2.344, ppl=5.08, wps=5881, ups=0.09, wpb=64891, bsz=128, num_updates=16847, lr=9.98732e-05, gnorm=1.876, loss_scale=4, train_wall=11, gb_free=2.8, wall=193951 2021-06-21 00:31:27 | INFO | train_inner | epoch 006: 1938 / 3002 loss=2.412, ppl=5.32, wps=5857.3, ups=0.09, wpb=64790, bsz=128, num_updates=16848, lr=9.98732e-05, gnorm=1.902, loss_scale=4, train_wall=11, gb_free=2.8, wall=193962 2021-06-21 00:31:38 | INFO | train_inner | epoch 006: 1939 / 3002 loss=2.445, ppl=5.45, wps=5883.2, ups=0.09, wpb=64790, bsz=128, num_updates=16849, lr=9.98732e-05, gnorm=1.888, loss_scale=4, train_wall=11, gb_free=2.8, wall=193973 2021-06-21 00:31:49 | INFO | train_inner | epoch 006: 1940 / 3002 loss=2.4, ppl=5.28, wps=5972.5, ups=0.09, wpb=64859, bsz=128, num_updates=16850, lr=9.98732e-05, gnorm=1.896, loss_scale=4, train_wall=10, gb_free=2.8, wall=193983 2021-06-21 00:32:00 | INFO | train_inner | epoch 006: 1941 / 3002 loss=2.403, ppl=5.29, wps=5948.8, ups=0.09, wpb=64797, bsz=128, num_updates=16851, lr=9.98732e-05, gnorm=1.98, loss_scale=4, train_wall=10, gb_free=2.8, wall=193994 2021-06-21 00:32:11 | INFO | train_inner | epoch 006: 1942 / 3002 loss=2.509, ppl=5.69, wps=5832.1, ups=0.09, wpb=64857, bsz=128, num_updates=16852, lr=9.98732e-05, gnorm=2.582, loss_scale=4, train_wall=11, gb_free=2.8, wall=194005 2021-06-21 00:32:22 | INFO | train_inner | epoch 006: 1943 / 3002 loss=2.42, ppl=5.35, wps=5811, ups=0.09, wpb=64810, bsz=128, num_updates=16853, lr=9.98732e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=194017 2021-06-21 00:32:33 | INFO | train_inner | epoch 006: 1944 / 3002 loss=2.54, ppl=5.82, wps=5845.8, ups=0.09, wpb=64894, bsz=128, num_updates=16854, lr=9.98732e-05, gnorm=1.939, loss_scale=4, train_wall=11, gb_free=2.8, wall=194028 2021-06-21 00:32:44 | INFO | train_inner | epoch 006: 1945 / 3002 loss=2.397, ppl=5.27, wps=5891.7, ups=0.09, wpb=64838, bsz=128, num_updates=16855, lr=9.98731e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=194039 2021-06-21 00:32:55 | INFO | train_inner | epoch 006: 1946 / 3002 loss=2.466, ppl=5.52, wps=5881.8, ups=0.09, wpb=64845, bsz=128, num_updates=16856, lr=9.98731e-05, gnorm=2.33, loss_scale=4, train_wall=11, gb_free=2.8, wall=194050 2021-06-21 00:33:07 | INFO | train_inner | epoch 006: 1947 / 3002 loss=2.366, ppl=5.15, wps=5864.7, ups=0.09, wpb=64898, bsz=128, num_updates=16857, lr=9.98731e-05, gnorm=1.905, loss_scale=4, train_wall=11, gb_free=2.8, wall=194061 2021-06-21 00:33:18 | INFO | train_inner | epoch 006: 1948 / 3002 loss=2.517, ppl=5.72, wps=5828.8, ups=0.09, wpb=64817, bsz=128, num_updates=16858, lr=9.98731e-05, gnorm=2.111, loss_scale=4, train_wall=11, gb_free=2.8, wall=194072 2021-06-21 00:33:29 | INFO | train_inner | epoch 006: 1949 / 3002 loss=2.418, ppl=5.35, wps=5821.5, ups=0.09, wpb=64863, bsz=128, num_updates=16859, lr=9.98731e-05, gnorm=4.359, loss_scale=4, train_wall=11, gb_free=2.8, wall=194083 2021-06-21 00:33:40 | INFO | train_inner | epoch 006: 1950 / 3002 loss=2.319, ppl=4.99, wps=5805.9, ups=0.09, wpb=64847, bsz=128, num_updates=16860, lr=9.98731e-05, gnorm=1.899, loss_scale=4, train_wall=11, gb_free=2.8, wall=194094 2021-06-21 00:33:51 | INFO | train_inner | epoch 006: 1951 / 3002 loss=2.504, ppl=5.67, wps=5730.7, ups=0.09, wpb=64825, bsz=128, num_updates=16861, lr=9.98731e-05, gnorm=1.864, loss_scale=4, train_wall=11, gb_free=2.8, wall=194106 2021-06-21 00:34:02 | INFO | train_inner | epoch 006: 1952 / 3002 loss=2.619, ppl=6.14, wps=5768.7, ups=0.09, wpb=64808, bsz=128, num_updates=16862, lr=9.98731e-05, gnorm=2.409, loss_scale=4, train_wall=11, gb_free=2.8, wall=194117 2021-06-21 00:34:14 | INFO | train_inner | epoch 006: 1953 / 3002 loss=2.455, ppl=5.48, wps=5773.7, ups=0.09, wpb=64758, bsz=128, num_updates=16863, lr=9.98731e-05, gnorm=2.154, loss_scale=4, train_wall=11, gb_free=2.8, wall=194128 2021-06-21 00:34:25 | INFO | train_inner | epoch 006: 1954 / 3002 loss=2.413, ppl=5.33, wps=5849.7, ups=0.09, wpb=64841, bsz=128, num_updates=16864, lr=9.98731e-05, gnorm=1.984, loss_scale=4, train_wall=11, gb_free=2.8, wall=194139 2021-06-21 00:34:36 | INFO | train_inner | epoch 006: 1955 / 3002 loss=2.365, ppl=5.15, wps=5827.2, ups=0.09, wpb=64729, bsz=128, num_updates=16865, lr=9.98731e-05, gnorm=1.955, loss_scale=4, train_wall=11, gb_free=2.8, wall=194150 2021-06-21 00:34:47 | INFO | train_inner | epoch 006: 1956 / 3002 loss=2.474, ppl=5.56, wps=5862, ups=0.09, wpb=64764, bsz=128, num_updates=16866, lr=9.98731e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=194161 2021-06-21 00:34:58 | INFO | train_inner | epoch 006: 1957 / 3002 loss=2.539, ppl=5.81, wps=5869, ups=0.09, wpb=64767, bsz=128, num_updates=16867, lr=9.98731e-05, gnorm=1.958, loss_scale=4, train_wall=11, gb_free=2.8, wall=194172 2021-06-21 00:35:09 | INFO | train_inner | epoch 006: 1958 / 3002 loss=2.457, ppl=5.49, wps=5891.2, ups=0.09, wpb=64901, bsz=128, num_updates=16868, lr=9.9873e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=194183 2021-06-21 00:35:20 | INFO | train_inner | epoch 006: 1959 / 3002 loss=2.47, ppl=5.54, wps=5867.4, ups=0.09, wpb=64836, bsz=128, num_updates=16869, lr=9.9873e-05, gnorm=2.047, loss_scale=4, train_wall=11, gb_free=2.8, wall=194194 2021-06-21 00:35:31 | INFO | train_inner | epoch 006: 1960 / 3002 loss=2.401, ppl=5.28, wps=5914.7, ups=0.09, wpb=64867, bsz=128, num_updates=16870, lr=9.9873e-05, gnorm=2.069, loss_scale=4, train_wall=11, gb_free=2.8, wall=194205 2021-06-21 00:35:42 | INFO | train_inner | epoch 006: 1961 / 3002 loss=2.271, ppl=4.83, wps=5807.8, ups=0.09, wpb=64905, bsz=128, num_updates=16871, lr=9.9873e-05, gnorm=2.501, loss_scale=4, train_wall=11, gb_free=2.8, wall=194217 2021-06-21 00:35:53 | INFO | train_inner | epoch 006: 1962 / 3002 loss=2.457, ppl=5.49, wps=5844.4, ups=0.09, wpb=64833, bsz=128, num_updates=16872, lr=9.9873e-05, gnorm=3.688, loss_scale=4, train_wall=11, gb_free=2.8, wall=194228 2021-06-21 00:36:05 | INFO | train_inner | epoch 006: 1963 / 3002 loss=2.584, ppl=6, wps=5750.6, ups=0.09, wpb=64817, bsz=128, num_updates=16873, lr=9.9873e-05, gnorm=2.145, loss_scale=4, train_wall=11, gb_free=2.8, wall=194239 2021-06-21 00:36:16 | INFO | train_inner | epoch 006: 1964 / 3002 loss=2.464, ppl=5.52, wps=5827.8, ups=0.09, wpb=64777, bsz=128, num_updates=16874, lr=9.9873e-05, gnorm=1.977, loss_scale=4, train_wall=11, gb_free=2.8, wall=194250 2021-06-21 00:36:27 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-21 00:36:38 | INFO | train_inner | epoch 006: 1966 / 3002 loss=2.455, ppl=5.48, wps=2928.9, ups=0.05, wpb=64861, bsz=128, num_updates=16875, lr=9.9873e-05, gnorm=3.118, loss_scale=2, train_wall=21, gb_free=2.8, wall=194272 2021-06-21 00:36:49 | INFO | train_inner | epoch 006: 1967 / 3002 loss=2.497, ppl=5.64, wps=5752.6, ups=0.09, wpb=64824, bsz=128, num_updates=16876, lr=9.9873e-05, gnorm=2.66, loss_scale=2, train_wall=11, gb_free=2.8, wall=194283 2021-06-21 00:37:00 | INFO | train_inner | epoch 006: 1968 / 3002 loss=2.583, ppl=5.99, wps=5906.8, ups=0.09, wpb=64729, bsz=128, num_updates=16877, lr=9.9873e-05, gnorm=2.32, loss_scale=2, train_wall=10, gb_free=2.8, wall=194294 2021-06-21 00:37:11 | INFO | train_inner | epoch 006: 1969 / 3002 loss=2.511, ppl=5.7, wps=5735.9, ups=0.09, wpb=64811, bsz=128, num_updates=16878, lr=9.9873e-05, gnorm=2.054, loss_scale=2, train_wall=11, gb_free=2.8, wall=194306 2021-06-21 00:37:22 | INFO | train_inner | epoch 006: 1970 / 3002 loss=2.38, ppl=5.21, wps=5983.1, ups=0.09, wpb=64911, bsz=128, num_updates=16879, lr=9.9873e-05, gnorm=4.079, loss_scale=2, train_wall=10, gb_free=2.8, wall=194317 2021-06-21 00:37:33 | INFO | train_inner | epoch 006: 1971 / 3002 loss=2.53, ppl=5.78, wps=5805.1, ups=0.09, wpb=64796, bsz=128, num_updates=16880, lr=9.98729e-05, gnorm=2.043, loss_scale=2, train_wall=11, gb_free=2.8, wall=194328 2021-06-21 00:37:44 | INFO | train_inner | epoch 006: 1972 / 3002 loss=2.318, ppl=4.98, wps=5856.9, ups=0.09, wpb=64949, bsz=128, num_updates=16881, lr=9.98729e-05, gnorm=2.404, loss_scale=2, train_wall=11, gb_free=2.8, wall=194339 2021-06-21 00:37:55 | INFO | train_inner | epoch 006: 1973 / 3002 loss=2.393, ppl=5.25, wps=5910.7, ups=0.09, wpb=64782, bsz=128, num_updates=16882, lr=9.98729e-05, gnorm=2.113, loss_scale=2, train_wall=11, gb_free=2.8, wall=194350 2021-06-21 00:38:06 | INFO | train_inner | epoch 006: 1974 / 3002 loss=2.432, ppl=5.4, wps=5844.4, ups=0.09, wpb=64839, bsz=128, num_updates=16883, lr=9.98729e-05, gnorm=2.024, loss_scale=2, train_wall=11, gb_free=2.8, wall=194361 2021-06-21 00:38:18 | INFO | train_inner | epoch 006: 1975 / 3002 loss=2.522, ppl=5.74, wps=5732.3, ups=0.09, wpb=64865, bsz=128, num_updates=16884, lr=9.98729e-05, gnorm=2.017, loss_scale=2, train_wall=11, gb_free=2.8, wall=194372 2021-06-21 00:38:29 | INFO | train_inner | epoch 006: 1976 / 3002 loss=2.431, ppl=5.39, wps=5836.2, ups=0.09, wpb=64803, bsz=128, num_updates=16885, lr=9.98729e-05, gnorm=2.018, loss_scale=2, train_wall=11, gb_free=2.8, wall=194383 2021-06-21 00:38:40 | INFO | train_inner | epoch 006: 1977 / 3002 loss=2.338, ppl=5.06, wps=5793.1, ups=0.09, wpb=64858, bsz=128, num_updates=16886, lr=9.98729e-05, gnorm=3.146, loss_scale=2, train_wall=11, gb_free=2.8, wall=194394 2021-06-21 00:38:51 | INFO | train_inner | epoch 006: 1978 / 3002 loss=2.567, ppl=5.92, wps=5831.5, ups=0.09, wpb=64838, bsz=128, num_updates=16887, lr=9.98729e-05, gnorm=2.195, loss_scale=2, train_wall=11, gb_free=2.8, wall=194406 2021-06-21 00:39:02 | INFO | train_inner | epoch 006: 1979 / 3002 loss=2.492, ppl=5.63, wps=5858.6, ups=0.09, wpb=64743, bsz=128, num_updates=16888, lr=9.98729e-05, gnorm=2.122, loss_scale=2, train_wall=11, gb_free=2.8, wall=194417 2021-06-21 00:39:13 | INFO | train_inner | epoch 006: 1980 / 3002 loss=2.418, ppl=5.35, wps=5829.3, ups=0.09, wpb=64789, bsz=128, num_updates=16889, lr=9.98729e-05, gnorm=2.02, loss_scale=2, train_wall=11, gb_free=2.8, wall=194428 2021-06-21 00:39:24 | INFO | train_inner | epoch 006: 1981 / 3002 loss=2.532, ppl=5.78, wps=5911.2, ups=0.09, wpb=64813, bsz=128, num_updates=16890, lr=9.98729e-05, gnorm=2.041, loss_scale=2, train_wall=11, gb_free=2.8, wall=194439 2021-06-21 00:39:35 | INFO | train_inner | epoch 006: 1982 / 3002 loss=2.457, ppl=5.49, wps=5882, ups=0.09, wpb=64894, bsz=128, num_updates=16891, lr=9.98729e-05, gnorm=2.122, loss_scale=2, train_wall=11, gb_free=2.8, wall=194450 2021-06-21 00:39:47 | INFO | train_inner | epoch 006: 1983 / 3002 loss=2.445, ppl=5.45, wps=5797.3, ups=0.09, wpb=64883, bsz=128, num_updates=16892, lr=9.98729e-05, gnorm=1.978, loss_scale=2, train_wall=11, gb_free=2.8, wall=194461 2021-06-21 00:39:58 | INFO | train_inner | epoch 006: 1984 / 3002 loss=2.429, ppl=5.39, wps=5851.7, ups=0.09, wpb=64867, bsz=128, num_updates=16893, lr=9.98728e-05, gnorm=2.013, loss_scale=2, train_wall=11, gb_free=2.8, wall=194472 2021-06-21 00:40:09 | INFO | train_inner | epoch 006: 1985 / 3002 loss=2.467, ppl=5.53, wps=5893.6, ups=0.09, wpb=64884, bsz=128, num_updates=16894, lr=9.98728e-05, gnorm=2.075, loss_scale=2, train_wall=11, gb_free=2.8, wall=194483 2021-06-21 00:40:20 | INFO | train_inner | epoch 006: 1986 / 3002 loss=2.46, ppl=5.5, wps=5870.5, ups=0.09, wpb=64896, bsz=128, num_updates=16895, lr=9.98728e-05, gnorm=2.14, loss_scale=2, train_wall=11, gb_free=2.8, wall=194494 2021-06-21 00:40:31 | INFO | train_inner | epoch 006: 1987 / 3002 loss=2.594, ppl=6.04, wps=5871.8, ups=0.09, wpb=64898, bsz=128, num_updates=16896, lr=9.98728e-05, gnorm=2.041, loss_scale=2, train_wall=11, gb_free=2.8, wall=194505 2021-06-21 00:40:42 | INFO | train_inner | epoch 006: 1988 / 3002 loss=2.332, ppl=5.03, wps=5786.4, ups=0.09, wpb=64810, bsz=128, num_updates=16897, lr=9.98728e-05, gnorm=2.055, loss_scale=2, train_wall=11, gb_free=2.8, wall=194516 2021-06-21 00:40:53 | INFO | train_inner | epoch 006: 1989 / 3002 loss=2.58, ppl=5.98, wps=5855.3, ups=0.09, wpb=64816, bsz=128, num_updates=16898, lr=9.98728e-05, gnorm=2.044, loss_scale=2, train_wall=11, gb_free=2.8, wall=194527 2021-06-21 00:41:04 | INFO | train_inner | epoch 006: 1990 / 3002 loss=2.447, ppl=5.45, wps=5926.6, ups=0.09, wpb=64865, bsz=128, num_updates=16899, lr=9.98728e-05, gnorm=2.029, loss_scale=2, train_wall=10, gb_free=2.8, wall=194538 2021-06-21 00:41:15 | INFO | train_inner | epoch 006: 1991 / 3002 loss=2.511, ppl=5.7, wps=5758.7, ups=0.09, wpb=64834, bsz=128, num_updates=16900, lr=9.98728e-05, gnorm=3.423, loss_scale=2, train_wall=11, gb_free=2.8, wall=194550 2021-06-21 00:41:26 | INFO | train_inner | epoch 006: 1992 / 3002 loss=2.411, ppl=5.32, wps=5900.4, ups=0.09, wpb=64875, bsz=128, num_updates=16901, lr=9.98728e-05, gnorm=2.033, loss_scale=2, train_wall=11, gb_free=2.8, wall=194561 2021-06-21 00:41:37 | INFO | train_inner | epoch 006: 1993 / 3002 loss=2.488, ppl=5.61, wps=6011.7, ups=0.09, wpb=64807, bsz=128, num_updates=16902, lr=9.98728e-05, gnorm=2.05, loss_scale=2, train_wall=10, gb_free=2.8, wall=194571 2021-06-21 00:41:48 | INFO | train_inner | epoch 006: 1994 / 3002 loss=2.44, ppl=5.43, wps=5931.9, ups=0.09, wpb=64785, bsz=128, num_updates=16903, lr=9.98728e-05, gnorm=2.052, loss_scale=2, train_wall=10, gb_free=2.8, wall=194582 2021-06-21 00:41:59 | INFO | train_inner | epoch 006: 1995 / 3002 loss=2.513, ppl=5.71, wps=5903.6, ups=0.09, wpb=64774, bsz=128, num_updates=16904, lr=9.98728e-05, gnorm=2.04, loss_scale=2, train_wall=11, gb_free=2.8, wall=194593 2021-06-21 00:42:10 | INFO | train_inner | epoch 006: 1996 / 3002 loss=2.515, ppl=5.71, wps=5955.9, ups=0.09, wpb=64827, bsz=128, num_updates=16905, lr=9.98727e-05, gnorm=2.02, loss_scale=2, train_wall=10, gb_free=2.8, wall=194604 2021-06-21 00:42:21 | INFO | train_inner | epoch 006: 1997 / 3002 loss=2.445, ppl=5.45, wps=5722.6, ups=0.09, wpb=64800, bsz=128, num_updates=16906, lr=9.98727e-05, gnorm=1.943, loss_scale=2, train_wall=11, gb_free=2.8, wall=194615 2021-06-21 00:42:32 | INFO | train_inner | epoch 006: 1998 / 3002 loss=2.534, ppl=5.79, wps=5788, ups=0.09, wpb=64834, bsz=128, num_updates=16907, lr=9.98727e-05, gnorm=1.942, loss_scale=2, train_wall=11, gb_free=2.8, wall=194627 2021-06-21 00:42:43 | INFO | train_inner | epoch 006: 1999 / 3002 loss=2.392, ppl=5.25, wps=5831.8, ups=0.09, wpb=64769, bsz=128, num_updates=16908, lr=9.98727e-05, gnorm=2.053, loss_scale=2, train_wall=11, gb_free=2.8, wall=194638 2021-06-21 00:42:55 | INFO | train_inner | epoch 006: 2000 / 3002 loss=2.495, ppl=5.64, wps=5778.1, ups=0.09, wpb=64955, bsz=128, num_updates=16909, lr=9.98727e-05, gnorm=2.009, loss_scale=2, train_wall=11, gb_free=2.8, wall=194649 2021-06-21 00:43:06 | INFO | train_inner | epoch 006: 2001 / 3002 loss=2.715, ppl=6.56, wps=5858.6, ups=0.09, wpb=64833, bsz=128, num_updates=16910, lr=9.98727e-05, gnorm=2.056, loss_scale=2, train_wall=11, gb_free=2.8, wall=194660 2021-06-21 00:43:17 | INFO | train_inner | epoch 006: 2002 / 3002 loss=2.412, ppl=5.32, wps=5840.4, ups=0.09, wpb=64844, bsz=128, num_updates=16911, lr=9.98727e-05, gnorm=2.08, loss_scale=2, train_wall=11, gb_free=2.8, wall=194671 2021-06-21 00:43:28 | INFO | train_inner | epoch 006: 2003 / 3002 loss=2.601, ppl=6.07, wps=5817.1, ups=0.09, wpb=64781, bsz=128, num_updates=16912, lr=9.98727e-05, gnorm=2.015, loss_scale=2, train_wall=11, gb_free=2.8, wall=194682 2021-06-21 00:43:39 | INFO | train_inner | epoch 006: 2004 / 3002 loss=2.365, ppl=5.15, wps=5875, ups=0.09, wpb=64852, bsz=128, num_updates=16913, lr=9.98727e-05, gnorm=1.961, loss_scale=2, train_wall=11, gb_free=2.8, wall=194693 2021-06-21 00:43:50 | INFO | train_inner | epoch 006: 2005 / 3002 loss=2.581, ppl=5.99, wps=5827.6, ups=0.09, wpb=64785, bsz=128, num_updates=16914, lr=9.98727e-05, gnorm=2.029, loss_scale=2, train_wall=11, gb_free=2.8, wall=194704 2021-06-21 00:44:01 | INFO | train_inner | epoch 006: 2006 / 3002 loss=2.429, ppl=5.39, wps=5882, ups=0.09, wpb=64813, bsz=128, num_updates=16915, lr=9.98727e-05, gnorm=2.058, loss_scale=2, train_wall=11, gb_free=2.8, wall=194716 2021-06-21 00:44:12 | INFO | train_inner | epoch 006: 2007 / 3002 loss=2.665, ppl=6.34, wps=5909.2, ups=0.09, wpb=64809, bsz=128, num_updates=16916, lr=9.98727e-05, gnorm=2.582, loss_scale=2, train_wall=11, gb_free=2.8, wall=194726 2021-06-21 00:44:23 | INFO | train_inner | epoch 006: 2008 / 3002 loss=2.516, ppl=5.72, wps=5832.7, ups=0.09, wpb=64844, bsz=128, num_updates=16917, lr=9.98727e-05, gnorm=1.993, loss_scale=2, train_wall=11, gb_free=2.8, wall=194738 2021-06-21 00:44:34 | INFO | train_inner | epoch 006: 2009 / 3002 loss=2.563, ppl=5.91, wps=5798.3, ups=0.09, wpb=64838, bsz=128, num_updates=16918, lr=9.98726e-05, gnorm=1.962, loss_scale=2, train_wall=11, gb_free=2.8, wall=194749 2021-06-21 00:44:46 | INFO | train_inner | epoch 006: 2010 / 3002 loss=2.445, ppl=5.45, wps=5849.9, ups=0.09, wpb=64852, bsz=128, num_updates=16919, lr=9.98726e-05, gnorm=5.413, loss_scale=2, train_wall=11, gb_free=2.8, wall=194760 2021-06-21 00:44:57 | INFO | train_inner | epoch 006: 2011 / 3002 loss=2.422, ppl=5.36, wps=5838.4, ups=0.09, wpb=64873, bsz=128, num_updates=16920, lr=9.98726e-05, gnorm=2.203, loss_scale=2, train_wall=11, gb_free=2.8, wall=194771 2021-06-21 00:45:08 | INFO | train_inner | epoch 006: 2012 / 3002 loss=2.596, ppl=6.05, wps=5944.2, ups=0.09, wpb=64872, bsz=128, num_updates=16921, lr=9.98726e-05, gnorm=2.031, loss_scale=2, train_wall=10, gb_free=2.8, wall=194782 2021-06-21 00:45:19 | INFO | train_inner | epoch 006: 2013 / 3002 loss=2.461, ppl=5.51, wps=5875.7, ups=0.09, wpb=64899, bsz=128, num_updates=16922, lr=9.98726e-05, gnorm=2.114, loss_scale=2, train_wall=11, gb_free=2.8, wall=194793 2021-06-21 00:45:30 | INFO | train_inner | epoch 006: 2014 / 3002 loss=2.473, ppl=5.55, wps=5826.7, ups=0.09, wpb=64850, bsz=128, num_updates=16923, lr=9.98726e-05, gnorm=2.03, loss_scale=2, train_wall=11, gb_free=2.8, wall=194804 2021-06-21 00:45:41 | INFO | train_inner | epoch 006: 2015 / 3002 loss=2.563, ppl=5.91, wps=5872.2, ups=0.09, wpb=64677, bsz=128, num_updates=16924, lr=9.98726e-05, gnorm=2.045, loss_scale=2, train_wall=11, gb_free=2.8, wall=194815 2021-06-21 00:45:52 | INFO | train_inner | epoch 006: 2016 / 3002 loss=2.531, ppl=5.78, wps=5978.4, ups=0.09, wpb=64858, bsz=128, num_updates=16925, lr=9.98726e-05, gnorm=2.035, loss_scale=2, train_wall=10, gb_free=2.8, wall=194826 2021-06-21 00:46:03 | INFO | train_inner | epoch 006: 2017 / 3002 loss=2.411, ppl=5.32, wps=5757.7, ups=0.09, wpb=64793, bsz=128, num_updates=16926, lr=9.98726e-05, gnorm=1.921, loss_scale=2, train_wall=11, gb_free=2.8, wall=194837 2021-06-21 00:46:14 | INFO | train_inner | epoch 006: 2018 / 3002 loss=2.495, ppl=5.64, wps=5875.8, ups=0.09, wpb=64858, bsz=128, num_updates=16927, lr=9.98726e-05, gnorm=2.005, loss_scale=2, train_wall=11, gb_free=2.8, wall=194848 2021-06-21 00:46:25 | INFO | train_inner | epoch 006: 2019 / 3002 loss=2.645, ppl=6.25, wps=5838.3, ups=0.09, wpb=64753, bsz=128, num_updates=16928, lr=9.98726e-05, gnorm=2.054, loss_scale=2, train_wall=11, gb_free=2.8, wall=194859 2021-06-21 00:46:36 | INFO | train_inner | epoch 006: 2020 / 3002 loss=2.483, ppl=5.59, wps=6016.9, ups=0.09, wpb=64837, bsz=128, num_updates=16929, lr=9.98726e-05, gnorm=1.99, loss_scale=2, train_wall=10, gb_free=2.8, wall=194870 2021-06-21 00:46:47 | INFO | train_inner | epoch 006: 2021 / 3002 loss=2.584, ppl=6, wps=6020.1, ups=0.09, wpb=64839, bsz=128, num_updates=16930, lr=9.98725e-05, gnorm=2.013, loss_scale=2, train_wall=10, gb_free=2.8, wall=194881 2021-06-21 00:46:58 | INFO | train_inner | epoch 006: 2022 / 3002 loss=2.404, ppl=5.29, wps=5846.4, ups=0.09, wpb=64759, bsz=128, num_updates=16931, lr=9.98725e-05, gnorm=1.988, loss_scale=2, train_wall=11, gb_free=2.8, wall=194892 2021-06-21 00:47:09 | INFO | train_inner | epoch 006: 2023 / 3002 loss=2.476, ppl=5.56, wps=5769.7, ups=0.09, wpb=64820, bsz=128, num_updates=16932, lr=9.98725e-05, gnorm=2.042, loss_scale=2, train_wall=11, gb_free=2.8, wall=194903 2021-06-21 00:47:20 | INFO | train_inner | epoch 006: 2024 / 3002 loss=2.413, ppl=5.32, wps=5803.9, ups=0.09, wpb=64808, bsz=128, num_updates=16933, lr=9.98725e-05, gnorm=2.617, loss_scale=2, train_wall=11, gb_free=2.8, wall=194914 2021-06-21 00:47:31 | INFO | train_inner | epoch 006: 2025 / 3002 loss=2.601, ppl=6.07, wps=5870.8, ups=0.09, wpb=64807, bsz=128, num_updates=16934, lr=9.98725e-05, gnorm=2.042, loss_scale=2, train_wall=11, gb_free=2.8, wall=194925 2021-06-21 00:47:42 | INFO | train_inner | epoch 006: 2026 / 3002 loss=2.388, ppl=5.23, wps=5876.6, ups=0.09, wpb=64868, bsz=128, num_updates=16935, lr=9.98725e-05, gnorm=2.001, loss_scale=2, train_wall=11, gb_free=2.8, wall=194936 2021-06-21 00:47:53 | INFO | train_inner | epoch 006: 2027 / 3002 loss=2.458, ppl=5.49, wps=5877.3, ups=0.09, wpb=64855, bsz=128, num_updates=16936, lr=9.98725e-05, gnorm=2.142, loss_scale=2, train_wall=11, gb_free=2.8, wall=194947 2021-06-21 00:48:04 | INFO | train_inner | epoch 006: 2028 / 3002 loss=2.358, ppl=5.12, wps=5876.2, ups=0.09, wpb=64921, bsz=128, num_updates=16937, lr=9.98725e-05, gnorm=1.951, loss_scale=2, train_wall=11, gb_free=2.8, wall=194958 2021-06-21 00:48:15 | INFO | train_inner | epoch 006: 2029 / 3002 loss=2.351, ppl=5.1, wps=5902.1, ups=0.09, wpb=64846, bsz=128, num_updates=16938, lr=9.98725e-05, gnorm=2.018, loss_scale=2, train_wall=11, gb_free=2.8, wall=194969 2021-06-21 00:48:26 | INFO | train_inner | epoch 006: 2030 / 3002 loss=2.53, ppl=5.78, wps=6057.6, ups=0.09, wpb=64825, bsz=128, num_updates=16939, lr=9.98725e-05, gnorm=1.971, loss_scale=2, train_wall=10, gb_free=2.8, wall=194980 2021-06-21 00:48:37 | INFO | train_inner | epoch 006: 2031 / 3002 loss=2.612, ppl=6.11, wps=5904.6, ups=0.09, wpb=64771, bsz=128, num_updates=16940, lr=9.98725e-05, gnorm=2.095, loss_scale=2, train_wall=11, gb_free=2.8, wall=194991 2021-06-21 00:48:48 | INFO | train_inner | epoch 006: 2032 / 3002 loss=2.592, ppl=6.03, wps=5803.7, ups=0.09, wpb=64781, bsz=128, num_updates=16941, lr=9.98725e-05, gnorm=2.042, loss_scale=2, train_wall=11, gb_free=2.8, wall=195002 2021-06-21 00:48:59 | INFO | train_inner | epoch 006: 2033 / 3002 loss=2.536, ppl=5.8, wps=5845.7, ups=0.09, wpb=64734, bsz=128, num_updates=16942, lr=9.98725e-05, gnorm=1.965, loss_scale=2, train_wall=11, gb_free=2.8, wall=195013 2021-06-21 00:49:10 | INFO | train_inner | epoch 006: 2034 / 3002 loss=2.558, ppl=5.89, wps=5812.3, ups=0.09, wpb=64830, bsz=128, num_updates=16943, lr=9.98724e-05, gnorm=1.924, loss_scale=2, train_wall=11, gb_free=2.8, wall=195025 2021-06-21 00:49:21 | INFO | train_inner | epoch 006: 2035 / 3002 loss=2.66, ppl=6.32, wps=5936.4, ups=0.09, wpb=64808, bsz=128, num_updates=16944, lr=9.98724e-05, gnorm=2.084, loss_scale=2, train_wall=10, gb_free=2.8, wall=195035 2021-06-21 00:49:32 | INFO | train_inner | epoch 006: 2036 / 3002 loss=2.477, ppl=5.57, wps=5839.3, ups=0.09, wpb=64811, bsz=128, num_updates=16945, lr=9.98724e-05, gnorm=1.976, loss_scale=2, train_wall=11, gb_free=2.8, wall=195047 2021-06-21 00:49:43 | INFO | train_inner | epoch 006: 2037 / 3002 loss=2.496, ppl=5.64, wps=5920.5, ups=0.09, wpb=64819, bsz=128, num_updates=16946, lr=9.98724e-05, gnorm=2.204, loss_scale=2, train_wall=11, gb_free=2.8, wall=195058 2021-06-21 00:49:54 | INFO | train_inner | epoch 006: 2038 / 3002 loss=2.484, ppl=5.6, wps=5881.3, ups=0.09, wpb=64842, bsz=128, num_updates=16947, lr=9.98724e-05, gnorm=44.939, loss_scale=2, train_wall=11, gb_free=2.8, wall=195069 2021-06-21 00:50:05 | INFO | train_inner | epoch 006: 2039 / 3002 loss=2.549, ppl=5.85, wps=5855.6, ups=0.09, wpb=64818, bsz=128, num_updates=16948, lr=9.98724e-05, gnorm=1.938, loss_scale=2, train_wall=11, gb_free=2.8, wall=195080 2021-06-21 00:50:16 | INFO | train_inner | epoch 006: 2040 / 3002 loss=2.41, ppl=5.31, wps=5903.7, ups=0.09, wpb=64838, bsz=128, num_updates=16949, lr=9.98724e-05, gnorm=1.988, loss_scale=2, train_wall=11, gb_free=2.8, wall=195091 2021-06-21 00:50:27 | INFO | train_inner | epoch 006: 2041 / 3002 loss=2.516, ppl=5.72, wps=5878.9, ups=0.09, wpb=64819, bsz=128, num_updates=16950, lr=9.98724e-05, gnorm=2.011, loss_scale=2, train_wall=11, gb_free=2.8, wall=195102 2021-06-21 00:50:39 | INFO | train_inner | epoch 006: 2042 / 3002 loss=2.486, ppl=5.6, wps=5738.5, ups=0.09, wpb=64835, bsz=128, num_updates=16951, lr=9.98724e-05, gnorm=2.01, loss_scale=2, train_wall=11, gb_free=2.8, wall=195113 2021-06-21 00:50:50 | INFO | train_inner | epoch 006: 2043 / 3002 loss=2.526, ppl=5.76, wps=5936.2, ups=0.09, wpb=64880, bsz=128, num_updates=16952, lr=9.98724e-05, gnorm=1.986, loss_scale=2, train_wall=10, gb_free=2.8, wall=195124 2021-06-21 00:51:00 | INFO | train_inner | epoch 006: 2044 / 3002 loss=2.49, ppl=5.62, wps=5948.2, ups=0.09, wpb=64924, bsz=128, num_updates=16953, lr=9.98724e-05, gnorm=2.012, loss_scale=2, train_wall=10, gb_free=2.8, wall=195135 2021-06-21 00:51:11 | INFO | train_inner | epoch 006: 2045 / 3002 loss=2.584, ppl=5.99, wps=5901.5, ups=0.09, wpb=64775, bsz=128, num_updates=16954, lr=9.98724e-05, gnorm=2.39, loss_scale=2, train_wall=11, gb_free=2.8, wall=195146 2021-06-21 00:51:22 | INFO | train_inner | epoch 006: 2046 / 3002 loss=2.372, ppl=5.18, wps=5925.9, ups=0.09, wpb=64884, bsz=128, num_updates=16955, lr=9.98723e-05, gnorm=2.034, loss_scale=2, train_wall=11, gb_free=2.8, wall=195157 2021-06-21 00:51:33 | INFO | train_inner | epoch 006: 2047 / 3002 loss=2.508, ppl=5.69, wps=5836.9, ups=0.09, wpb=64830, bsz=128, num_updates=16956, lr=9.98723e-05, gnorm=2.015, loss_scale=2, train_wall=11, gb_free=2.8, wall=195168 2021-06-21 00:51:45 | INFO | train_inner | epoch 006: 2048 / 3002 loss=2.395, ppl=5.26, wps=5795.5, ups=0.09, wpb=64778, bsz=128, num_updates=16957, lr=9.98723e-05, gnorm=2.054, loss_scale=2, train_wall=11, gb_free=2.8, wall=195179 2021-06-21 00:51:56 | INFO | train_inner | epoch 006: 2049 / 3002 loss=2.563, ppl=5.91, wps=5852.6, ups=0.09, wpb=64855, bsz=128, num_updates=16958, lr=9.98723e-05, gnorm=3.843, loss_scale=2, train_wall=11, gb_free=2.8, wall=195190 2021-06-21 00:52:07 | INFO | train_inner | epoch 006: 2050 / 3002 loss=2.463, ppl=5.51, wps=5774.6, ups=0.09, wpb=64856, bsz=128, num_updates=16959, lr=9.98723e-05, gnorm=2.038, loss_scale=2, train_wall=11, gb_free=2.8, wall=195201 2021-06-21 00:52:18 | INFO | train_inner | epoch 006: 2051 / 3002 loss=2.565, ppl=5.92, wps=5825.3, ups=0.09, wpb=64840, bsz=128, num_updates=16960, lr=9.98723e-05, gnorm=2.021, loss_scale=2, train_wall=11, gb_free=2.8, wall=195212 2021-06-21 00:52:29 | INFO | train_inner | epoch 006: 2052 / 3002 loss=2.477, ppl=5.57, wps=5947.4, ups=0.09, wpb=64897, bsz=128, num_updates=16961, lr=9.98723e-05, gnorm=2.003, loss_scale=2, train_wall=10, gb_free=2.8, wall=195223 2021-06-21 00:52:40 | INFO | train_inner | epoch 006: 2053 / 3002 loss=2.463, ppl=5.51, wps=5863.9, ups=0.09, wpb=64806, bsz=128, num_updates=16962, lr=9.98723e-05, gnorm=2.328, loss_scale=2, train_wall=11, gb_free=2.8, wall=195234 2021-06-21 00:52:51 | INFO | train_inner | epoch 006: 2054 / 3002 loss=2.499, ppl=5.65, wps=5809.2, ups=0.09, wpb=64754, bsz=128, num_updates=16963, lr=9.98723e-05, gnorm=2.616, loss_scale=2, train_wall=11, gb_free=2.8, wall=195246 2021-06-21 00:53:02 | INFO | train_inner | epoch 006: 2055 / 3002 loss=2.372, ppl=5.18, wps=5799.6, ups=0.09, wpb=64876, bsz=128, num_updates=16964, lr=9.98723e-05, gnorm=2.075, loss_scale=2, train_wall=11, gb_free=2.8, wall=195257 2021-06-21 00:53:13 | INFO | train_inner | epoch 006: 2056 / 3002 loss=2.381, ppl=5.21, wps=5870, ups=0.09, wpb=64816, bsz=128, num_updates=16965, lr=9.98723e-05, gnorm=9.084, loss_scale=2, train_wall=11, gb_free=2.8, wall=195268 2021-06-21 00:53:25 | INFO | train_inner | epoch 006: 2057 / 3002 loss=2.462, ppl=5.51, wps=5832.8, ups=0.09, wpb=64826, bsz=128, num_updates=16966, lr=9.98723e-05, gnorm=2.516, loss_scale=2, train_wall=11, gb_free=2.8, wall=195279 2021-06-21 00:53:35 | INFO | train_inner | epoch 006: 2058 / 3002 loss=2.423, ppl=5.36, wps=5957.9, ups=0.09, wpb=64819, bsz=128, num_updates=16967, lr=9.98723e-05, gnorm=2.075, loss_scale=2, train_wall=10, gb_free=2.8, wall=195290 2021-06-21 00:53:46 | INFO | train_inner | epoch 006: 2059 / 3002 loss=2.395, ppl=5.26, wps=5943.1, ups=0.09, wpb=64843, bsz=128, num_updates=16968, lr=9.98722e-05, gnorm=2.891, loss_scale=2, train_wall=10, gb_free=2.8, wall=195301 2021-06-21 00:53:58 | INFO | train_inner | epoch 006: 2060 / 3002 loss=2.656, ppl=6.3, wps=5725.5, ups=0.09, wpb=64781, bsz=128, num_updates=16969, lr=9.98722e-05, gnorm=2.884, loss_scale=2, train_wall=11, gb_free=2.8, wall=195312 2021-06-21 00:54:09 | INFO | train_inner | epoch 006: 2061 / 3002 loss=2.537, ppl=5.81, wps=5805.4, ups=0.09, wpb=64847, bsz=128, num_updates=16970, lr=9.98722e-05, gnorm=2.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=195323 2021-06-21 00:54:20 | INFO | train_inner | epoch 006: 2062 / 3002 loss=2.485, ppl=5.6, wps=5842.3, ups=0.09, wpb=64890, bsz=128, num_updates=16971, lr=9.98722e-05, gnorm=2.145, loss_scale=2, train_wall=11, gb_free=2.8, wall=195334 2021-06-21 00:54:31 | INFO | train_inner | epoch 006: 2063 / 3002 loss=2.41, ppl=5.31, wps=5854.1, ups=0.09, wpb=64836, bsz=128, num_updates=16972, lr=9.98722e-05, gnorm=2.308, loss_scale=2, train_wall=11, gb_free=2.8, wall=195345 2021-06-21 00:54:42 | INFO | train_inner | epoch 006: 2064 / 3002 loss=2.44, ppl=5.43, wps=5803.4, ups=0.09, wpb=64795, bsz=128, num_updates=16973, lr=9.98722e-05, gnorm=2.446, loss_scale=2, train_wall=11, gb_free=2.8, wall=195356 2021-06-21 00:54:53 | INFO | train_inner | epoch 006: 2065 / 3002 loss=2.594, ppl=6.04, wps=5888.6, ups=0.09, wpb=64796, bsz=128, num_updates=16974, lr=9.98722e-05, gnorm=2.214, loss_scale=2, train_wall=11, gb_free=2.8, wall=195367 2021-06-21 00:55:04 | INFO | train_inner | epoch 006: 2066 / 3002 loss=2.502, ppl=5.67, wps=5730.2, ups=0.09, wpb=64914, bsz=128, num_updates=16975, lr=9.98722e-05, gnorm=2.398, loss_scale=2, train_wall=11, gb_free=2.8, wall=195379 2021-06-21 00:55:15 | INFO | train_inner | epoch 006: 2067 / 3002 loss=2.474, ppl=5.55, wps=5943, ups=0.09, wpb=64753, bsz=128, num_updates=16976, lr=9.98722e-05, gnorm=2.372, loss_scale=2, train_wall=10, gb_free=2.8, wall=195390 2021-06-21 00:55:26 | INFO | train_inner | epoch 006: 2068 / 3002 loss=2.477, ppl=5.57, wps=5836.2, ups=0.09, wpb=64794, bsz=128, num_updates=16977, lr=9.98722e-05, gnorm=2.18, loss_scale=2, train_wall=11, gb_free=2.8, wall=195401 2021-06-21 00:55:38 | INFO | train_inner | epoch 006: 2069 / 3002 loss=2.516, ppl=5.72, wps=5857, ups=0.09, wpb=64811, bsz=128, num_updates=16978, lr=9.98722e-05, gnorm=21.403, loss_scale=2, train_wall=11, gb_free=2.8, wall=195412 2021-06-21 00:55:49 | INFO | train_inner | epoch 006: 2070 / 3002 loss=2.451, ppl=5.47, wps=5834.3, ups=0.09, wpb=64788, bsz=128, num_updates=16979, lr=9.98722e-05, gnorm=2.199, loss_scale=2, train_wall=11, gb_free=2.8, wall=195423 2021-06-21 00:56:00 | INFO | train_inner | epoch 006: 2071 / 3002 loss=2.473, ppl=5.55, wps=5919.6, ups=0.09, wpb=64768, bsz=128, num_updates=16980, lr=9.98721e-05, gnorm=2.098, loss_scale=2, train_wall=10, gb_free=2.8, wall=195434 2021-06-21 00:56:11 | INFO | train_inner | epoch 006: 2072 / 3002 loss=2.338, ppl=5.06, wps=5773.4, ups=0.09, wpb=64817, bsz=128, num_updates=16981, lr=9.98721e-05, gnorm=2.778, loss_scale=2, train_wall=11, gb_free=2.8, wall=195445 2021-06-21 00:56:22 | INFO | train_inner | epoch 006: 2073 / 3002 loss=2.507, ppl=5.68, wps=5780.6, ups=0.09, wpb=64745, bsz=128, num_updates=16982, lr=9.98721e-05, gnorm=2.022, loss_scale=2, train_wall=11, gb_free=2.8, wall=195456 2021-06-21 00:56:33 | INFO | train_inner | epoch 006: 2074 / 3002 loss=2.576, ppl=5.96, wps=5933.3, ups=0.09, wpb=64865, bsz=128, num_updates=16983, lr=9.98721e-05, gnorm=2.16, loss_scale=2, train_wall=10, gb_free=2.8, wall=195467 2021-06-21 00:56:44 | INFO | train_inner | epoch 006: 2075 / 3002 loss=2.592, ppl=6.03, wps=6010, ups=0.09, wpb=64867, bsz=128, num_updates=16984, lr=9.98721e-05, gnorm=2.163, loss_scale=2, train_wall=10, gb_free=2.8, wall=195478 2021-06-21 00:56:55 | INFO | train_inner | epoch 006: 2076 / 3002 loss=2.437, ppl=5.41, wps=5863.2, ups=0.09, wpb=64841, bsz=128, num_updates=16985, lr=9.98721e-05, gnorm=2.06, loss_scale=2, train_wall=11, gb_free=2.8, wall=195489 2021-06-21 00:57:06 | INFO | train_inner | epoch 006: 2077 / 3002 loss=2.588, ppl=6.01, wps=5918, ups=0.09, wpb=64772, bsz=128, num_updates=16986, lr=9.98721e-05, gnorm=2.266, loss_scale=2, train_wall=11, gb_free=2.8, wall=195500 2021-06-21 00:57:17 | INFO | train_inner | epoch 006: 2078 / 3002 loss=2.449, ppl=5.46, wps=5853, ups=0.09, wpb=64825, bsz=128, num_updates=16987, lr=9.98721e-05, gnorm=2.126, loss_scale=2, train_wall=11, gb_free=2.8, wall=195511 2021-06-21 00:57:28 | INFO | train_inner | epoch 006: 2079 / 3002 loss=2.428, ppl=5.38, wps=5958.4, ups=0.09, wpb=64924, bsz=128, num_updates=16988, lr=9.98721e-05, gnorm=2.031, loss_scale=2, train_wall=10, gb_free=2.8, wall=195522 2021-06-21 00:57:39 | INFO | train_inner | epoch 006: 2080 / 3002 loss=2.477, ppl=5.57, wps=5823, ups=0.09, wpb=64754, bsz=128, num_updates=16989, lr=9.98721e-05, gnorm=2.086, loss_scale=2, train_wall=11, gb_free=2.8, wall=195533 2021-06-21 00:57:50 | INFO | train_inner | epoch 006: 2081 / 3002 loss=2.472, ppl=5.55, wps=5783.1, ups=0.09, wpb=64822, bsz=128, num_updates=16990, lr=9.98721e-05, gnorm=1.969, loss_scale=2, train_wall=11, gb_free=2.8, wall=195544 2021-06-21 00:58:01 | INFO | train_inner | epoch 006: 2082 / 3002 loss=2.437, ppl=5.42, wps=5824.1, ups=0.09, wpb=64857, bsz=128, num_updates=16991, lr=9.98721e-05, gnorm=2.26, loss_scale=2, train_wall=11, gb_free=2.8, wall=195556 2021-06-21 00:58:12 | INFO | train_inner | epoch 006: 2083 / 3002 loss=2.64, ppl=6.23, wps=5932.1, ups=0.09, wpb=64742, bsz=128, num_updates=16992, lr=9.98721e-05, gnorm=2.341, loss_scale=2, train_wall=10, gb_free=2.8, wall=195566 2021-06-21 00:58:23 | INFO | train_inner | epoch 006: 2084 / 3002 loss=2.603, ppl=6.07, wps=5766.6, ups=0.09, wpb=64820, bsz=128, num_updates=16993, lr=9.9872e-05, gnorm=2.646, loss_scale=2, train_wall=11, gb_free=2.8, wall=195578 2021-06-21 00:58:34 | INFO | train_inner | epoch 006: 2085 / 3002 loss=2.49, ppl=5.62, wps=5848.8, ups=0.09, wpb=64848, bsz=128, num_updates=16994, lr=9.9872e-05, gnorm=1.925, loss_scale=2, train_wall=11, gb_free=2.8, wall=195589 2021-06-21 00:58:45 | INFO | train_inner | epoch 006: 2086 / 3002 loss=2.449, ppl=5.46, wps=5989, ups=0.09, wpb=64923, bsz=128, num_updates=16995, lr=9.9872e-05, gnorm=1.988, loss_scale=2, train_wall=10, gb_free=2.8, wall=195600 2021-06-21 00:58:56 | INFO | train_inner | epoch 006: 2087 / 3002 loss=2.512, ppl=5.7, wps=5819.8, ups=0.09, wpb=64808, bsz=128, num_updates=16996, lr=9.9872e-05, gnorm=2.245, loss_scale=2, train_wall=11, gb_free=2.8, wall=195611 2021-06-21 00:59:07 | INFO | train_inner | epoch 006: 2088 / 3002 loss=2.497, ppl=5.64, wps=5892.9, ups=0.09, wpb=64864, bsz=128, num_updates=16997, lr=9.9872e-05, gnorm=2.035, loss_scale=2, train_wall=11, gb_free=2.8, wall=195622 2021-06-21 00:59:18 | INFO | train_inner | epoch 006: 2089 / 3002 loss=2.405, ppl=5.3, wps=5852.7, ups=0.09, wpb=64783, bsz=128, num_updates=16998, lr=9.9872e-05, gnorm=1.96, loss_scale=2, train_wall=11, gb_free=2.8, wall=195633 2021-06-21 00:59:30 | INFO | train_inner | epoch 006: 2090 / 3002 loss=2.435, ppl=5.41, wps=5714.1, ups=0.09, wpb=64769, bsz=128, num_updates=16999, lr=9.9872e-05, gnorm=1.99, loss_scale=2, train_wall=11, gb_free=2.8, wall=195644 2021-06-21 00:59:41 | INFO | train_inner | epoch 006: 2091 / 3002 loss=2.498, ppl=5.65, wps=5843.6, ups=0.09, wpb=64807, bsz=128, num_updates=17000, lr=9.9872e-05, gnorm=1.976, loss_scale=2, train_wall=11, gb_free=2.8, wall=195655 2021-06-21 00:59:52 | INFO | train_inner | epoch 006: 2092 / 3002 loss=2.491, ppl=5.62, wps=5811.2, ups=0.09, wpb=64751, bsz=128, num_updates=17001, lr=9.9872e-05, gnorm=2.063, loss_scale=2, train_wall=11, gb_free=2.8, wall=195666 2021-06-21 01:00:03 | INFO | train_inner | epoch 006: 2093 / 3002 loss=2.438, ppl=5.42, wps=5869, ups=0.09, wpb=64893, bsz=128, num_updates=17002, lr=9.9872e-05, gnorm=2.101, loss_scale=4, train_wall=11, gb_free=2.8, wall=195677 2021-06-21 01:00:14 | INFO | train_inner | epoch 006: 2094 / 3002 loss=2.401, ppl=5.28, wps=5840.4, ups=0.09, wpb=64862, bsz=128, num_updates=17003, lr=9.9872e-05, gnorm=1.993, loss_scale=4, train_wall=11, gb_free=2.8, wall=195689 2021-06-21 01:00:25 | INFO | train_inner | epoch 006: 2095 / 3002 loss=2.279, ppl=4.85, wps=5965.8, ups=0.09, wpb=64804, bsz=128, num_updates=17004, lr=9.9872e-05, gnorm=3.92, loss_scale=4, train_wall=10, gb_free=2.8, wall=195699 2021-06-21 01:00:36 | INFO | train_inner | epoch 006: 2096 / 3002 loss=2.495, ppl=5.64, wps=5827.3, ups=0.09, wpb=64848, bsz=128, num_updates=17005, lr=9.98719e-05, gnorm=2.022, loss_scale=4, train_wall=11, gb_free=2.8, wall=195711 2021-06-21 01:00:48 | INFO | train_inner | epoch 006: 2097 / 3002 loss=2.45, ppl=5.46, wps=5690.2, ups=0.09, wpb=64868, bsz=128, num_updates=17006, lr=9.98719e-05, gnorm=2.002, loss_scale=4, train_wall=11, gb_free=2.8, wall=195722 2021-06-21 01:00:59 | INFO | train_inner | epoch 006: 2098 / 3002 loss=2.478, ppl=5.57, wps=5870.4, ups=0.09, wpb=64819, bsz=128, num_updates=17007, lr=9.98719e-05, gnorm=1.916, loss_scale=4, train_wall=11, gb_free=2.8, wall=195733 2021-06-21 01:01:10 | INFO | train_inner | epoch 006: 2099 / 3002 loss=2.411, ppl=5.32, wps=5769.4, ups=0.09, wpb=64842, bsz=128, num_updates=17008, lr=9.98719e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=195744 2021-06-21 01:01:21 | INFO | train_inner | epoch 006: 2100 / 3002 loss=2.613, ppl=6.12, wps=5910.3, ups=0.09, wpb=64809, bsz=128, num_updates=17009, lr=9.98719e-05, gnorm=1.907, loss_scale=4, train_wall=10, gb_free=2.8, wall=195755 2021-06-21 01:01:32 | INFO | train_inner | epoch 006: 2101 / 3002 loss=2.55, ppl=5.86, wps=5967.3, ups=0.09, wpb=64765, bsz=128, num_updates=17010, lr=9.98719e-05, gnorm=1.998, loss_scale=4, train_wall=10, gb_free=2.8, wall=195766 2021-06-21 01:01:43 | INFO | train_inner | epoch 006: 2102 / 3002 loss=2.515, ppl=5.72, wps=5791.4, ups=0.09, wpb=64803, bsz=128, num_updates=17011, lr=9.98719e-05, gnorm=2.65, loss_scale=4, train_wall=11, gb_free=2.8, wall=195777 2021-06-21 01:01:54 | INFO | train_inner | epoch 006: 2103 / 3002 loss=2.516, ppl=5.72, wps=5815.5, ups=0.09, wpb=64764, bsz=128, num_updates=17012, lr=9.98719e-05, gnorm=2.027, loss_scale=4, train_wall=11, gb_free=2.8, wall=195788 2021-06-21 01:02:05 | INFO | train_inner | epoch 006: 2104 / 3002 loss=2.523, ppl=5.75, wps=5830, ups=0.09, wpb=64784, bsz=128, num_updates=17013, lr=9.98719e-05, gnorm=2.134, loss_scale=4, train_wall=11, gb_free=2.8, wall=195799 2021-06-21 01:02:16 | INFO | train_inner | epoch 006: 2105 / 3002 loss=2.53, ppl=5.78, wps=5886.7, ups=0.09, wpb=64817, bsz=128, num_updates=17014, lr=9.98719e-05, gnorm=2.011, loss_scale=4, train_wall=11, gb_free=2.8, wall=195811 2021-06-21 01:02:27 | INFO | train_inner | epoch 006: 2106 / 3002 loss=2.529, ppl=5.77, wps=5844.5, ups=0.09, wpb=64784, bsz=128, num_updates=17015, lr=9.98719e-05, gnorm=1.951, loss_scale=4, train_wall=11, gb_free=2.8, wall=195822 2021-06-21 01:02:38 | INFO | train_inner | epoch 006: 2107 / 3002 loss=2.421, ppl=5.36, wps=5812.2, ups=0.09, wpb=64763, bsz=128, num_updates=17016, lr=9.98719e-05, gnorm=1.9, loss_scale=4, train_wall=11, gb_free=2.8, wall=195833 2021-06-21 01:02:50 | INFO | train_inner | epoch 006: 2108 / 3002 loss=2.63, ppl=6.19, wps=5727.5, ups=0.09, wpb=64822, bsz=128, num_updates=17017, lr=9.98719e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=195844 2021-06-21 01:03:01 | INFO | train_inner | epoch 006: 2109 / 3002 loss=2.333, ppl=5.04, wps=5844.8, ups=0.09, wpb=64865, bsz=128, num_updates=17018, lr=9.98718e-05, gnorm=1.961, loss_scale=4, train_wall=11, gb_free=2.8, wall=195855 2021-06-21 01:03:12 | INFO | train_inner | epoch 006: 2110 / 3002 loss=2.58, ppl=5.98, wps=5823.3, ups=0.09, wpb=64767, bsz=128, num_updates=17019, lr=9.98718e-05, gnorm=1.918, loss_scale=4, train_wall=11, gb_free=2.8, wall=195866 2021-06-21 01:03:23 | INFO | train_inner | epoch 006: 2111 / 3002 loss=2.54, ppl=5.81, wps=5765.1, ups=0.09, wpb=64795, bsz=128, num_updates=17020, lr=9.98718e-05, gnorm=1.965, loss_scale=4, train_wall=11, gb_free=2.8, wall=195878 2021-06-21 01:03:34 | INFO | train_inner | epoch 006: 2112 / 3002 loss=2.6, ppl=6.06, wps=5844.5, ups=0.09, wpb=64866, bsz=128, num_updates=17021, lr=9.98718e-05, gnorm=1.921, loss_scale=4, train_wall=11, gb_free=2.8, wall=195889 2021-06-21 01:03:45 | INFO | train_inner | epoch 006: 2113 / 3002 loss=2.649, ppl=6.27, wps=5840.4, ups=0.09, wpb=64838, bsz=128, num_updates=17022, lr=9.98718e-05, gnorm=1.963, loss_scale=4, train_wall=11, gb_free=2.8, wall=195900 2021-06-21 01:03:56 | INFO | train_inner | epoch 006: 2114 / 3002 loss=2.459, ppl=5.5, wps=5842.6, ups=0.09, wpb=64768, bsz=128, num_updates=17023, lr=9.98718e-05, gnorm=1.967, loss_scale=4, train_wall=11, gb_free=2.8, wall=195911 2021-06-21 01:04:08 | INFO | train_inner | epoch 006: 2115 / 3002 loss=2.455, ppl=5.48, wps=5807.3, ups=0.09, wpb=64849, bsz=128, num_updates=17024, lr=9.98718e-05, gnorm=2.013, loss_scale=4, train_wall=11, gb_free=2.8, wall=195922 2021-06-21 01:04:19 | INFO | train_inner | epoch 006: 2116 / 3002 loss=2.508, ppl=5.69, wps=5918.3, ups=0.09, wpb=64812, bsz=128, num_updates=17025, lr=9.98718e-05, gnorm=1.879, loss_scale=4, train_wall=10, gb_free=2.8, wall=195933 2021-06-21 01:04:30 | INFO | train_inner | epoch 006: 2117 / 3002 loss=2.514, ppl=5.71, wps=5773.1, ups=0.09, wpb=64779, bsz=128, num_updates=17026, lr=9.98718e-05, gnorm=1.964, loss_scale=4, train_wall=11, gb_free=2.8, wall=195944 2021-06-21 01:04:41 | INFO | train_inner | epoch 006: 2118 / 3002 loss=2.48, ppl=5.58, wps=5805.7, ups=0.09, wpb=64779, bsz=128, num_updates=17027, lr=9.98718e-05, gnorm=1.944, loss_scale=4, train_wall=11, gb_free=2.8, wall=195955 2021-06-21 01:04:52 | INFO | train_inner | epoch 006: 2119 / 3002 loss=2.507, ppl=5.68, wps=5923.4, ups=0.09, wpb=64835, bsz=128, num_updates=17028, lr=9.98718e-05, gnorm=1.996, loss_scale=4, train_wall=10, gb_free=2.8, wall=195966 2021-06-21 01:05:03 | INFO | train_inner | epoch 006: 2120 / 3002 loss=2.466, ppl=5.52, wps=5853.9, ups=0.09, wpb=64807, bsz=128, num_updates=17029, lr=9.98718e-05, gnorm=1.917, loss_scale=4, train_wall=11, gb_free=2.8, wall=195977 2021-06-21 01:05:14 | INFO | train_inner | epoch 006: 2121 / 3002 loss=2.61, ppl=6.11, wps=5883.8, ups=0.09, wpb=64821, bsz=128, num_updates=17030, lr=9.98717e-05, gnorm=1.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=195988 2021-06-21 01:05:25 | INFO | train_inner | epoch 006: 2122 / 3002 loss=2.557, ppl=5.88, wps=5852.8, ups=0.09, wpb=64890, bsz=128, num_updates=17031, lr=9.98717e-05, gnorm=1.966, loss_scale=4, train_wall=11, gb_free=2.8, wall=195999 2021-06-21 01:05:36 | INFO | train_inner | epoch 006: 2123 / 3002 loss=2.714, ppl=6.56, wps=5888.4, ups=0.09, wpb=64920, bsz=128, num_updates=17032, lr=9.98717e-05, gnorm=3.036, loss_scale=4, train_wall=11, gb_free=2.8, wall=196010 2021-06-21 01:05:47 | INFO | train_inner | epoch 006: 2124 / 3002 loss=2.554, ppl=5.87, wps=5969.5, ups=0.09, wpb=64859, bsz=128, num_updates=17033, lr=9.98717e-05, gnorm=1.955, loss_scale=4, train_wall=10, gb_free=2.8, wall=196021 2021-06-21 01:05:58 | INFO | train_inner | epoch 006: 2125 / 3002 loss=2.408, ppl=5.31, wps=5760.4, ups=0.09, wpb=64798, bsz=128, num_updates=17034, lr=9.98717e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=196033 2021-06-21 01:06:09 | INFO | train_inner | epoch 006: 2126 / 3002 loss=2.507, ppl=5.69, wps=5828, ups=0.09, wpb=64804, bsz=128, num_updates=17035, lr=9.98717e-05, gnorm=1.97, loss_scale=4, train_wall=11, gb_free=2.8, wall=196044 2021-06-21 01:06:20 | INFO | train_inner | epoch 006: 2127 / 3002 loss=2.383, ppl=5.22, wps=5842.7, ups=0.09, wpb=64801, bsz=128, num_updates=17036, lr=9.98717e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=196055 2021-06-21 01:06:32 | INFO | train_inner | epoch 006: 2128 / 3002 loss=2.494, ppl=5.63, wps=5837.8, ups=0.09, wpb=64773, bsz=128, num_updates=17037, lr=9.98717e-05, gnorm=1.946, loss_scale=4, train_wall=11, gb_free=2.8, wall=196066 2021-06-21 01:06:43 | INFO | train_inner | epoch 006: 2129 / 3002 loss=2.567, ppl=5.93, wps=5906.2, ups=0.09, wpb=64863, bsz=128, num_updates=17038, lr=9.98717e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=196077 2021-06-21 01:06:54 | INFO | train_inner | epoch 006: 2130 / 3002 loss=2.367, ppl=5.16, wps=5808.5, ups=0.09, wpb=64835, bsz=128, num_updates=17039, lr=9.98717e-05, gnorm=1.889, loss_scale=4, train_wall=11, gb_free=2.8, wall=196088 2021-06-21 01:07:05 | INFO | train_inner | epoch 006: 2131 / 3002 loss=2.407, ppl=5.3, wps=5699.1, ups=0.09, wpb=64819, bsz=128, num_updates=17040, lr=9.98717e-05, gnorm=2.033, loss_scale=4, train_wall=11, gb_free=2.8, wall=196099 2021-06-21 01:07:16 | INFO | train_inner | epoch 006: 2132 / 3002 loss=2.597, ppl=6.05, wps=5860.7, ups=0.09, wpb=64765, bsz=128, num_updates=17041, lr=9.98717e-05, gnorm=1.971, loss_scale=4, train_wall=11, gb_free=2.8, wall=196110 2021-06-21 01:07:27 | INFO | train_inner | epoch 006: 2133 / 3002 loss=2.59, ppl=6.02, wps=5848.1, ups=0.09, wpb=64839, bsz=128, num_updates=17042, lr=9.98717e-05, gnorm=2.125, loss_scale=4, train_wall=11, gb_free=2.8, wall=196122 2021-06-21 01:07:38 | INFO | train_inner | epoch 006: 2134 / 3002 loss=2.495, ppl=5.64, wps=5819.1, ups=0.09, wpb=64781, bsz=128, num_updates=17043, lr=9.98716e-05, gnorm=1.985, loss_scale=4, train_wall=11, gb_free=2.8, wall=196133 2021-06-21 01:07:49 | INFO | train_inner | epoch 006: 2135 / 3002 loss=2.43, ppl=5.39, wps=5858.2, ups=0.09, wpb=64894, bsz=128, num_updates=17044, lr=9.98716e-05, gnorm=1.927, loss_scale=4, train_wall=11, gb_free=2.8, wall=196144 2021-06-21 01:08:01 | INFO | train_inner | epoch 006: 2136 / 3002 loss=2.435, ppl=5.41, wps=5788.7, ups=0.09, wpb=64937, bsz=128, num_updates=17045, lr=9.98716e-05, gnorm=2.017, loss_scale=4, train_wall=11, gb_free=2.8, wall=196155 2021-06-21 01:08:11 | INFO | train_inner | epoch 006: 2137 / 3002 loss=2.503, ppl=5.67, wps=5956.8, ups=0.09, wpb=64851, bsz=128, num_updates=17046, lr=9.98716e-05, gnorm=1.909, loss_scale=4, train_wall=10, gb_free=2.8, wall=196166 2021-06-21 01:08:22 | INFO | train_inner | epoch 006: 2138 / 3002 loss=2.527, ppl=5.76, wps=6038.9, ups=0.09, wpb=64880, bsz=128, num_updates=17047, lr=9.98716e-05, gnorm=2.051, loss_scale=4, train_wall=10, gb_free=2.8, wall=196177 2021-06-21 01:08:33 | INFO | train_inner | epoch 006: 2139 / 3002 loss=2.608, ppl=6.09, wps=5833.3, ups=0.09, wpb=64826, bsz=128, num_updates=17048, lr=9.98716e-05, gnorm=1.996, loss_scale=4, train_wall=11, gb_free=2.8, wall=196188 2021-06-21 01:08:44 | INFO | train_inner | epoch 006: 2140 / 3002 loss=2.485, ppl=5.6, wps=5871.7, ups=0.09, wpb=64848, bsz=128, num_updates=17049, lr=9.98716e-05, gnorm=1.984, loss_scale=4, train_wall=11, gb_free=2.8, wall=196199 2021-06-21 01:08:55 | INFO | train_inner | epoch 006: 2141 / 3002 loss=2.345, ppl=5.08, wps=5918.8, ups=0.09, wpb=64906, bsz=128, num_updates=17050, lr=9.98716e-05, gnorm=2.004, loss_scale=4, train_wall=10, gb_free=2.8, wall=196210 2021-06-21 01:09:07 | INFO | train_inner | epoch 006: 2142 / 3002 loss=2.442, ppl=5.43, wps=5812.1, ups=0.09, wpb=64828, bsz=128, num_updates=17051, lr=9.98716e-05, gnorm=2.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=196221 2021-06-21 01:09:18 | INFO | train_inner | epoch 006: 2143 / 3002 loss=2.47, ppl=5.54, wps=5759.7, ups=0.09, wpb=64739, bsz=128, num_updates=17052, lr=9.98716e-05, gnorm=1.954, loss_scale=4, train_wall=11, gb_free=2.8, wall=196232 2021-06-21 01:09:29 | INFO | train_inner | epoch 006: 2144 / 3002 loss=2.422, ppl=5.36, wps=5707.8, ups=0.09, wpb=64872, bsz=128, num_updates=17053, lr=9.98716e-05, gnorm=1.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=196243 2021-06-21 01:09:40 | INFO | train_inner | epoch 006: 2145 / 3002 loss=2.448, ppl=5.46, wps=5726.6, ups=0.09, wpb=64777, bsz=128, num_updates=17054, lr=9.98716e-05, gnorm=1.906, loss_scale=4, train_wall=11, gb_free=2.8, wall=196255 2021-06-21 01:09:52 | INFO | train_inner | epoch 006: 2146 / 3002 loss=2.682, ppl=6.42, wps=5863.2, ups=0.09, wpb=64948, bsz=128, num_updates=17055, lr=9.98715e-05, gnorm=1.925, loss_scale=4, train_wall=11, gb_free=2.8, wall=196266 2021-06-21 01:10:03 | INFO | train_inner | epoch 006: 2147 / 3002 loss=2.341, ppl=5.07, wps=5780.9, ups=0.09, wpb=64790, bsz=128, num_updates=17056, lr=9.98715e-05, gnorm=1.963, loss_scale=4, train_wall=11, gb_free=2.8, wall=196277 2021-06-21 01:10:14 | INFO | train_inner | epoch 006: 2148 / 3002 loss=2.549, ppl=5.85, wps=5783.7, ups=0.09, wpb=64827, bsz=128, num_updates=17057, lr=9.98715e-05, gnorm=2.096, loss_scale=4, train_wall=11, gb_free=2.8, wall=196288 2021-06-21 01:10:25 | INFO | train_inner | epoch 006: 2149 / 3002 loss=2.606, ppl=6.09, wps=5870.1, ups=0.09, wpb=64807, bsz=128, num_updates=17058, lr=9.98715e-05, gnorm=2.075, loss_scale=4, train_wall=11, gb_free=2.8, wall=196299 2021-06-21 01:10:36 | INFO | train_inner | epoch 006: 2150 / 3002 loss=2.593, ppl=6.03, wps=5913.8, ups=0.09, wpb=64823, bsz=128, num_updates=17059, lr=9.98715e-05, gnorm=1.998, loss_scale=4, train_wall=10, gb_free=2.8, wall=196310 2021-06-21 01:10:47 | INFO | train_inner | epoch 006: 2151 / 3002 loss=2.675, ppl=6.39, wps=5713, ups=0.09, wpb=64785, bsz=128, num_updates=17060, lr=9.98715e-05, gnorm=1.974, loss_scale=4, train_wall=11, gb_free=2.8, wall=196322 2021-06-21 01:10:58 | INFO | train_inner | epoch 006: 2152 / 3002 loss=2.494, ppl=5.63, wps=5806, ups=0.09, wpb=64767, bsz=128, num_updates=17061, lr=9.98715e-05, gnorm=2, loss_scale=4, train_wall=11, gb_free=2.8, wall=196333 2021-06-21 01:11:10 | INFO | train_inner | epoch 006: 2153 / 3002 loss=2.533, ppl=5.79, wps=5715.2, ups=0.09, wpb=64874, bsz=128, num_updates=17062, lr=9.98715e-05, gnorm=1.937, loss_scale=4, train_wall=11, gb_free=2.8, wall=196344 2021-06-21 01:11:21 | INFO | train_inner | epoch 006: 2154 / 3002 loss=2.334, ppl=5.04, wps=5867.8, ups=0.09, wpb=64860, bsz=128, num_updates=17063, lr=9.98715e-05, gnorm=1.992, loss_scale=4, train_wall=11, gb_free=2.8, wall=196355 2021-06-21 01:11:32 | INFO | train_inner | epoch 006: 2155 / 3002 loss=2.412, ppl=5.32, wps=5778.4, ups=0.09, wpb=64848, bsz=128, num_updates=17064, lr=9.98715e-05, gnorm=1.967, loss_scale=4, train_wall=11, gb_free=2.8, wall=196366 2021-06-21 01:11:43 | INFO | train_inner | epoch 006: 2156 / 3002 loss=2.348, ppl=5.09, wps=5877.6, ups=0.09, wpb=64831, bsz=128, num_updates=17065, lr=9.98715e-05, gnorm=1.928, loss_scale=4, train_wall=11, gb_free=2.8, wall=196377 2021-06-21 01:11:54 | INFO | train_inner | epoch 006: 2157 / 3002 loss=2.488, ppl=5.61, wps=5886.5, ups=0.09, wpb=64905, bsz=128, num_updates=17066, lr=9.98715e-05, gnorm=2.002, loss_scale=4, train_wall=11, gb_free=2.8, wall=196388 2021-06-21 01:12:05 | INFO | train_inner | epoch 006: 2158 / 3002 loss=2.549, ppl=5.85, wps=5833, ups=0.09, wpb=64795, bsz=128, num_updates=17067, lr=9.98715e-05, gnorm=1.939, loss_scale=4, train_wall=11, gb_free=2.8, wall=196400 2021-06-21 01:12:16 | INFO | train_inner | epoch 006: 2159 / 3002 loss=2.499, ppl=5.65, wps=5937, ups=0.09, wpb=64865, bsz=128, num_updates=17068, lr=9.98714e-05, gnorm=2.001, loss_scale=4, train_wall=10, gb_free=2.8, wall=196410 2021-06-21 01:12:27 | INFO | train_inner | epoch 006: 2160 / 3002 loss=2.594, ppl=6.04, wps=5787.2, ups=0.09, wpb=64762, bsz=128, num_updates=17069, lr=9.98714e-05, gnorm=1.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=196422 2021-06-21 01:12:38 | INFO | train_inner | epoch 006: 2161 / 3002 loss=2.559, ppl=5.89, wps=5864.2, ups=0.09, wpb=64856, bsz=128, num_updates=17070, lr=9.98714e-05, gnorm=1.928, loss_scale=4, train_wall=11, gb_free=2.8, wall=196433 2021-06-21 01:12:50 | INFO | train_inner | epoch 006: 2162 / 3002 loss=2.506, ppl=5.68, wps=5823.9, ups=0.09, wpb=64851, bsz=128, num_updates=17071, lr=9.98714e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=196444 2021-06-21 01:13:01 | INFO | train_inner | epoch 006: 2163 / 3002 loss=2.404, ppl=5.29, wps=5860.5, ups=0.09, wpb=64821, bsz=128, num_updates=17072, lr=9.98714e-05, gnorm=1.948, loss_scale=4, train_wall=11, gb_free=2.8, wall=196455 2021-06-21 01:13:12 | INFO | train_inner | epoch 006: 2164 / 3002 loss=2.451, ppl=5.47, wps=5825.2, ups=0.09, wpb=64840, bsz=128, num_updates=17073, lr=9.98714e-05, gnorm=2.098, loss_scale=4, train_wall=11, gb_free=2.8, wall=196466 2021-06-21 01:13:23 | INFO | train_inner | epoch 006: 2165 / 3002 loss=2.484, ppl=5.59, wps=5896.8, ups=0.09, wpb=64807, bsz=128, num_updates=17074, lr=9.98714e-05, gnorm=1.9, loss_scale=4, train_wall=11, gb_free=2.8, wall=196477 2021-06-21 01:13:34 | INFO | train_inner | epoch 006: 2166 / 3002 loss=2.352, ppl=5.11, wps=5872.8, ups=0.09, wpb=64763, bsz=128, num_updates=17075, lr=9.98714e-05, gnorm=1.909, loss_scale=4, train_wall=11, gb_free=2.8, wall=196488 2021-06-21 01:13:45 | INFO | train_inner | epoch 006: 2167 / 3002 loss=2.503, ppl=5.67, wps=5848.4, ups=0.09, wpb=64804, bsz=128, num_updates=17076, lr=9.98714e-05, gnorm=2.008, loss_scale=4, train_wall=11, gb_free=2.8, wall=196499 2021-06-21 01:13:56 | INFO | train_inner | epoch 006: 2168 / 3002 loss=2.38, ppl=5.2, wps=5865.6, ups=0.09, wpb=64774, bsz=128, num_updates=17077, lr=9.98714e-05, gnorm=2.045, loss_scale=4, train_wall=11, gb_free=2.8, wall=196510 2021-06-21 01:14:07 | INFO | train_inner | epoch 006: 2169 / 3002 loss=2.482, ppl=5.59, wps=5769, ups=0.09, wpb=64830, bsz=128, num_updates=17078, lr=9.98714e-05, gnorm=1.92, loss_scale=4, train_wall=11, gb_free=2.8, wall=196521 2021-06-21 01:14:18 | INFO | train_inner | epoch 006: 2170 / 3002 loss=2.456, ppl=5.49, wps=5858.5, ups=0.09, wpb=64805, bsz=128, num_updates=17079, lr=9.98714e-05, gnorm=2.236, loss_scale=4, train_wall=11, gb_free=2.8, wall=196533 2021-06-21 01:14:29 | INFO | train_inner | epoch 006: 2171 / 3002 loss=2.443, ppl=5.44, wps=5849.3, ups=0.09, wpb=64752, bsz=128, num_updates=17080, lr=9.98713e-05, gnorm=2.038, loss_scale=4, train_wall=11, gb_free=2.8, wall=196544 2021-06-21 01:14:40 | INFO | train_inner | epoch 006: 2172 / 3002 loss=2.453, ppl=5.47, wps=5874.6, ups=0.09, wpb=64777, bsz=128, num_updates=17081, lr=9.98713e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=196555 2021-06-21 01:14:51 | INFO | train_inner | epoch 006: 2173 / 3002 loss=2.37, ppl=5.17, wps=5928.7, ups=0.09, wpb=64802, bsz=128, num_updates=17082, lr=9.98713e-05, gnorm=1.933, loss_scale=4, train_wall=10, gb_free=2.8, wall=196566 2021-06-21 01:15:03 | INFO | train_inner | epoch 006: 2174 / 3002 loss=2.477, ppl=5.57, wps=5731.4, ups=0.09, wpb=64783, bsz=128, num_updates=17083, lr=9.98713e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=196577 2021-06-21 01:15:13 | INFO | train_inner | epoch 006: 2175 / 3002 loss=2.609, ppl=6.1, wps=5906.9, ups=0.09, wpb=64860, bsz=128, num_updates=17084, lr=9.98713e-05, gnorm=1.937, loss_scale=4, train_wall=10, gb_free=2.8, wall=196588 2021-06-21 01:15:24 | INFO | train_inner | epoch 006: 2176 / 3002 loss=2.654, ppl=6.29, wps=5915.6, ups=0.09, wpb=64880, bsz=128, num_updates=17085, lr=9.98713e-05, gnorm=2.049, loss_scale=4, train_wall=10, gb_free=2.8, wall=196599 2021-06-21 01:15:36 | INFO | train_inner | epoch 006: 2177 / 3002 loss=2.403, ppl=5.29, wps=5825.6, ups=0.09, wpb=64815, bsz=128, num_updates=17086, lr=9.98713e-05, gnorm=2.016, loss_scale=4, train_wall=11, gb_free=2.8, wall=196610 2021-06-21 01:15:47 | INFO | train_inner | epoch 006: 2178 / 3002 loss=2.623, ppl=6.16, wps=5788.2, ups=0.09, wpb=64849, bsz=128, num_updates=17087, lr=9.98713e-05, gnorm=2.022, loss_scale=4, train_wall=11, gb_free=2.8, wall=196621 2021-06-21 01:15:58 | INFO | train_inner | epoch 006: 2179 / 3002 loss=2.456, ppl=5.49, wps=5925.3, ups=0.09, wpb=64927, bsz=128, num_updates=17088, lr=9.98713e-05, gnorm=1.983, loss_scale=4, train_wall=11, gb_free=2.8, wall=196632 2021-06-21 01:16:09 | INFO | train_inner | epoch 006: 2180 / 3002 loss=2.311, ppl=4.96, wps=5822.5, ups=0.09, wpb=64837, bsz=128, num_updates=17089, lr=9.98713e-05, gnorm=4.337, loss_scale=4, train_wall=11, gb_free=2.8, wall=196643 2021-06-21 01:16:20 | INFO | train_inner | epoch 006: 2181 / 3002 loss=2.679, ppl=6.4, wps=5863.4, ups=0.09, wpb=64762, bsz=128, num_updates=17090, lr=9.98713e-05, gnorm=2.037, loss_scale=4, train_wall=11, gb_free=2.8, wall=196654 2021-06-21 01:16:31 | INFO | train_inner | epoch 006: 2182 / 3002 loss=2.579, ppl=5.97, wps=5775.6, ups=0.09, wpb=64758, bsz=128, num_updates=17091, lr=9.98713e-05, gnorm=1.93, loss_scale=4, train_wall=11, gb_free=2.8, wall=196665 2021-06-21 01:16:42 | INFO | train_inner | epoch 006: 2183 / 3002 loss=2.42, ppl=5.35, wps=5879.1, ups=0.09, wpb=64782, bsz=128, num_updates=17092, lr=9.98713e-05, gnorm=2.019, loss_scale=4, train_wall=11, gb_free=2.8, wall=196676 2021-06-21 01:16:53 | INFO | train_inner | epoch 006: 2184 / 3002 loss=2.335, ppl=5.05, wps=5822.9, ups=0.09, wpb=64806, bsz=128, num_updates=17093, lr=9.98712e-05, gnorm=1.879, loss_scale=4, train_wall=11, gb_free=2.8, wall=196688 2021-06-21 01:17:04 | INFO | train_inner | epoch 006: 2185 / 3002 loss=2.498, ppl=5.65, wps=5918, ups=0.09, wpb=64909, bsz=128, num_updates=17094, lr=9.98712e-05, gnorm=2.008, loss_scale=4, train_wall=11, gb_free=2.8, wall=196699 2021-06-21 01:17:15 | INFO | train_inner | epoch 006: 2186 / 3002 loss=2.49, ppl=5.62, wps=5773.2, ups=0.09, wpb=64798, bsz=128, num_updates=17095, lr=9.98712e-05, gnorm=2.099, loss_scale=4, train_wall=11, gb_free=2.8, wall=196710 2021-06-21 01:17:27 | INFO | train_inner | epoch 006: 2187 / 3002 loss=2.486, ppl=5.6, wps=5790.9, ups=0.09, wpb=64876, bsz=128, num_updates=17096, lr=9.98712e-05, gnorm=2.071, loss_scale=4, train_wall=11, gb_free=2.8, wall=196721 2021-06-21 01:17:38 | INFO | train_inner | epoch 006: 2188 / 3002 loss=2.662, ppl=6.33, wps=5843.1, ups=0.09, wpb=64821, bsz=128, num_updates=17097, lr=9.98712e-05, gnorm=1.947, loss_scale=4, train_wall=11, gb_free=2.8, wall=196732 2021-06-21 01:17:49 | INFO | train_inner | epoch 006: 2189 / 3002 loss=2.525, ppl=5.76, wps=5775.3, ups=0.09, wpb=64880, bsz=128, num_updates=17098, lr=9.98712e-05, gnorm=2.31, loss_scale=4, train_wall=11, gb_free=2.8, wall=196743 2021-06-21 01:18:00 | INFO | train_inner | epoch 006: 2190 / 3002 loss=2.406, ppl=5.3, wps=5844.6, ups=0.09, wpb=64900, bsz=128, num_updates=17099, lr=9.98712e-05, gnorm=2.522, loss_scale=4, train_wall=11, gb_free=2.8, wall=196754 2021-06-21 01:18:11 | INFO | train_inner | epoch 006: 2191 / 3002 loss=2.453, ppl=5.47, wps=5921.9, ups=0.09, wpb=64819, bsz=128, num_updates=17100, lr=9.98712e-05, gnorm=5.847, loss_scale=4, train_wall=10, gb_free=2.8, wall=196765 2021-06-21 01:18:22 | INFO | train_inner | epoch 006: 2192 / 3002 loss=2.474, ppl=5.55, wps=5880, ups=0.09, wpb=64842, bsz=128, num_updates=17101, lr=9.98712e-05, gnorm=2.016, loss_scale=4, train_wall=11, gb_free=2.8, wall=196776 2021-06-21 01:18:33 | INFO | train_inner | epoch 006: 2193 / 3002 loss=2.566, ppl=5.92, wps=5786.2, ups=0.09, wpb=64793, bsz=128, num_updates=17102, lr=9.98712e-05, gnorm=2.162, loss_scale=4, train_wall=11, gb_free=2.8, wall=196788 2021-06-21 01:18:44 | INFO | train_inner | epoch 006: 2194 / 3002 loss=2.363, ppl=5.15, wps=5922.4, ups=0.09, wpb=64904, bsz=128, num_updates=17103, lr=9.98712e-05, gnorm=3.027, loss_scale=4, train_wall=11, gb_free=2.8, wall=196799 2021-06-21 01:18:55 | INFO | train_inner | epoch 006: 2195 / 3002 loss=2.536, ppl=5.8, wps=5798.7, ups=0.09, wpb=64822, bsz=128, num_updates=17104, lr=9.98712e-05, gnorm=3.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=196810 2021-06-21 01:19:07 | INFO | train_inner | epoch 006: 2196 / 3002 loss=2.488, ppl=5.61, wps=5822.8, ups=0.09, wpb=64874, bsz=128, num_updates=17105, lr=9.98711e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=196821 2021-06-21 01:19:17 | INFO | train_inner | epoch 006: 2197 / 3002 loss=2.475, ppl=5.56, wps=5941.8, ups=0.09, wpb=64801, bsz=128, num_updates=17106, lr=9.98711e-05, gnorm=35.871, loss_scale=4, train_wall=10, gb_free=2.8, wall=196832 2021-06-21 01:19:29 | INFO | train_inner | epoch 006: 2198 / 3002 loss=2.372, ppl=5.18, wps=5787.8, ups=0.09, wpb=64929, bsz=128, num_updates=17107, lr=9.98711e-05, gnorm=2.124, loss_scale=4, train_wall=11, gb_free=2.8, wall=196843 2021-06-21 01:19:40 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-21 01:19:51 | INFO | train_inner | epoch 006: 2200 / 3002 loss=2.429, ppl=5.38, wps=2895.4, ups=0.04, wpb=64831, bsz=128, num_updates=17108, lr=9.98711e-05, gnorm=1.952, loss_scale=2, train_wall=21, gb_free=2.8, wall=196865 2021-06-21 01:20:02 | INFO | train_inner | epoch 006: 2201 / 3002 loss=2.453, ppl=5.48, wps=5885.9, ups=0.09, wpb=64818, bsz=128, num_updates=17109, lr=9.98711e-05, gnorm=2.008, loss_scale=2, train_wall=11, gb_free=2.8, wall=196876 2021-06-21 01:20:13 | INFO | train_inner | epoch 006: 2202 / 3002 loss=2.38, ppl=5.21, wps=5827.1, ups=0.09, wpb=64830, bsz=128, num_updates=17110, lr=9.98711e-05, gnorm=1.927, loss_scale=2, train_wall=11, gb_free=2.8, wall=196888 2021-06-21 01:20:24 | INFO | train_inner | epoch 006: 2203 / 3002 loss=2.554, ppl=5.87, wps=5889, ups=0.09, wpb=64914, bsz=128, num_updates=17111, lr=9.98711e-05, gnorm=2.412, loss_scale=2, train_wall=11, gb_free=2.8, wall=196899 2021-06-21 01:20:35 | INFO | train_inner | epoch 006: 2204 / 3002 loss=2.405, ppl=5.29, wps=5959.8, ups=0.09, wpb=64872, bsz=128, num_updates=17112, lr=9.98711e-05, gnorm=1.957, loss_scale=2, train_wall=10, gb_free=2.8, wall=196909 2021-06-21 01:20:46 | INFO | train_inner | epoch 006: 2205 / 3002 loss=2.394, ppl=5.26, wps=5961.5, ups=0.09, wpb=64823, bsz=128, num_updates=17113, lr=9.98711e-05, gnorm=11.636, loss_scale=2, train_wall=10, gb_free=2.8, wall=196920 2021-06-21 01:20:57 | INFO | train_inner | epoch 006: 2206 / 3002 loss=2.629, ppl=6.19, wps=5724, ups=0.09, wpb=64835, bsz=128, num_updates=17114, lr=9.98711e-05, gnorm=2.243, loss_scale=2, train_wall=11, gb_free=2.8, wall=196932 2021-06-21 01:21:08 | INFO | train_inner | epoch 006: 2207 / 3002 loss=2.631, ppl=6.2, wps=5851.5, ups=0.09, wpb=64805, bsz=128, num_updates=17115, lr=9.98711e-05, gnorm=2.36, loss_scale=2, train_wall=11, gb_free=2.8, wall=196943 2021-06-21 01:21:19 | INFO | train_inner | epoch 006: 2208 / 3002 loss=2.341, ppl=5.06, wps=5886.4, ups=0.09, wpb=64870, bsz=128, num_updates=17116, lr=9.98711e-05, gnorm=2.054, loss_scale=2, train_wall=11, gb_free=2.8, wall=196954 2021-06-21 01:21:30 | INFO | train_inner | epoch 006: 2209 / 3002 loss=2.633, ppl=6.2, wps=5967.8, ups=0.09, wpb=64814, bsz=128, num_updates=17117, lr=9.98711e-05, gnorm=2.586, loss_scale=2, train_wall=10, gb_free=2.8, wall=196965 2021-06-21 01:21:41 | INFO | train_inner | epoch 006: 2210 / 3002 loss=2.591, ppl=6.02, wps=5915.4, ups=0.09, wpb=64773, bsz=128, num_updates=17118, lr=9.9871e-05, gnorm=2.13, loss_scale=2, train_wall=11, gb_free=2.8, wall=196976 2021-06-21 01:21:52 | INFO | train_inner | epoch 006: 2211 / 3002 loss=2.471, ppl=5.54, wps=5840.4, ups=0.09, wpb=64823, bsz=128, num_updates=17119, lr=9.9871e-05, gnorm=2.196, loss_scale=2, train_wall=11, gb_free=2.8, wall=196987 2021-06-21 01:22:03 | INFO | train_inner | epoch 006: 2212 / 3002 loss=2.51, ppl=5.7, wps=5926.5, ups=0.09, wpb=64853, bsz=128, num_updates=17120, lr=9.9871e-05, gnorm=2.266, loss_scale=2, train_wall=10, gb_free=2.8, wall=196998 2021-06-21 01:22:14 | INFO | train_inner | epoch 006: 2213 / 3002 loss=2.462, ppl=5.51, wps=5929.5, ups=0.09, wpb=64825, bsz=128, num_updates=17121, lr=9.9871e-05, gnorm=2.258, loss_scale=2, train_wall=10, gb_free=2.8, wall=197009 2021-06-21 01:22:25 | INFO | train_inner | epoch 006: 2214 / 3002 loss=2.507, ppl=5.69, wps=5778, ups=0.09, wpb=64774, bsz=128, num_updates=17122, lr=9.9871e-05, gnorm=2.378, loss_scale=2, train_wall=11, gb_free=2.8, wall=197020 2021-06-21 01:22:37 | INFO | train_inner | epoch 006: 2215 / 3002 loss=2.609, ppl=6.1, wps=5844.3, ups=0.09, wpb=64845, bsz=128, num_updates=17123, lr=9.9871e-05, gnorm=2.379, loss_scale=2, train_wall=11, gb_free=2.8, wall=197031 2021-06-21 01:22:48 | INFO | train_inner | epoch 006: 2216 / 3002 loss=2.317, ppl=4.98, wps=5858.7, ups=0.09, wpb=64919, bsz=128, num_updates=17124, lr=9.9871e-05, gnorm=2.272, loss_scale=2, train_wall=11, gb_free=2.8, wall=197042 2021-06-21 01:22:59 | INFO | train_inner | epoch 006: 2217 / 3002 loss=2.491, ppl=5.62, wps=5865.4, ups=0.09, wpb=64858, bsz=128, num_updates=17125, lr=9.9871e-05, gnorm=2.043, loss_scale=2, train_wall=11, gb_free=2.8, wall=197053 2021-06-21 01:23:10 | INFO | train_inner | epoch 006: 2218 / 3002 loss=2.564, ppl=5.91, wps=5882.6, ups=0.09, wpb=64846, bsz=128, num_updates=17126, lr=9.9871e-05, gnorm=2.049, loss_scale=2, train_wall=11, gb_free=2.8, wall=197064 2021-06-21 01:23:21 | INFO | train_inner | epoch 006: 2219 / 3002 loss=2.669, ppl=6.36, wps=5762.6, ups=0.09, wpb=64830, bsz=128, num_updates=17127, lr=9.9871e-05, gnorm=2.492, loss_scale=2, train_wall=11, gb_free=2.8, wall=197075 2021-06-21 01:23:32 | INFO | train_inner | epoch 006: 2220 / 3002 loss=2.528, ppl=5.77, wps=5922.8, ups=0.09, wpb=64809, bsz=128, num_updates=17128, lr=9.9871e-05, gnorm=2.097, loss_scale=2, train_wall=10, gb_free=2.8, wall=197086 2021-06-21 01:23:43 | INFO | train_inner | epoch 006: 2221 / 3002 loss=2.509, ppl=5.69, wps=5856.6, ups=0.09, wpb=64818, bsz=128, num_updates=17129, lr=9.9871e-05, gnorm=2.017, loss_scale=2, train_wall=11, gb_free=2.8, wall=197097 2021-06-21 01:23:54 | INFO | train_inner | epoch 006: 2222 / 3002 loss=2.602, ppl=6.07, wps=5800.4, ups=0.09, wpb=64801, bsz=128, num_updates=17130, lr=9.98709e-05, gnorm=6.487, loss_scale=2, train_wall=11, gb_free=2.8, wall=197108 2021-06-21 01:24:05 | INFO | train_inner | epoch 006: 2223 / 3002 loss=2.521, ppl=5.74, wps=5847.4, ups=0.09, wpb=64813, bsz=128, num_updates=17131, lr=9.98709e-05, gnorm=2.098, loss_scale=2, train_wall=11, gb_free=2.8, wall=197120 2021-06-21 01:24:16 | INFO | train_inner | epoch 006: 2224 / 3002 loss=2.388, ppl=5.23, wps=5884.9, ups=0.09, wpb=64864, bsz=128, num_updates=17132, lr=9.98709e-05, gnorm=1.957, loss_scale=2, train_wall=11, gb_free=2.8, wall=197131 2021-06-21 01:24:27 | INFO | train_inner | epoch 006: 2225 / 3002 loss=2.574, ppl=5.95, wps=5956.4, ups=0.09, wpb=64709, bsz=128, num_updates=17133, lr=9.98709e-05, gnorm=1.978, loss_scale=2, train_wall=10, gb_free=2.8, wall=197141 2021-06-21 01:24:38 | INFO | train_inner | epoch 006: 2226 / 3002 loss=2.446, ppl=5.45, wps=5836.2, ups=0.09, wpb=64859, bsz=128, num_updates=17134, lr=9.98709e-05, gnorm=2.061, loss_scale=2, train_wall=11, gb_free=2.8, wall=197153 2021-06-21 01:24:49 | INFO | train_inner | epoch 006: 2227 / 3002 loss=2.476, ppl=5.57, wps=5812, ups=0.09, wpb=64839, bsz=128, num_updates=17135, lr=9.98709e-05, gnorm=2.05, loss_scale=2, train_wall=11, gb_free=2.8, wall=197164 2021-06-21 01:25:00 | INFO | train_inner | epoch 006: 2228 / 3002 loss=2.328, ppl=5.02, wps=5827.4, ups=0.09, wpb=64841, bsz=128, num_updates=17136, lr=9.98709e-05, gnorm=1.951, loss_scale=2, train_wall=11, gb_free=2.8, wall=197175 2021-06-21 01:25:11 | INFO | train_inner | epoch 006: 2229 / 3002 loss=2.547, ppl=5.84, wps=5918.4, ups=0.09, wpb=64807, bsz=128, num_updates=17137, lr=9.98709e-05, gnorm=2.31, loss_scale=2, train_wall=10, gb_free=2.8, wall=197186 2021-06-21 01:25:22 | INFO | train_inner | epoch 006: 2230 / 3002 loss=2.367, ppl=5.16, wps=5877.8, ups=0.09, wpb=64778, bsz=128, num_updates=17138, lr=9.98709e-05, gnorm=1.941, loss_scale=2, train_wall=11, gb_free=2.8, wall=197197 2021-06-21 01:25:34 | INFO | train_inner | epoch 006: 2231 / 3002 loss=2.4, ppl=5.28, wps=5833.9, ups=0.09, wpb=64788, bsz=128, num_updates=17139, lr=9.98709e-05, gnorm=2.087, loss_scale=2, train_wall=11, gb_free=2.8, wall=197208 2021-06-21 01:25:45 | INFO | train_inner | epoch 006: 2232 / 3002 loss=2.538, ppl=5.81, wps=5835.2, ups=0.09, wpb=64729, bsz=128, num_updates=17140, lr=9.98709e-05, gnorm=2.409, loss_scale=2, train_wall=11, gb_free=2.8, wall=197219 2021-06-21 01:25:56 | INFO | train_inner | epoch 006: 2233 / 3002 loss=2.428, ppl=5.38, wps=5771.3, ups=0.09, wpb=64856, bsz=128, num_updates=17141, lr=9.98709e-05, gnorm=1.958, loss_scale=2, train_wall=11, gb_free=2.8, wall=197230 2021-06-21 01:26:07 | INFO | train_inner | epoch 006: 2234 / 3002 loss=2.512, ppl=5.7, wps=5905.1, ups=0.09, wpb=64866, bsz=128, num_updates=17142, lr=9.98709e-05, gnorm=2.181, loss_scale=2, train_wall=11, gb_free=2.8, wall=197241 2021-06-21 01:26:18 | INFO | train_inner | epoch 006: 2235 / 3002 loss=2.463, ppl=5.51, wps=5816.7, ups=0.09, wpb=64844, bsz=128, num_updates=17143, lr=9.98708e-05, gnorm=1.974, loss_scale=2, train_wall=11, gb_free=2.8, wall=197252 2021-06-21 01:26:29 | INFO | train_inner | epoch 006: 2236 / 3002 loss=2.53, ppl=5.77, wps=5843.1, ups=0.09, wpb=64909, bsz=128, num_updates=17144, lr=9.98708e-05, gnorm=35.888, loss_scale=2, train_wall=11, gb_free=2.8, wall=197263 2021-06-21 01:26:40 | INFO | train_inner | epoch 006: 2237 / 3002 loss=2.458, ppl=5.49, wps=5822.6, ups=0.09, wpb=64927, bsz=128, num_updates=17145, lr=9.98708e-05, gnorm=2.193, loss_scale=2, train_wall=11, gb_free=2.8, wall=197275 2021-06-21 01:26:51 | INFO | train_inner | epoch 006: 2238 / 3002 loss=2.379, ppl=5.2, wps=5870.4, ups=0.09, wpb=64796, bsz=128, num_updates=17146, lr=9.98708e-05, gnorm=4.402, loss_scale=2, train_wall=11, gb_free=2.8, wall=197286 2021-06-21 01:27:02 | INFO | train_inner | epoch 006: 2239 / 3002 loss=2.494, ppl=5.63, wps=5808.5, ups=0.09, wpb=64836, bsz=128, num_updates=17147, lr=9.98708e-05, gnorm=2.901, loss_scale=2, train_wall=11, gb_free=2.8, wall=197297 2021-06-21 01:27:13 | INFO | train_inner | epoch 006: 2240 / 3002 loss=2.499, ppl=5.65, wps=5923.6, ups=0.09, wpb=64819, bsz=128, num_updates=17148, lr=9.98708e-05, gnorm=5.692, loss_scale=2, train_wall=10, gb_free=2.8, wall=197308 2021-06-21 01:27:25 | INFO | train_inner | epoch 006: 2241 / 3002 loss=2.36, ppl=5.14, wps=5847.1, ups=0.09, wpb=64842, bsz=128, num_updates=17149, lr=9.98708e-05, gnorm=3.733, loss_scale=2, train_wall=11, gb_free=2.8, wall=197319 2021-06-21 01:27:36 | INFO | train_inner | epoch 006: 2242 / 3002 loss=2.447, ppl=5.45, wps=5814.4, ups=0.09, wpb=64729, bsz=128, num_updates=17150, lr=9.98708e-05, gnorm=4.58, loss_scale=2, train_wall=11, gb_free=2.8, wall=197330 2021-06-21 01:27:47 | INFO | train_inner | epoch 006: 2243 / 3002 loss=2.48, ppl=5.58, wps=5879.2, ups=0.09, wpb=64821, bsz=128, num_updates=17151, lr=9.98708e-05, gnorm=2.017, loss_scale=2, train_wall=11, gb_free=2.8, wall=197341 2021-06-21 01:27:58 | INFO | train_inner | epoch 006: 2244 / 3002 loss=2.511, ppl=5.7, wps=5858.7, ups=0.09, wpb=64848, bsz=128, num_updates=17152, lr=9.98708e-05, gnorm=2.147, loss_scale=2, train_wall=11, gb_free=2.8, wall=197352 2021-06-21 01:28:09 | INFO | train_inner | epoch 006: 2245 / 3002 loss=2.564, ppl=5.91, wps=5764.6, ups=0.09, wpb=64840, bsz=128, num_updates=17153, lr=9.98708e-05, gnorm=2.068, loss_scale=2, train_wall=11, gb_free=2.8, wall=197363 2021-06-21 01:28:20 | INFO | train_inner | epoch 006: 2246 / 3002 loss=2.533, ppl=5.79, wps=5963.3, ups=0.09, wpb=64798, bsz=128, num_updates=17154, lr=9.98708e-05, gnorm=2.025, loss_scale=2, train_wall=10, gb_free=2.8, wall=197374 2021-06-21 01:28:31 | INFO | train_inner | epoch 006: 2247 / 3002 loss=2.396, ppl=5.26, wps=5912.6, ups=0.09, wpb=64820, bsz=128, num_updates=17155, lr=9.98707e-05, gnorm=2.862, loss_scale=2, train_wall=11, gb_free=2.8, wall=197385 2021-06-21 01:28:42 | INFO | train_inner | epoch 006: 2248 / 3002 loss=2.602, ppl=6.07, wps=5879.5, ups=0.09, wpb=64787, bsz=128, num_updates=17156, lr=9.98707e-05, gnorm=2.159, loss_scale=2, train_wall=11, gb_free=2.8, wall=197396 2021-06-21 01:28:53 | INFO | train_inner | epoch 006: 2249 / 3002 loss=2.612, ppl=6.11, wps=5887.2, ups=0.09, wpb=64744, bsz=128, num_updates=17157, lr=9.98707e-05, gnorm=2.014, loss_scale=2, train_wall=11, gb_free=2.8, wall=197407 2021-06-21 01:29:04 | INFO | train_inner | epoch 006: 2250 / 3002 loss=2.489, ppl=5.61, wps=5908.1, ups=0.09, wpb=64947, bsz=128, num_updates=17158, lr=9.98707e-05, gnorm=2.002, loss_scale=2, train_wall=11, gb_free=2.8, wall=197418 2021-06-21 01:29:15 | INFO | train_inner | epoch 006: 2251 / 3002 loss=2.337, ppl=5.05, wps=5901.5, ups=0.09, wpb=64835, bsz=128, num_updates=17159, lr=9.98707e-05, gnorm=2.143, loss_scale=2, train_wall=11, gb_free=2.8, wall=197429 2021-06-21 01:29:26 | INFO | train_inner | epoch 006: 2252 / 3002 loss=2.457, ppl=5.49, wps=5867.4, ups=0.09, wpb=64801, bsz=128, num_updates=17160, lr=9.98707e-05, gnorm=2.16, loss_scale=2, train_wall=11, gb_free=2.8, wall=197440 2021-06-21 01:29:37 | INFO | train_inner | epoch 006: 2253 / 3002 loss=2.497, ppl=5.65, wps=5915.4, ups=0.09, wpb=64833, bsz=128, num_updates=17161, lr=9.98707e-05, gnorm=1.995, loss_scale=2, train_wall=11, gb_free=2.8, wall=197451 2021-06-21 01:29:48 | INFO | train_inner | epoch 006: 2254 / 3002 loss=2.389, ppl=5.24, wps=5848.5, ups=0.09, wpb=64775, bsz=128, num_updates=17162, lr=9.98707e-05, gnorm=2.202, loss_scale=2, train_wall=11, gb_free=2.8, wall=197462 2021-06-21 01:29:59 | INFO | train_inner | epoch 006: 2255 / 3002 loss=2.456, ppl=5.49, wps=5841.4, ups=0.09, wpb=64830, bsz=128, num_updates=17163, lr=9.98707e-05, gnorm=2.417, loss_scale=2, train_wall=11, gb_free=2.8, wall=197473 2021-06-21 01:30:10 | INFO | train_inner | epoch 006: 2256 / 3002 loss=2.504, ppl=5.67, wps=5940.9, ups=0.09, wpb=64748, bsz=128, num_updates=17164, lr=9.98707e-05, gnorm=1.918, loss_scale=2, train_wall=10, gb_free=2.8, wall=197484 2021-06-21 01:30:21 | INFO | train_inner | epoch 006: 2257 / 3002 loss=2.465, ppl=5.52, wps=5712, ups=0.09, wpb=64835, bsz=128, num_updates=17165, lr=9.98707e-05, gnorm=2.223, loss_scale=2, train_wall=11, gb_free=2.8, wall=197496 2021-06-21 01:30:32 | INFO | train_inner | epoch 006: 2258 / 3002 loss=2.395, ppl=5.26, wps=5774.1, ups=0.09, wpb=64850, bsz=128, num_updates=17166, lr=9.98707e-05, gnorm=1.902, loss_scale=2, train_wall=11, gb_free=2.8, wall=197507 2021-06-21 01:30:44 | INFO | train_inner | epoch 006: 2259 / 3002 loss=2.418, ppl=5.34, wps=5857.4, ups=0.09, wpb=64901, bsz=128, num_updates=17167, lr=9.98707e-05, gnorm=2.945, loss_scale=2, train_wall=11, gb_free=2.8, wall=197518 2021-06-21 01:30:54 | INFO | train_inner | epoch 006: 2260 / 3002 loss=2.671, ppl=6.37, wps=5983.7, ups=0.09, wpb=64787, bsz=128, num_updates=17168, lr=9.98706e-05, gnorm=2.101, loss_scale=2, train_wall=10, gb_free=2.8, wall=197529 2021-06-21 01:31:06 | INFO | train_inner | epoch 006: 2261 / 3002 loss=2.471, ppl=5.54, wps=5794.4, ups=0.09, wpb=64764, bsz=128, num_updates=17169, lr=9.98706e-05, gnorm=2.112, loss_scale=2, train_wall=11, gb_free=2.8, wall=197540 2021-06-21 01:31:17 | INFO | train_inner | epoch 006: 2262 / 3002 loss=2.676, ppl=6.39, wps=5779.6, ups=0.09, wpb=64872, bsz=128, num_updates=17170, lr=9.98706e-05, gnorm=2.01, loss_scale=2, train_wall=11, gb_free=2.8, wall=197551 2021-06-21 01:31:28 | INFO | train_inner | epoch 006: 2263 / 3002 loss=2.599, ppl=6.06, wps=5975, ups=0.09, wpb=64898, bsz=128, num_updates=17171, lr=9.98706e-05, gnorm=2.014, loss_scale=2, train_wall=10, gb_free=2.8, wall=197562 2021-06-21 01:31:39 | INFO | train_inner | epoch 006: 2264 / 3002 loss=2.416, ppl=5.34, wps=5855.9, ups=0.09, wpb=64786, bsz=128, num_updates=17172, lr=9.98706e-05, gnorm=2.033, loss_scale=2, train_wall=11, gb_free=2.8, wall=197573 2021-06-21 01:31:50 | INFO | train_inner | epoch 006: 2265 / 3002 loss=2.567, ppl=5.92, wps=5884.4, ups=0.09, wpb=64817, bsz=128, num_updates=17173, lr=9.98706e-05, gnorm=2.005, loss_scale=2, train_wall=11, gb_free=2.8, wall=197584 2021-06-21 01:32:01 | INFO | train_inner | epoch 006: 2266 / 3002 loss=2.658, ppl=6.31, wps=5773.5, ups=0.09, wpb=64817, bsz=128, num_updates=17174, lr=9.98706e-05, gnorm=2.177, loss_scale=2, train_wall=11, gb_free=2.8, wall=197595 2021-06-21 01:32:12 | INFO | train_inner | epoch 006: 2267 / 3002 loss=2.363, ppl=5.14, wps=5835.1, ups=0.09, wpb=64804, bsz=128, num_updates=17175, lr=9.98706e-05, gnorm=1.975, loss_scale=2, train_wall=11, gb_free=2.8, wall=197606 2021-06-21 01:32:23 | INFO | train_inner | epoch 006: 2268 / 3002 loss=2.657, ppl=6.31, wps=5858.7, ups=0.09, wpb=64708, bsz=128, num_updates=17176, lr=9.98706e-05, gnorm=1.989, loss_scale=2, train_wall=11, gb_free=2.8, wall=197617 2021-06-21 01:32:34 | INFO | train_inner | epoch 006: 2269 / 3002 loss=2.453, ppl=5.47, wps=5813.4, ups=0.09, wpb=64809, bsz=128, num_updates=17177, lr=9.98706e-05, gnorm=2.011, loss_scale=2, train_wall=11, gb_free=2.8, wall=197629 2021-06-21 01:32:45 | INFO | train_inner | epoch 006: 2270 / 3002 loss=2.446, ppl=5.45, wps=5897.5, ups=0.09, wpb=64890, bsz=128, num_updates=17178, lr=9.98706e-05, gnorm=2.037, loss_scale=2, train_wall=11, gb_free=2.8, wall=197640 2021-06-21 01:32:56 | INFO | train_inner | epoch 006: 2271 / 3002 loss=2.508, ppl=5.69, wps=5770.5, ups=0.09, wpb=64845, bsz=128, num_updates=17179, lr=9.98706e-05, gnorm=2.33, loss_scale=2, train_wall=11, gb_free=2.8, wall=197651 2021-06-21 01:33:08 | INFO | train_inner | epoch 006: 2272 / 3002 loss=2.479, ppl=5.58, wps=5888.3, ups=0.09, wpb=64911, bsz=128, num_updates=17180, lr=9.98705e-05, gnorm=1.873, loss_scale=2, train_wall=11, gb_free=2.8, wall=197662 2021-06-21 01:33:19 | INFO | train_inner | epoch 006: 2273 / 3002 loss=2.489, ppl=5.62, wps=5752, ups=0.09, wpb=64827, bsz=128, num_updates=17181, lr=9.98705e-05, gnorm=2.083, loss_scale=2, train_wall=11, gb_free=2.8, wall=197673 2021-06-21 01:33:30 | INFO | train_inner | epoch 006: 2274 / 3002 loss=2.425, ppl=5.37, wps=5862, ups=0.09, wpb=64857, bsz=128, num_updates=17182, lr=9.98705e-05, gnorm=2.126, loss_scale=2, train_wall=11, gb_free=2.8, wall=197684 2021-06-21 01:33:41 | INFO | train_inner | epoch 006: 2275 / 3002 loss=2.629, ppl=6.19, wps=5860, ups=0.09, wpb=64860, bsz=128, num_updates=17183, lr=9.98705e-05, gnorm=1.998, loss_scale=2, train_wall=11, gb_free=2.8, wall=197695 2021-06-21 01:33:52 | INFO | train_inner | epoch 006: 2276 / 3002 loss=2.416, ppl=5.34, wps=5872.4, ups=0.09, wpb=64850, bsz=128, num_updates=17184, lr=9.98705e-05, gnorm=1.95, loss_scale=2, train_wall=11, gb_free=2.8, wall=197706 2021-06-21 01:34:03 | INFO | train_inner | epoch 006: 2277 / 3002 loss=2.529, ppl=5.77, wps=5959.5, ups=0.09, wpb=64785, bsz=128, num_updates=17185, lr=9.98705e-05, gnorm=1.93, loss_scale=2, train_wall=10, gb_free=2.8, wall=197717 2021-06-21 01:34:14 | INFO | train_inner | epoch 006: 2278 / 3002 loss=2.431, ppl=5.39, wps=5766.5, ups=0.09, wpb=64823, bsz=128, num_updates=17186, lr=9.98705e-05, gnorm=2.27, loss_scale=2, train_wall=11, gb_free=2.8, wall=197728 2021-06-21 01:34:25 | INFO | train_inner | epoch 006: 2279 / 3002 loss=2.486, ppl=5.6, wps=5860.9, ups=0.09, wpb=64770, bsz=128, num_updates=17187, lr=9.98705e-05, gnorm=1.87, loss_scale=2, train_wall=11, gb_free=2.8, wall=197739 2021-06-21 01:34:36 | INFO | train_inner | epoch 006: 2280 / 3002 loss=2.427, ppl=5.38, wps=5879.5, ups=0.09, wpb=64853, bsz=128, num_updates=17188, lr=9.98705e-05, gnorm=1.986, loss_scale=2, train_wall=11, gb_free=2.8, wall=197751 2021-06-21 01:34:47 | INFO | train_inner | epoch 006: 2281 / 3002 loss=2.561, ppl=5.9, wps=5871.1, ups=0.09, wpb=64887, bsz=128, num_updates=17189, lr=9.98705e-05, gnorm=1.968, loss_scale=2, train_wall=11, gb_free=2.8, wall=197762 2021-06-21 01:34:58 | INFO | train_inner | epoch 006: 2282 / 3002 loss=2.299, ppl=4.92, wps=5769.7, ups=0.09, wpb=64842, bsz=128, num_updates=17190, lr=9.98705e-05, gnorm=1.974, loss_scale=2, train_wall=11, gb_free=2.8, wall=197773 2021-06-21 01:35:10 | INFO | train_inner | epoch 006: 2283 / 3002 loss=2.39, ppl=5.24, wps=5875.5, ups=0.09, wpb=64939, bsz=128, num_updates=17191, lr=9.98705e-05, gnorm=1.964, loss_scale=2, train_wall=11, gb_free=2.8, wall=197784 2021-06-21 01:35:21 | INFO | train_inner | epoch 006: 2284 / 3002 loss=2.521, ppl=5.74, wps=5818.3, ups=0.09, wpb=64865, bsz=128, num_updates=17192, lr=9.98705e-05, gnorm=2.075, loss_scale=2, train_wall=11, gb_free=2.8, wall=197795 2021-06-21 01:35:31 | INFO | train_inner | epoch 006: 2285 / 3002 loss=2.485, ppl=5.6, wps=6016.1, ups=0.09, wpb=64856, bsz=128, num_updates=17193, lr=9.98704e-05, gnorm=2.687, loss_scale=2, train_wall=10, gb_free=2.8, wall=197806 2021-06-21 01:35:42 | INFO | train_inner | epoch 006: 2286 / 3002 loss=2.423, ppl=5.36, wps=5964.1, ups=0.09, wpb=64844, bsz=128, num_updates=17194, lr=9.98704e-05, gnorm=2.832, loss_scale=2, train_wall=10, gb_free=2.8, wall=197817 2021-06-21 01:35:54 | INFO | train_inner | epoch 006: 2287 / 3002 loss=2.443, ppl=5.44, wps=5755.7, ups=0.09, wpb=64818, bsz=128, num_updates=17195, lr=9.98704e-05, gnorm=1.867, loss_scale=2, train_wall=11, gb_free=2.8, wall=197828 2021-06-21 01:36:05 | INFO | train_inner | epoch 006: 2288 / 3002 loss=2.549, ppl=5.85, wps=5821.5, ups=0.09, wpb=64765, bsz=128, num_updates=17196, lr=9.98704e-05, gnorm=1.957, loss_scale=2, train_wall=11, gb_free=2.8, wall=197839 2021-06-21 01:36:16 | INFO | train_inner | epoch 006: 2289 / 3002 loss=2.546, ppl=5.84, wps=5991.5, ups=0.09, wpb=64871, bsz=128, num_updates=17197, lr=9.98704e-05, gnorm=1.948, loss_scale=2, train_wall=10, gb_free=2.8, wall=197850 2021-06-21 01:36:27 | INFO | train_inner | epoch 006: 2290 / 3002 loss=2.642, ppl=6.24, wps=5886.3, ups=0.09, wpb=64880, bsz=128, num_updates=17198, lr=9.98704e-05, gnorm=1.977, loss_scale=2, train_wall=11, gb_free=2.8, wall=197861 2021-06-21 01:36:38 | INFO | train_inner | epoch 006: 2291 / 3002 loss=2.531, ppl=5.78, wps=5841.8, ups=0.09, wpb=64829, bsz=128, num_updates=17199, lr=9.98704e-05, gnorm=2.037, loss_scale=2, train_wall=11, gb_free=2.8, wall=197872 2021-06-21 01:36:49 | INFO | train_inner | epoch 006: 2292 / 3002 loss=2.594, ppl=6.04, wps=5847.2, ups=0.09, wpb=64783, bsz=128, num_updates=17200, lr=9.98704e-05, gnorm=2.164, loss_scale=2, train_wall=11, gb_free=2.8, wall=197883 2021-06-21 01:37:00 | INFO | train_inner | epoch 006: 2293 / 3002 loss=2.401, ppl=5.28, wps=5779.2, ups=0.09, wpb=64813, bsz=128, num_updates=17201, lr=9.98704e-05, gnorm=1.943, loss_scale=2, train_wall=11, gb_free=2.8, wall=197894 2021-06-21 01:37:11 | INFO | train_inner | epoch 006: 2294 / 3002 loss=2.632, ppl=6.2, wps=5846.4, ups=0.09, wpb=64803, bsz=128, num_updates=17202, lr=9.98704e-05, gnorm=1.943, loss_scale=2, train_wall=11, gb_free=2.8, wall=197905 2021-06-21 01:37:22 | INFO | train_inner | epoch 006: 2295 / 3002 loss=2.57, ppl=5.94, wps=5810.2, ups=0.09, wpb=64863, bsz=128, num_updates=17203, lr=9.98704e-05, gnorm=1.948, loss_scale=2, train_wall=11, gb_free=2.8, wall=197917 2021-06-21 01:37:33 | INFO | train_inner | epoch 006: 2296 / 3002 loss=2.543, ppl=5.83, wps=5936.1, ups=0.09, wpb=64837, bsz=128, num_updates=17204, lr=9.98704e-05, gnorm=2.034, loss_scale=2, train_wall=10, gb_free=2.8, wall=197927 2021-06-21 01:37:44 | INFO | train_inner | epoch 006: 2297 / 3002 loss=2.497, ppl=5.64, wps=6025.7, ups=0.09, wpb=64814, bsz=128, num_updates=17205, lr=9.98703e-05, gnorm=3.519, loss_scale=2, train_wall=10, gb_free=2.8, wall=197938 2021-06-21 01:37:55 | INFO | train_inner | epoch 006: 2298 / 3002 loss=2.591, ppl=6.02, wps=5832.7, ups=0.09, wpb=64838, bsz=128, num_updates=17206, lr=9.98703e-05, gnorm=1.971, loss_scale=2, train_wall=11, gb_free=2.8, wall=197949 2021-06-21 01:38:06 | INFO | train_inner | epoch 006: 2299 / 3002 loss=2.29, ppl=4.89, wps=5906.7, ups=0.09, wpb=64900, bsz=128, num_updates=17207, lr=9.98703e-05, gnorm=1.881, loss_scale=2, train_wall=11, gb_free=2.8, wall=197960 2021-06-21 01:38:17 | INFO | train_inner | epoch 006: 2300 / 3002 loss=2.469, ppl=5.54, wps=5857.3, ups=0.09, wpb=64845, bsz=128, num_updates=17208, lr=9.98703e-05, gnorm=1.898, loss_scale=2, train_wall=11, gb_free=2.8, wall=197971 2021-06-21 01:38:28 | INFO | train_inner | epoch 006: 2301 / 3002 loss=2.444, ppl=5.44, wps=5800.9, ups=0.09, wpb=64910, bsz=128, num_updates=17209, lr=9.98703e-05, gnorm=2.023, loss_scale=2, train_wall=11, gb_free=2.8, wall=197983 2021-06-21 01:38:39 | INFO | train_inner | epoch 006: 2302 / 3002 loss=2.503, ppl=5.67, wps=5930.9, ups=0.09, wpb=64876, bsz=128, num_updates=17210, lr=9.98703e-05, gnorm=2.079, loss_scale=2, train_wall=10, gb_free=2.8, wall=197994 2021-06-21 01:38:50 | INFO | train_inner | epoch 006: 2303 / 3002 loss=2.337, ppl=5.05, wps=5895, ups=0.09, wpb=64797, bsz=128, num_updates=17211, lr=9.98703e-05, gnorm=1.948, loss_scale=2, train_wall=11, gb_free=2.8, wall=198005 2021-06-21 01:39:01 | INFO | train_inner | epoch 006: 2304 / 3002 loss=2.43, ppl=5.39, wps=5841.1, ups=0.09, wpb=64862, bsz=128, num_updates=17212, lr=9.98703e-05, gnorm=1.961, loss_scale=2, train_wall=11, gb_free=2.8, wall=198016 2021-06-21 01:39:12 | INFO | train_inner | epoch 006: 2305 / 3002 loss=2.509, ppl=5.69, wps=5820.5, ups=0.09, wpb=64810, bsz=128, num_updates=17213, lr=9.98703e-05, gnorm=2.021, loss_scale=2, train_wall=11, gb_free=2.8, wall=198027 2021-06-21 01:39:23 | INFO | train_inner | epoch 006: 2306 / 3002 loss=2.565, ppl=5.92, wps=5893.2, ups=0.09, wpb=64764, bsz=128, num_updates=17214, lr=9.98703e-05, gnorm=1.959, loss_scale=2, train_wall=11, gb_free=2.8, wall=198038 2021-06-21 01:39:34 | INFO | train_inner | epoch 006: 2307 / 3002 loss=2.482, ppl=5.59, wps=5874.7, ups=0.09, wpb=64897, bsz=128, num_updates=17215, lr=9.98703e-05, gnorm=1.915, loss_scale=2, train_wall=11, gb_free=2.8, wall=198049 2021-06-21 01:39:46 | INFO | train_inner | epoch 006: 2308 / 3002 loss=2.468, ppl=5.53, wps=5749.3, ups=0.09, wpb=64797, bsz=128, num_updates=17216, lr=9.98703e-05, gnorm=1.884, loss_scale=2, train_wall=11, gb_free=2.8, wall=198060 2021-06-21 01:39:57 | INFO | train_inner | epoch 006: 2309 / 3002 loss=2.434, ppl=5.4, wps=5919.5, ups=0.09, wpb=64781, bsz=128, num_updates=17217, lr=9.98703e-05, gnorm=1.89, loss_scale=2, train_wall=10, gb_free=2.8, wall=198071 2021-06-21 01:40:08 | INFO | train_inner | epoch 006: 2310 / 3002 loss=2.477, ppl=5.57, wps=5841.3, ups=0.09, wpb=64873, bsz=128, num_updates=17218, lr=9.98702e-05, gnorm=1.942, loss_scale=2, train_wall=11, gb_free=2.8, wall=198082 2021-06-21 01:40:19 | INFO | train_inner | epoch 006: 2311 / 3002 loss=2.425, ppl=5.37, wps=5789.4, ups=0.09, wpb=64786, bsz=128, num_updates=17219, lr=9.98702e-05, gnorm=1.913, loss_scale=2, train_wall=11, gb_free=2.8, wall=198093 2021-06-21 01:40:30 | INFO | train_inner | epoch 006: 2312 / 3002 loss=2.444, ppl=5.44, wps=5863.4, ups=0.09, wpb=64862, bsz=128, num_updates=17220, lr=9.98702e-05, gnorm=2.045, loss_scale=2, train_wall=11, gb_free=2.8, wall=198104 2021-06-21 01:40:41 | INFO | train_inner | epoch 006: 2313 / 3002 loss=2.412, ppl=5.32, wps=6025.6, ups=0.09, wpb=64846, bsz=128, num_updates=17221, lr=9.98702e-05, gnorm=1.919, loss_scale=2, train_wall=10, gb_free=2.8, wall=198115 2021-06-21 01:40:52 | INFO | train_inner | epoch 006: 2314 / 3002 loss=2.476, ppl=5.57, wps=5891.5, ups=0.09, wpb=64814, bsz=128, num_updates=17222, lr=9.98702e-05, gnorm=1.934, loss_scale=2, train_wall=11, gb_free=2.8, wall=198126 2021-06-21 01:41:03 | INFO | train_inner | epoch 006: 2315 / 3002 loss=2.607, ppl=6.09, wps=5756.5, ups=0.09, wpb=64891, bsz=128, num_updates=17223, lr=9.98702e-05, gnorm=2.273, loss_scale=2, train_wall=11, gb_free=2.8, wall=198137 2021-06-21 01:41:14 | INFO | train_inner | epoch 006: 2316 / 3002 loss=2.479, ppl=5.58, wps=5813.5, ups=0.09, wpb=64847, bsz=128, num_updates=17224, lr=9.98702e-05, gnorm=19.984, loss_scale=2, train_wall=11, gb_free=2.8, wall=198149 2021-06-21 01:41:25 | INFO | train_inner | epoch 006: 2317 / 3002 loss=2.75, ppl=6.73, wps=5802.3, ups=0.09, wpb=64799, bsz=128, num_updates=17225, lr=9.98702e-05, gnorm=1.991, loss_scale=2, train_wall=11, gb_free=2.8, wall=198160 2021-06-21 01:41:36 | INFO | train_inner | epoch 006: 2318 / 3002 loss=2.434, ppl=5.41, wps=5853.4, ups=0.09, wpb=64850, bsz=128, num_updates=17226, lr=9.98702e-05, gnorm=1.928, loss_scale=2, train_wall=11, gb_free=2.8, wall=198171 2021-06-21 01:41:47 | INFO | train_inner | epoch 006: 2319 / 3002 loss=2.574, ppl=5.96, wps=5933.6, ups=0.09, wpb=64819, bsz=128, num_updates=17227, lr=9.98702e-05, gnorm=1.972, loss_scale=2, train_wall=10, gb_free=2.8, wall=198182 2021-06-21 01:41:58 | INFO | train_inner | epoch 006: 2320 / 3002 loss=2.566, ppl=5.92, wps=5886.1, ups=0.09, wpb=64786, bsz=128, num_updates=17228, lr=9.98702e-05, gnorm=2.052, loss_scale=2, train_wall=11, gb_free=2.8, wall=198193 2021-06-21 01:42:09 | INFO | train_inner | epoch 006: 2321 / 3002 loss=2.483, ppl=5.59, wps=5870.1, ups=0.09, wpb=64782, bsz=128, num_updates=17229, lr=9.98702e-05, gnorm=2.013, loss_scale=2, train_wall=11, gb_free=2.8, wall=198204 2021-06-21 01:42:21 | INFO | train_inner | epoch 006: 2322 / 3002 loss=2.546, ppl=5.84, wps=5828.3, ups=0.09, wpb=64755, bsz=128, num_updates=17230, lr=9.98701e-05, gnorm=2.077, loss_scale=2, train_wall=11, gb_free=2.8, wall=198215 2021-06-21 01:42:32 | INFO | train_inner | epoch 006: 2323 / 3002 loss=2.403, ppl=5.29, wps=5852.3, ups=0.09, wpb=64893, bsz=128, num_updates=17231, lr=9.98701e-05, gnorm=1.987, loss_scale=2, train_wall=11, gb_free=2.8, wall=198226 2021-06-21 01:42:43 | INFO | train_inner | epoch 006: 2324 / 3002 loss=2.408, ppl=5.31, wps=5835.3, ups=0.09, wpb=64906, bsz=128, num_updates=17232, lr=9.98701e-05, gnorm=2.021, loss_scale=2, train_wall=11, gb_free=2.8, wall=198237 2021-06-21 01:42:54 | INFO | train_inner | epoch 006: 2325 / 3002 loss=2.468, ppl=5.53, wps=5780.6, ups=0.09, wpb=64745, bsz=128, num_updates=17233, lr=9.98701e-05, gnorm=2.043, loss_scale=2, train_wall=11, gb_free=2.8, wall=198248 2021-06-21 01:43:05 | INFO | train_inner | epoch 006: 2326 / 3002 loss=2.48, ppl=5.58, wps=5842.1, ups=0.09, wpb=64833, bsz=128, num_updates=17234, lr=9.98701e-05, gnorm=2.037, loss_scale=2, train_wall=11, gb_free=2.8, wall=198259 2021-06-21 01:43:16 | INFO | train_inner | epoch 006: 2327 / 3002 loss=2.352, ppl=5.11, wps=5952, ups=0.09, wpb=64963, bsz=128, num_updates=17235, lr=9.98701e-05, gnorm=1.953, loss_scale=4, train_wall=10, gb_free=2.8, wall=198270 2021-06-21 01:43:27 | INFO | train_inner | epoch 006: 2328 / 3002 loss=2.425, ppl=5.37, wps=5881.6, ups=0.09, wpb=64854, bsz=128, num_updates=17236, lr=9.98701e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=198281 2021-06-21 01:43:38 | INFO | train_inner | epoch 006: 2329 / 3002 loss=2.383, ppl=5.22, wps=5684.7, ups=0.09, wpb=64777, bsz=128, num_updates=17237, lr=9.98701e-05, gnorm=1.952, loss_scale=4, train_wall=11, gb_free=2.8, wall=198293 2021-06-21 01:43:49 | INFO | train_inner | epoch 006: 2330 / 3002 loss=2.314, ppl=4.97, wps=5874.1, ups=0.09, wpb=64845, bsz=128, num_updates=17238, lr=9.98701e-05, gnorm=2.007, loss_scale=4, train_wall=11, gb_free=2.8, wall=198304 2021-06-21 01:44:01 | INFO | train_inner | epoch 006: 2331 / 3002 loss=2.577, ppl=5.97, wps=5779.7, ups=0.09, wpb=64864, bsz=128, num_updates=17239, lr=9.98701e-05, gnorm=2.332, loss_scale=4, train_wall=11, gb_free=2.8, wall=198315 2021-06-21 01:44:12 | INFO | train_inner | epoch 006: 2332 / 3002 loss=2.474, ppl=5.56, wps=5896.2, ups=0.09, wpb=64856, bsz=128, num_updates=17240, lr=9.98701e-05, gnorm=1.947, loss_scale=4, train_wall=11, gb_free=2.8, wall=198326 2021-06-21 01:44:23 | INFO | train_inner | epoch 006: 2333 / 3002 loss=2.365, ppl=5.15, wps=5817.7, ups=0.09, wpb=64882, bsz=128, num_updates=17241, lr=9.98701e-05, gnorm=1.914, loss_scale=4, train_wall=11, gb_free=2.8, wall=198337 2021-06-21 01:44:34 | INFO | train_inner | epoch 006: 2334 / 3002 loss=2.415, ppl=5.33, wps=5708.4, ups=0.09, wpb=64814, bsz=128, num_updates=17242, lr=9.98701e-05, gnorm=2.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=198348 2021-06-21 01:44:45 | INFO | train_inner | epoch 006: 2335 / 3002 loss=2.7, ppl=6.5, wps=5867.6, ups=0.09, wpb=64817, bsz=128, num_updates=17243, lr=9.987e-05, gnorm=1.969, loss_scale=4, train_wall=11, gb_free=2.8, wall=198360 2021-06-21 01:44:56 | INFO | train_inner | epoch 006: 2336 / 3002 loss=2.501, ppl=5.66, wps=5948.3, ups=0.09, wpb=64795, bsz=128, num_updates=17244, lr=9.987e-05, gnorm=2.013, loss_scale=4, train_wall=10, gb_free=2.8, wall=198370 2021-06-21 01:45:07 | INFO | train_inner | epoch 006: 2337 / 3002 loss=2.31, ppl=4.96, wps=5938.1, ups=0.09, wpb=64891, bsz=128, num_updates=17245, lr=9.987e-05, gnorm=1.938, loss_scale=4, train_wall=10, gb_free=2.8, wall=198381 2021-06-21 01:45:18 | INFO | train_inner | epoch 006: 2338 / 3002 loss=2.442, ppl=5.43, wps=5849.7, ups=0.09, wpb=64859, bsz=128, num_updates=17246, lr=9.987e-05, gnorm=2.336, loss_scale=4, train_wall=11, gb_free=2.8, wall=198392 2021-06-21 01:45:29 | INFO | train_inner | epoch 006: 2339 / 3002 loss=2.518, ppl=5.73, wps=6096.6, ups=0.09, wpb=64951, bsz=128, num_updates=17247, lr=9.987e-05, gnorm=1.98, loss_scale=4, train_wall=10, gb_free=2.8, wall=198403 2021-06-21 01:45:40 | INFO | train_inner | epoch 006: 2340 / 3002 loss=2.379, ppl=5.2, wps=5799.5, ups=0.09, wpb=64857, bsz=128, num_updates=17248, lr=9.987e-05, gnorm=2.443, loss_scale=4, train_wall=11, gb_free=2.8, wall=198414 2021-06-21 01:45:51 | INFO | train_inner | epoch 006: 2341 / 3002 loss=2.478, ppl=5.57, wps=5856.8, ups=0.09, wpb=64802, bsz=128, num_updates=17249, lr=9.987e-05, gnorm=5.134, loss_scale=4, train_wall=11, gb_free=2.8, wall=198425 2021-06-21 01:46:03 | INFO | train_inner | epoch 006: 2342 / 3002 loss=2.622, ppl=6.16, wps=5633.5, ups=0.09, wpb=64815, bsz=128, num_updates=17250, lr=9.987e-05, gnorm=2.071, loss_scale=4, train_wall=11, gb_free=2.8, wall=198437 2021-06-21 01:46:14 | INFO | train_inner | epoch 006: 2343 / 3002 loss=2.432, ppl=5.4, wps=5871.9, ups=0.09, wpb=64847, bsz=128, num_updates=17251, lr=9.987e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=198448 2021-06-21 01:46:25 | INFO | train_inner | epoch 006: 2344 / 3002 loss=2.537, ppl=5.8, wps=5867.4, ups=0.09, wpb=64928, bsz=128, num_updates=17252, lr=9.987e-05, gnorm=1.89, loss_scale=4, train_wall=11, gb_free=2.8, wall=198459 2021-06-21 01:46:36 | INFO | train_inner | epoch 006: 2345 / 3002 loss=2.474, ppl=5.56, wps=5882.3, ups=0.09, wpb=64848, bsz=128, num_updates=17253, lr=9.987e-05, gnorm=1.933, loss_scale=4, train_wall=11, gb_free=2.8, wall=198470 2021-06-21 01:46:47 | INFO | train_inner | epoch 006: 2346 / 3002 loss=2.393, ppl=5.25, wps=5845.1, ups=0.09, wpb=64851, bsz=128, num_updates=17254, lr=9.987e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=198481 2021-06-21 01:46:57 | INFO | train_inner | epoch 006: 2347 / 3002 loss=2.454, ppl=5.48, wps=6092.9, ups=0.09, wpb=64831, bsz=128, num_updates=17255, lr=9.98699e-05, gnorm=1.921, loss_scale=4, train_wall=10, gb_free=2.8, wall=198492 2021-06-21 01:47:09 | INFO | train_inner | epoch 006: 2348 / 3002 loss=2.533, ppl=5.79, wps=5784.8, ups=0.09, wpb=64782, bsz=128, num_updates=17256, lr=9.98699e-05, gnorm=1.984, loss_scale=4, train_wall=11, gb_free=2.8, wall=198503 2021-06-21 01:47:20 | INFO | train_inner | epoch 006: 2349 / 3002 loss=2.399, ppl=5.27, wps=5782.1, ups=0.09, wpb=64752, bsz=128, num_updates=17257, lr=9.98699e-05, gnorm=1.969, loss_scale=4, train_wall=11, gb_free=2.8, wall=198514 2021-06-21 01:47:31 | INFO | train_inner | epoch 006: 2350 / 3002 loss=2.493, ppl=5.63, wps=5811.7, ups=0.09, wpb=64797, bsz=128, num_updates=17258, lr=9.98699e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=198525 2021-06-21 01:47:42 | INFO | train_inner | epoch 006: 2351 / 3002 loss=2.407, ppl=5.3, wps=5839.9, ups=0.09, wpb=64829, bsz=128, num_updates=17259, lr=9.98699e-05, gnorm=1.954, loss_scale=4, train_wall=11, gb_free=2.8, wall=198536 2021-06-21 01:47:53 | INFO | train_inner | epoch 006: 2352 / 3002 loss=2.519, ppl=5.73, wps=5880.4, ups=0.09, wpb=64867, bsz=128, num_updates=17260, lr=9.98699e-05, gnorm=2.992, loss_scale=4, train_wall=11, gb_free=2.8, wall=198547 2021-06-21 01:48:04 | INFO | train_inner | epoch 006: 2353 / 3002 loss=2.356, ppl=5.12, wps=5987.9, ups=0.09, wpb=64786, bsz=128, num_updates=17261, lr=9.98699e-05, gnorm=1.965, loss_scale=4, train_wall=10, gb_free=2.8, wall=198558 2021-06-21 01:48:15 | INFO | train_inner | epoch 006: 2354 / 3002 loss=2.369, ppl=5.17, wps=5831.4, ups=0.09, wpb=64815, bsz=128, num_updates=17262, lr=9.98699e-05, gnorm=1.938, loss_scale=4, train_wall=11, gb_free=2.8, wall=198569 2021-06-21 01:48:26 | INFO | train_inner | epoch 006: 2355 / 3002 loss=2.529, ppl=5.77, wps=5953.7, ups=0.09, wpb=64842, bsz=128, num_updates=17263, lr=9.98699e-05, gnorm=2.036, loss_scale=4, train_wall=10, gb_free=2.8, wall=198580 2021-06-21 01:48:37 | INFO | train_inner | epoch 006: 2356 / 3002 loss=2.385, ppl=5.22, wps=5925.9, ups=0.09, wpb=64823, bsz=128, num_updates=17264, lr=9.98699e-05, gnorm=2.027, loss_scale=4, train_wall=10, gb_free=2.8, wall=198591 2021-06-21 01:48:48 | INFO | train_inner | epoch 006: 2357 / 3002 loss=2.5, ppl=5.66, wps=6059.4, ups=0.09, wpb=64852, bsz=128, num_updates=17265, lr=9.98699e-05, gnorm=1.939, loss_scale=4, train_wall=10, gb_free=2.8, wall=198602 2021-06-21 01:48:59 | INFO | train_inner | epoch 006: 2358 / 3002 loss=2.456, ppl=5.49, wps=5826, ups=0.09, wpb=64786, bsz=128, num_updates=17266, lr=9.98699e-05, gnorm=1.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=198613 2021-06-21 01:49:10 | INFO | train_inner | epoch 006: 2359 / 3002 loss=2.437, ppl=5.41, wps=5886.5, ups=0.09, wpb=64835, bsz=128, num_updates=17267, lr=9.98699e-05, gnorm=1.882, loss_scale=4, train_wall=11, gb_free=2.8, wall=198624 2021-06-21 01:49:21 | INFO | train_inner | epoch 006: 2360 / 3002 loss=2.527, ppl=5.76, wps=5879.4, ups=0.09, wpb=64816, bsz=128, num_updates=17268, lr=9.98698e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=198635 2021-06-21 01:49:32 | INFO | train_inner | epoch 006: 2361 / 3002 loss=2.513, ppl=5.71, wps=5817.8, ups=0.09, wpb=64837, bsz=128, num_updates=17269, lr=9.98698e-05, gnorm=1.972, loss_scale=4, train_wall=11, gb_free=2.8, wall=198646 2021-06-21 01:49:43 | INFO | train_inner | epoch 006: 2362 / 3002 loss=2.57, ppl=5.94, wps=5777.1, ups=0.09, wpb=64758, bsz=128, num_updates=17270, lr=9.98698e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=198657 2021-06-21 01:49:54 | INFO | train_inner | epoch 006: 2363 / 3002 loss=2.393, ppl=5.25, wps=5931.1, ups=0.09, wpb=64789, bsz=128, num_updates=17271, lr=9.98698e-05, gnorm=2.08, loss_scale=4, train_wall=10, gb_free=2.8, wall=198668 2021-06-21 01:50:05 | INFO | train_inner | epoch 006: 2364 / 3002 loss=2.39, ppl=5.24, wps=5832.7, ups=0.09, wpb=64877, bsz=128, num_updates=17272, lr=9.98698e-05, gnorm=2.004, loss_scale=4, train_wall=11, gb_free=2.8, wall=198679 2021-06-21 01:50:16 | INFO | train_inner | epoch 006: 2365 / 3002 loss=2.453, ppl=5.47, wps=5834.4, ups=0.09, wpb=64749, bsz=128, num_updates=17273, lr=9.98698e-05, gnorm=1.975, loss_scale=4, train_wall=11, gb_free=2.8, wall=198691 2021-06-21 01:50:27 | INFO | train_inner | epoch 006: 2366 / 3002 loss=2.43, ppl=5.39, wps=5816.7, ups=0.09, wpb=64796, bsz=128, num_updates=17274, lr=9.98698e-05, gnorm=1.952, loss_scale=4, train_wall=11, gb_free=2.8, wall=198702 2021-06-21 01:50:38 | INFO | train_inner | epoch 006: 2367 / 3002 loss=2.558, ppl=5.89, wps=5806.6, ups=0.09, wpb=64790, bsz=128, num_updates=17275, lr=9.98698e-05, gnorm=1.996, loss_scale=4, train_wall=11, gb_free=2.8, wall=198713 2021-06-21 01:50:50 | INFO | train_inner | epoch 006: 2368 / 3002 loss=2.518, ppl=5.73, wps=5846.9, ups=0.09, wpb=64867, bsz=128, num_updates=17276, lr=9.98698e-05, gnorm=1.988, loss_scale=4, train_wall=11, gb_free=2.8, wall=198724 2021-06-21 01:51:01 | INFO | train_inner | epoch 006: 2369 / 3002 loss=2.72, ppl=6.59, wps=5809.7, ups=0.09, wpb=64834, bsz=128, num_updates=17277, lr=9.98698e-05, gnorm=2.095, loss_scale=4, train_wall=11, gb_free=2.8, wall=198735 2021-06-21 01:51:12 | INFO | train_inner | epoch 006: 2370 / 3002 loss=2.656, ppl=6.3, wps=5877.9, ups=0.09, wpb=64929, bsz=128, num_updates=17278, lr=9.98698e-05, gnorm=1.932, loss_scale=4, train_wall=11, gb_free=2.8, wall=198746 2021-06-21 01:51:23 | INFO | train_inner | epoch 006: 2371 / 3002 loss=2.438, ppl=5.42, wps=5903.7, ups=0.09, wpb=64950, bsz=128, num_updates=17279, lr=9.98698e-05, gnorm=1.969, loss_scale=4, train_wall=11, gb_free=2.8, wall=198757 2021-06-21 01:51:34 | INFO | train_inner | epoch 006: 2372 / 3002 loss=2.525, ppl=5.76, wps=5907.5, ups=0.09, wpb=64812, bsz=128, num_updates=17280, lr=9.98697e-05, gnorm=1.945, loss_scale=4, train_wall=11, gb_free=2.8, wall=198768 2021-06-21 01:51:45 | INFO | train_inner | epoch 006: 2373 / 3002 loss=2.549, ppl=5.85, wps=6002.9, ups=0.09, wpb=64813, bsz=128, num_updates=17281, lr=9.98697e-05, gnorm=1.904, loss_scale=4, train_wall=10, gb_free=2.8, wall=198779 2021-06-21 01:51:56 | INFO | train_inner | epoch 006: 2374 / 3002 loss=2.528, ppl=5.77, wps=5924.7, ups=0.09, wpb=64843, bsz=128, num_updates=17282, lr=9.98697e-05, gnorm=1.925, loss_scale=4, train_wall=11, gb_free=2.8, wall=198790 2021-06-21 01:52:06 | INFO | train_inner | epoch 006: 2375 / 3002 loss=2.409, ppl=5.31, wps=5909.3, ups=0.09, wpb=64792, bsz=128, num_updates=17283, lr=9.98697e-05, gnorm=1.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=198801 2021-06-21 01:52:18 | INFO | train_inner | epoch 006: 2376 / 3002 loss=2.553, ppl=5.87, wps=5827.9, ups=0.09, wpb=64786, bsz=128, num_updates=17284, lr=9.98697e-05, gnorm=1.985, loss_scale=4, train_wall=11, gb_free=2.8, wall=198812 2021-06-21 01:52:29 | INFO | train_inner | epoch 006: 2377 / 3002 loss=2.587, ppl=6.01, wps=5795.6, ups=0.09, wpb=64912, bsz=128, num_updates=17285, lr=9.98697e-05, gnorm=2.023, loss_scale=4, train_wall=11, gb_free=2.8, wall=198823 2021-06-21 01:52:40 | INFO | train_inner | epoch 006: 2378 / 3002 loss=2.518, ppl=5.73, wps=5771.7, ups=0.09, wpb=64753, bsz=128, num_updates=17286, lr=9.98697e-05, gnorm=1.925, loss_scale=4, train_wall=11, gb_free=2.8, wall=198834 2021-06-21 01:52:51 | INFO | train_inner | epoch 006: 2379 / 3002 loss=2.493, ppl=5.63, wps=5883.3, ups=0.09, wpb=64932, bsz=128, num_updates=17287, lr=9.98697e-05, gnorm=2.014, loss_scale=4, train_wall=11, gb_free=2.8, wall=198845 2021-06-21 01:53:02 | INFO | train_inner | epoch 006: 2380 / 3002 loss=2.441, ppl=5.43, wps=5927, ups=0.09, wpb=64881, bsz=128, num_updates=17288, lr=9.98697e-05, gnorm=3.029, loss_scale=4, train_wall=11, gb_free=2.8, wall=198856 2021-06-21 01:53:13 | INFO | train_inner | epoch 006: 2381 / 3002 loss=2.654, ppl=6.3, wps=5914, ups=0.09, wpb=64813, bsz=128, num_updates=17289, lr=9.98697e-05, gnorm=2.002, loss_scale=4, train_wall=11, gb_free=2.8, wall=198867 2021-06-21 01:53:24 | INFO | train_inner | epoch 006: 2382 / 3002 loss=2.388, ppl=5.24, wps=5772.7, ups=0.09, wpb=64901, bsz=128, num_updates=17290, lr=9.98697e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=198879 2021-06-21 01:53:35 | INFO | train_inner | epoch 006: 2383 / 3002 loss=2.375, ppl=5.19, wps=5875.4, ups=0.09, wpb=64950, bsz=128, num_updates=17291, lr=9.98697e-05, gnorm=1.937, loss_scale=4, train_wall=11, gb_free=2.8, wall=198890 2021-06-21 01:53:46 | INFO | train_inner | epoch 006: 2384 / 3002 loss=2.528, ppl=5.77, wps=5913.7, ups=0.09, wpb=64805, bsz=128, num_updates=17292, lr=9.98697e-05, gnorm=1.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=198901 2021-06-21 01:53:57 | INFO | train_inner | epoch 006: 2385 / 3002 loss=2.444, ppl=5.44, wps=5922.2, ups=0.09, wpb=64893, bsz=128, num_updates=17293, lr=9.98696e-05, gnorm=2.023, loss_scale=4, train_wall=10, gb_free=2.8, wall=198912 2021-06-21 01:54:08 | INFO | train_inner | epoch 006: 2386 / 3002 loss=2.422, ppl=5.36, wps=5801.4, ups=0.09, wpb=64867, bsz=128, num_updates=17294, lr=9.98696e-05, gnorm=1.907, loss_scale=4, train_wall=11, gb_free=2.8, wall=198923 2021-06-21 01:54:19 | INFO | train_inner | epoch 006: 2387 / 3002 loss=2.379, ppl=5.2, wps=5838.6, ups=0.09, wpb=64816, bsz=128, num_updates=17295, lr=9.98696e-05, gnorm=1.902, loss_scale=4, train_wall=11, gb_free=2.8, wall=198934 2021-06-21 01:54:31 | INFO | train_inner | epoch 006: 2388 / 3002 loss=2.526, ppl=5.76, wps=5755, ups=0.09, wpb=64748, bsz=128, num_updates=17296, lr=9.98696e-05, gnorm=2.064, loss_scale=4, train_wall=11, gb_free=2.8, wall=198945 2021-06-21 01:54:42 | INFO | train_inner | epoch 006: 2389 / 3002 loss=2.605, ppl=6.08, wps=5872.7, ups=0.09, wpb=64811, bsz=128, num_updates=17297, lr=9.98696e-05, gnorm=2.032, loss_scale=4, train_wall=11, gb_free=2.8, wall=198956 2021-06-21 01:54:53 | INFO | train_inner | epoch 006: 2390 / 3002 loss=2.402, ppl=5.29, wps=5846.8, ups=0.09, wpb=64858, bsz=128, num_updates=17298, lr=9.98696e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=198967 2021-06-21 01:55:04 | INFO | train_inner | epoch 006: 2391 / 3002 loss=2.463, ppl=5.51, wps=5770.7, ups=0.09, wpb=64792, bsz=128, num_updates=17299, lr=9.98696e-05, gnorm=1.934, loss_scale=4, train_wall=11, gb_free=2.8, wall=198978 2021-06-21 01:55:15 | INFO | train_inner | epoch 006: 2392 / 3002 loss=2.594, ppl=6.04, wps=5907.5, ups=0.09, wpb=64864, bsz=128, num_updates=17300, lr=9.98696e-05, gnorm=1.861, loss_scale=4, train_wall=11, gb_free=2.8, wall=198989 2021-06-21 01:55:26 | INFO | train_inner | epoch 006: 2393 / 3002 loss=2.491, ppl=5.62, wps=5959.4, ups=0.09, wpb=64847, bsz=128, num_updates=17301, lr=9.98696e-05, gnorm=1.946, loss_scale=4, train_wall=10, gb_free=2.8, wall=199000 2021-06-21 01:55:37 | INFO | train_inner | epoch 006: 2394 / 3002 loss=2.52, ppl=5.74, wps=5875.2, ups=0.09, wpb=64791, bsz=128, num_updates=17302, lr=9.98696e-05, gnorm=2.128, loss_scale=4, train_wall=11, gb_free=2.8, wall=199011 2021-06-21 01:55:48 | INFO | train_inner | epoch 006: 2395 / 3002 loss=2.593, ppl=6.03, wps=5891.5, ups=0.09, wpb=64804, bsz=128, num_updates=17303, lr=9.98696e-05, gnorm=2.331, loss_scale=4, train_wall=11, gb_free=2.8, wall=199022 2021-06-21 01:55:59 | INFO | train_inner | epoch 006: 2396 / 3002 loss=2.354, ppl=5.11, wps=5870.9, ups=0.09, wpb=64831, bsz=128, num_updates=17304, lr=9.98696e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=199033 2021-06-21 01:56:10 | INFO | train_inner | epoch 006: 2397 / 3002 loss=2.546, ppl=5.84, wps=5969.9, ups=0.09, wpb=64859, bsz=128, num_updates=17305, lr=9.98695e-05, gnorm=1.899, loss_scale=4, train_wall=10, gb_free=2.8, wall=199044 2021-06-21 01:56:21 | INFO | train_inner | epoch 006: 2398 / 3002 loss=2.345, ppl=5.08, wps=5864, ups=0.09, wpb=64793, bsz=128, num_updates=17306, lr=9.98695e-05, gnorm=2.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=199055 2021-06-21 01:56:32 | INFO | train_inner | epoch 006: 2399 / 3002 loss=2.353, ppl=5.11, wps=5879.5, ups=0.09, wpb=64833, bsz=128, num_updates=17307, lr=9.98695e-05, gnorm=1.965, loss_scale=4, train_wall=11, gb_free=2.8, wall=199066 2021-06-21 01:56:43 | INFO | train_inner | epoch 006: 2400 / 3002 loss=2.443, ppl=5.44, wps=5887.8, ups=0.09, wpb=64850, bsz=128, num_updates=17308, lr=9.98695e-05, gnorm=1.999, loss_scale=4, train_wall=11, gb_free=2.8, wall=199077 2021-06-21 01:56:54 | INFO | train_inner | epoch 006: 2401 / 3002 loss=2.435, ppl=5.41, wps=5953, ups=0.09, wpb=64741, bsz=128, num_updates=17309, lr=9.98695e-05, gnorm=1.942, loss_scale=4, train_wall=10, gb_free=2.8, wall=199088 2021-06-21 01:57:05 | INFO | train_inner | epoch 006: 2402 / 3002 loss=2.408, ppl=5.31, wps=5930.1, ups=0.09, wpb=64894, bsz=128, num_updates=17310, lr=9.98695e-05, gnorm=1.965, loss_scale=4, train_wall=10, gb_free=2.8, wall=199099 2021-06-21 01:57:16 | INFO | train_inner | epoch 006: 2403 / 3002 loss=2.244, ppl=4.74, wps=5756, ups=0.09, wpb=64773, bsz=128, num_updates=17311, lr=9.98695e-05, gnorm=1.921, loss_scale=4, train_wall=11, gb_free=2.8, wall=199110 2021-06-21 01:57:27 | INFO | train_inner | epoch 006: 2404 / 3002 loss=2.571, ppl=5.94, wps=5887.1, ups=0.09, wpb=64681, bsz=128, num_updates=17312, lr=9.98695e-05, gnorm=1.933, loss_scale=4, train_wall=11, gb_free=2.8, wall=199121 2021-06-21 01:57:38 | INFO | train_inner | epoch 006: 2405 / 3002 loss=2.463, ppl=5.51, wps=5956.5, ups=0.09, wpb=64870, bsz=128, num_updates=17313, lr=9.98695e-05, gnorm=2.178, loss_scale=4, train_wall=10, gb_free=2.8, wall=199132 2021-06-21 01:57:49 | INFO | train_inner | epoch 006: 2406 / 3002 loss=2.455, ppl=5.48, wps=5790.6, ups=0.09, wpb=64795, bsz=128, num_updates=17314, lr=9.98695e-05, gnorm=2.761, loss_scale=4, train_wall=11, gb_free=2.8, wall=199143 2021-06-21 01:58:00 | INFO | train_inner | epoch 006: 2407 / 3002 loss=2.549, ppl=5.85, wps=5834.7, ups=0.09, wpb=64809, bsz=128, num_updates=17315, lr=9.98695e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=199155 2021-06-21 01:58:11 | INFO | train_inner | epoch 006: 2408 / 3002 loss=2.49, ppl=5.62, wps=5866, ups=0.09, wpb=64773, bsz=128, num_updates=17316, lr=9.98695e-05, gnorm=2.018, loss_scale=4, train_wall=11, gb_free=2.8, wall=199166 2021-06-21 01:58:22 | INFO | train_inner | epoch 006: 2409 / 3002 loss=2.408, ppl=5.31, wps=5920, ups=0.09, wpb=64804, bsz=128, num_updates=17317, lr=9.98695e-05, gnorm=1.945, loss_scale=4, train_wall=10, gb_free=2.8, wall=199177 2021-06-21 01:58:33 | INFO | train_inner | epoch 006: 2410 / 3002 loss=2.348, ppl=5.09, wps=5820.6, ups=0.09, wpb=64761, bsz=128, num_updates=17318, lr=9.98694e-05, gnorm=14.683, loss_scale=4, train_wall=11, gb_free=2.8, wall=199188 2021-06-21 01:58:44 | INFO | train_inner | epoch 006: 2411 / 3002 loss=2.464, ppl=5.52, wps=5819.3, ups=0.09, wpb=64873, bsz=128, num_updates=17319, lr=9.98694e-05, gnorm=2.08, loss_scale=4, train_wall=11, gb_free=2.8, wall=199199 2021-06-21 01:58:56 | INFO | train_inner | epoch 006: 2412 / 3002 loss=2.386, ppl=5.23, wps=5821.9, ups=0.09, wpb=64771, bsz=128, num_updates=17320, lr=9.98694e-05, gnorm=1.976, loss_scale=4, train_wall=11, gb_free=2.8, wall=199210 2021-06-21 01:59:07 | INFO | train_inner | epoch 006: 2413 / 3002 loss=2.507, ppl=5.68, wps=5833.7, ups=0.09, wpb=64811, bsz=128, num_updates=17321, lr=9.98694e-05, gnorm=1.931, loss_scale=4, train_wall=11, gb_free=2.8, wall=199221 2021-06-21 01:59:18 | INFO | train_inner | epoch 006: 2414 / 3002 loss=2.537, ppl=5.8, wps=5817.8, ups=0.09, wpb=64835, bsz=128, num_updates=17322, lr=9.98694e-05, gnorm=2.19, loss_scale=4, train_wall=11, gb_free=2.8, wall=199232 2021-06-21 01:59:29 | INFO | train_inner | epoch 006: 2415 / 3002 loss=2.539, ppl=5.81, wps=5826.6, ups=0.09, wpb=64780, bsz=128, num_updates=17323, lr=9.98694e-05, gnorm=2.75, loss_scale=4, train_wall=11, gb_free=2.8, wall=199243 2021-06-21 01:59:40 | INFO | train_inner | epoch 006: 2416 / 3002 loss=2.514, ppl=5.71, wps=5925.7, ups=0.09, wpb=64835, bsz=128, num_updates=17324, lr=9.98694e-05, gnorm=2.168, loss_scale=4, train_wall=10, gb_free=2.8, wall=199254 2021-06-21 01:59:51 | INFO | train_inner | epoch 006: 2417 / 3002 loss=2.553, ppl=5.87, wps=5842.6, ups=0.09, wpb=64791, bsz=128, num_updates=17325, lr=9.98694e-05, gnorm=2.095, loss_scale=4, train_wall=11, gb_free=2.8, wall=199265 2021-06-21 02:00:02 | INFO | train_inner | epoch 006: 2418 / 3002 loss=2.692, ppl=6.46, wps=5841, ups=0.09, wpb=64743, bsz=128, num_updates=17326, lr=9.98694e-05, gnorm=2.063, loss_scale=4, train_wall=11, gb_free=2.8, wall=199276 2021-06-21 02:00:13 | INFO | train_inner | epoch 006: 2419 / 3002 loss=2.434, ppl=5.4, wps=5870.9, ups=0.09, wpb=64912, bsz=128, num_updates=17327, lr=9.98694e-05, gnorm=2.499, loss_scale=4, train_wall=11, gb_free=2.8, wall=199287 2021-06-21 02:00:24 | INFO | train_inner | epoch 006: 2420 / 3002 loss=2.513, ppl=5.71, wps=5929.1, ups=0.09, wpb=64851, bsz=128, num_updates=17328, lr=9.98694e-05, gnorm=9.314, loss_scale=4, train_wall=10, gb_free=2.8, wall=199298 2021-06-21 02:00:35 | INFO | train_inner | epoch 006: 2421 / 3002 loss=2.36, ppl=5.13, wps=5952.7, ups=0.09, wpb=64844, bsz=128, num_updates=17329, lr=9.98694e-05, gnorm=2.431, loss_scale=4, train_wall=10, gb_free=2.8, wall=199309 2021-06-21 02:00:46 | INFO | train_inner | epoch 006: 2422 / 3002 loss=2.616, ppl=6.13, wps=5803.5, ups=0.09, wpb=64827, bsz=128, num_updates=17330, lr=9.98693e-05, gnorm=2.4, loss_scale=4, train_wall=11, gb_free=2.8, wall=199320 2021-06-21 02:00:57 | INFO | train_inner | epoch 006: 2423 / 3002 loss=2.368, ppl=5.16, wps=5822.6, ups=0.09, wpb=64844, bsz=128, num_updates=17331, lr=9.98693e-05, gnorm=2.074, loss_scale=4, train_wall=11, gb_free=2.8, wall=199332 2021-06-21 02:01:08 | INFO | train_inner | epoch 006: 2424 / 3002 loss=2.651, ppl=6.28, wps=5802.3, ups=0.09, wpb=64787, bsz=128, num_updates=17332, lr=9.98693e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=199343 2021-06-21 02:01:20 | INFO | train_inner | epoch 006: 2425 / 3002 loss=2.629, ppl=6.19, wps=5812, ups=0.09, wpb=64865, bsz=128, num_updates=17333, lr=9.98693e-05, gnorm=2.092, loss_scale=4, train_wall=11, gb_free=2.8, wall=199354 2021-06-21 02:01:31 | INFO | train_inner | epoch 006: 2426 / 3002 loss=2.598, ppl=6.05, wps=5875.5, ups=0.09, wpb=64751, bsz=128, num_updates=17334, lr=9.98693e-05, gnorm=1.981, loss_scale=4, train_wall=11, gb_free=2.8, wall=199365 2021-06-21 02:01:42 | INFO | train_inner | epoch 006: 2427 / 3002 loss=2.42, ppl=5.35, wps=5831.1, ups=0.09, wpb=64789, bsz=128, num_updates=17335, lr=9.98693e-05, gnorm=2.144, loss_scale=4, train_wall=11, gb_free=2.8, wall=199376 2021-06-21 02:01:53 | INFO | train_inner | epoch 006: 2428 / 3002 loss=2.64, ppl=6.23, wps=5824, ups=0.09, wpb=64889, bsz=128, num_updates=17336, lr=9.98693e-05, gnorm=2.012, loss_scale=4, train_wall=11, gb_free=2.8, wall=199387 2021-06-21 02:02:04 | INFO | train_inner | epoch 006: 2429 / 3002 loss=2.559, ppl=5.89, wps=5941.7, ups=0.09, wpb=64859, bsz=128, num_updates=17337, lr=9.98693e-05, gnorm=2.169, loss_scale=4, train_wall=10, gb_free=2.8, wall=199398 2021-06-21 02:02:15 | INFO | train_inner | epoch 006: 2430 / 3002 loss=2.516, ppl=5.72, wps=5799.6, ups=0.09, wpb=64839, bsz=128, num_updates=17338, lr=9.98693e-05, gnorm=2.014, loss_scale=4, train_wall=11, gb_free=2.8, wall=199409 2021-06-21 02:02:26 | INFO | train_inner | epoch 006: 2431 / 3002 loss=2.502, ppl=5.66, wps=5763, ups=0.09, wpb=64811, bsz=128, num_updates=17339, lr=9.98693e-05, gnorm=2.071, loss_scale=4, train_wall=11, gb_free=2.8, wall=199421 2021-06-21 02:02:37 | INFO | train_inner | epoch 006: 2432 / 3002 loss=2.393, ppl=5.25, wps=5811.1, ups=0.09, wpb=64818, bsz=128, num_updates=17340, lr=9.98693e-05, gnorm=2.199, loss_scale=4, train_wall=11, gb_free=2.8, wall=199432 2021-06-21 02:02:49 | INFO | train_inner | epoch 006: 2433 / 3002 loss=2.537, ppl=5.8, wps=5821.1, ups=0.09, wpb=64816, bsz=128, num_updates=17341, lr=9.98693e-05, gnorm=1.975, loss_scale=4, train_wall=11, gb_free=2.8, wall=199443 2021-06-21 02:03:00 | INFO | train_inner | epoch 006: 2434 / 3002 loss=2.62, ppl=6.15, wps=5809.4, ups=0.09, wpb=64779, bsz=128, num_updates=17342, lr=9.98693e-05, gnorm=1.958, loss_scale=4, train_wall=11, gb_free=2.8, wall=199454 2021-06-21 02:03:11 | INFO | train_inner | epoch 006: 2435 / 3002 loss=2.558, ppl=5.89, wps=5739.6, ups=0.09, wpb=64752, bsz=128, num_updates=17343, lr=9.98692e-05, gnorm=1.883, loss_scale=4, train_wall=11, gb_free=2.8, wall=199465 2021-06-21 02:03:22 | INFO | train_inner | epoch 006: 2436 / 3002 loss=2.507, ppl=5.68, wps=6000.9, ups=0.09, wpb=64759, bsz=128, num_updates=17344, lr=9.98692e-05, gnorm=1.915, loss_scale=4, train_wall=10, gb_free=2.8, wall=199476 2021-06-21 02:03:33 | INFO | train_inner | epoch 006: 2437 / 3002 loss=2.388, ppl=5.23, wps=5927.5, ups=0.09, wpb=64880, bsz=128, num_updates=17345, lr=9.98692e-05, gnorm=2.023, loss_scale=4, train_wall=10, gb_free=2.8, wall=199487 2021-06-21 02:03:44 | INFO | train_inner | epoch 006: 2438 / 3002 loss=2.355, ppl=5.12, wps=5906.2, ups=0.09, wpb=64808, bsz=128, num_updates=17346, lr=9.98692e-05, gnorm=1.926, loss_scale=4, train_wall=10, gb_free=2.8, wall=199498 2021-06-21 02:03:55 | INFO | train_inner | epoch 006: 2439 / 3002 loss=2.436, ppl=5.41, wps=5889, ups=0.09, wpb=64924, bsz=128, num_updates=17347, lr=9.98692e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=199509 2021-06-21 02:04:06 | INFO | train_inner | epoch 006: 2440 / 3002 loss=2.484, ppl=5.6, wps=5719.3, ups=0.09, wpb=64745, bsz=128, num_updates=17348, lr=9.98692e-05, gnorm=2.005, loss_scale=4, train_wall=11, gb_free=2.8, wall=199520 2021-06-21 02:04:17 | INFO | train_inner | epoch 006: 2441 / 3002 loss=2.524, ppl=5.75, wps=5908.2, ups=0.09, wpb=64847, bsz=128, num_updates=17349, lr=9.98692e-05, gnorm=2.011, loss_scale=4, train_wall=11, gb_free=2.8, wall=199531 2021-06-21 02:04:28 | INFO | train_inner | epoch 006: 2442 / 3002 loss=2.451, ppl=5.47, wps=5881.3, ups=0.09, wpb=64798, bsz=128, num_updates=17350, lr=9.98692e-05, gnorm=15.028, loss_scale=4, train_wall=11, gb_free=2.8, wall=199542 2021-06-21 02:04:39 | INFO | train_inner | epoch 006: 2443 / 3002 loss=2.279, ppl=4.85, wps=5845.5, ups=0.09, wpb=64814, bsz=128, num_updates=17351, lr=9.98692e-05, gnorm=1.966, loss_scale=4, train_wall=11, gb_free=2.8, wall=199553 2021-06-21 02:04:50 | INFO | train_inner | epoch 006: 2444 / 3002 loss=2.343, ppl=5.07, wps=5791.4, ups=0.09, wpb=64837, bsz=128, num_updates=17352, lr=9.98692e-05, gnorm=2.249, loss_scale=4, train_wall=11, gb_free=2.8, wall=199565 2021-06-21 02:05:01 | INFO | train_inner | epoch 006: 2445 / 3002 loss=2.511, ppl=5.7, wps=5827.1, ups=0.09, wpb=64795, bsz=128, num_updates=17353, lr=9.98692e-05, gnorm=2.067, loss_scale=4, train_wall=11, gb_free=2.8, wall=199576 2021-06-21 02:05:12 | INFO | train_inner | epoch 006: 2446 / 3002 loss=2.477, ppl=5.57, wps=5846, ups=0.09, wpb=64794, bsz=128, num_updates=17354, lr=9.98692e-05, gnorm=1.905, loss_scale=4, train_wall=11, gb_free=2.8, wall=199587 2021-06-21 02:05:23 | INFO | train_inner | epoch 006: 2447 / 3002 loss=2.566, ppl=5.92, wps=5906.9, ups=0.09, wpb=64838, bsz=128, num_updates=17355, lr=9.98691e-05, gnorm=2.184, loss_scale=4, train_wall=10, gb_free=2.8, wall=199598 2021-06-21 02:05:35 | INFO | train_inner | epoch 006: 2448 / 3002 loss=2.561, ppl=5.9, wps=5854.3, ups=0.09, wpb=64863, bsz=128, num_updates=17356, lr=9.98691e-05, gnorm=2.021, loss_scale=4, train_wall=11, gb_free=2.8, wall=199609 2021-06-21 02:05:46 | INFO | train_inner | epoch 006: 2449 / 3002 loss=2.425, ppl=5.37, wps=5889, ups=0.09, wpb=64775, bsz=128, num_updates=17357, lr=9.98691e-05, gnorm=1.983, loss_scale=4, train_wall=11, gb_free=2.8, wall=199620 2021-06-21 02:05:56 | INFO | train_inner | epoch 006: 2450 / 3002 loss=2.608, ppl=6.1, wps=5972.9, ups=0.09, wpb=64767, bsz=128, num_updates=17358, lr=9.98691e-05, gnorm=2.55, loss_scale=4, train_wall=10, gb_free=2.8, wall=199631 2021-06-21 02:06:08 | INFO | train_inner | epoch 006: 2451 / 3002 loss=2.461, ppl=5.51, wps=5814.6, ups=0.09, wpb=64853, bsz=128, num_updates=17359, lr=9.98691e-05, gnorm=1.992, loss_scale=4, train_wall=11, gb_free=2.8, wall=199642 2021-06-21 02:06:19 | INFO | train_inner | epoch 006: 2452 / 3002 loss=2.345, ppl=5.08, wps=5831, ups=0.09, wpb=64847, bsz=128, num_updates=17360, lr=9.98691e-05, gnorm=2.037, loss_scale=4, train_wall=11, gb_free=2.8, wall=199653 2021-06-21 02:06:30 | INFO | train_inner | epoch 006: 2453 / 3002 loss=2.47, ppl=5.54, wps=5831.6, ups=0.09, wpb=64788, bsz=128, num_updates=17361, lr=9.98691e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=199664 2021-06-21 02:06:41 | INFO | train_inner | epoch 006: 2454 / 3002 loss=2.5, ppl=5.66, wps=5763.4, ups=0.09, wpb=64829, bsz=128, num_updates=17362, lr=9.98691e-05, gnorm=2.005, loss_scale=4, train_wall=11, gb_free=2.8, wall=199675 2021-06-21 02:06:52 | INFO | train_inner | epoch 006: 2455 / 3002 loss=2.309, ppl=4.95, wps=5939.7, ups=0.09, wpb=64915, bsz=128, num_updates=17363, lr=9.98691e-05, gnorm=1.936, loss_scale=8, train_wall=10, gb_free=2.8, wall=199686 2021-06-21 02:07:03 | INFO | train_inner | epoch 006: 2456 / 3002 loss=2.537, ppl=5.81, wps=5885, ups=0.09, wpb=64823, bsz=128, num_updates=17364, lr=9.98691e-05, gnorm=1.894, loss_scale=8, train_wall=11, gb_free=2.8, wall=199697 2021-06-21 02:07:14 | INFO | train_inner | epoch 006: 2457 / 3002 loss=2.572, ppl=5.95, wps=6001.1, ups=0.09, wpb=64801, bsz=128, num_updates=17365, lr=9.98691e-05, gnorm=1.905, loss_scale=8, train_wall=10, gb_free=2.8, wall=199708 2021-06-21 02:07:25 | INFO | train_inner | epoch 006: 2458 / 3002 loss=2.478, ppl=5.57, wps=5987.3, ups=0.09, wpb=64847, bsz=128, num_updates=17366, lr=9.98691e-05, gnorm=1.935, loss_scale=8, train_wall=10, gb_free=2.8, wall=199719 2021-06-21 02:07:36 | INFO | train_inner | epoch 006: 2459 / 3002 loss=2.41, ppl=5.31, wps=5804.8, ups=0.09, wpb=64792, bsz=128, num_updates=17367, lr=9.98691e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=199730 2021-06-21 02:07:47 | INFO | train_inner | epoch 006: 2460 / 3002 loss=2.526, ppl=5.76, wps=5976, ups=0.09, wpb=64879, bsz=128, num_updates=17368, lr=9.9869e-05, gnorm=1.908, loss_scale=8, train_wall=10, gb_free=2.8, wall=199741 2021-06-21 02:07:58 | INFO | train_inner | epoch 006: 2461 / 3002 loss=2.419, ppl=5.35, wps=5927.2, ups=0.09, wpb=64906, bsz=128, num_updates=17369, lr=9.9869e-05, gnorm=2.212, loss_scale=8, train_wall=10, gb_free=2.8, wall=199752 2021-06-21 02:08:09 | INFO | train_inner | epoch 006: 2462 / 3002 loss=2.424, ppl=5.37, wps=5869.9, ups=0.09, wpb=64757, bsz=128, num_updates=17370, lr=9.9869e-05, gnorm=2.044, loss_scale=8, train_wall=11, gb_free=2.8, wall=199763 2021-06-21 02:08:20 | INFO | train_inner | epoch 006: 2463 / 3002 loss=2.512, ppl=5.7, wps=5880.6, ups=0.09, wpb=64816, bsz=128, num_updates=17371, lr=9.9869e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=199774 2021-06-21 02:08:30 | INFO | train_inner | epoch 006: 2464 / 3002 loss=2.422, ppl=5.36, wps=6036.5, ups=0.09, wpb=64832, bsz=128, num_updates=17372, lr=9.9869e-05, gnorm=1.987, loss_scale=8, train_wall=10, gb_free=2.8, wall=199785 2021-06-21 02:08:41 | INFO | train_inner | epoch 006: 2465 / 3002 loss=2.482, ppl=5.58, wps=5870, ups=0.09, wpb=64835, bsz=128, num_updates=17373, lr=9.9869e-05, gnorm=1.984, loss_scale=8, train_wall=11, gb_free=2.8, wall=199796 2021-06-21 02:08:52 | INFO | train_inner | epoch 006: 2466 / 3002 loss=2.536, ppl=5.8, wps=5852.8, ups=0.09, wpb=64782, bsz=128, num_updates=17374, lr=9.9869e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=199807 2021-06-21 02:09:04 | INFO | train_inner | epoch 006: 2467 / 3002 loss=2.572, ppl=5.94, wps=5820.5, ups=0.09, wpb=64749, bsz=128, num_updates=17375, lr=9.9869e-05, gnorm=2.12, loss_scale=8, train_wall=11, gb_free=2.8, wall=199818 2021-06-21 02:09:14 | INFO | train_inner | epoch 006: 2468 / 3002 loss=2.411, ppl=5.32, wps=6061.9, ups=0.09, wpb=64868, bsz=128, num_updates=17376, lr=9.9869e-05, gnorm=1.946, loss_scale=8, train_wall=10, gb_free=2.8, wall=199829 2021-06-21 02:09:25 | INFO | train_inner | epoch 006: 2469 / 3002 loss=2.499, ppl=5.65, wps=5845.1, ups=0.09, wpb=64802, bsz=128, num_updates=17377, lr=9.9869e-05, gnorm=2.566, loss_scale=8, train_wall=11, gb_free=2.8, wall=199840 2021-06-21 02:09:36 | INFO | train_inner | epoch 006: 2470 / 3002 loss=2.596, ppl=6.05, wps=5953.7, ups=0.09, wpb=64838, bsz=128, num_updates=17378, lr=9.9869e-05, gnorm=2.914, loss_scale=8, train_wall=10, gb_free=2.8, wall=199851 2021-06-21 02:09:48 | INFO | train_inner | epoch 006: 2471 / 3002 loss=2.547, ppl=5.84, wps=5744.3, ups=0.09, wpb=64827, bsz=128, num_updates=17379, lr=9.9869e-05, gnorm=1.977, loss_scale=8, train_wall=11, gb_free=2.8, wall=199862 2021-06-21 02:09:59 | INFO | train_inner | epoch 006: 2472 / 3002 loss=2.522, ppl=5.74, wps=5793.2, ups=0.09, wpb=64928, bsz=128, num_updates=17380, lr=9.98689e-05, gnorm=1.963, loss_scale=8, train_wall=11, gb_free=2.8, wall=199873 2021-06-21 02:10:10 | INFO | train_inner | epoch 006: 2473 / 3002 loss=2.51, ppl=5.7, wps=5752.3, ups=0.09, wpb=64859, bsz=128, num_updates=17381, lr=9.98689e-05, gnorm=2.045, loss_scale=8, train_wall=11, gb_free=2.8, wall=199884 2021-06-21 02:10:21 | INFO | train_inner | epoch 006: 2474 / 3002 loss=2.441, ppl=5.43, wps=5848.4, ups=0.09, wpb=64773, bsz=128, num_updates=17382, lr=9.98689e-05, gnorm=2.021, loss_scale=8, train_wall=11, gb_free=2.8, wall=199895 2021-06-21 02:10:32 | INFO | train_inner | epoch 006: 2475 / 3002 loss=2.499, ppl=5.65, wps=5870.3, ups=0.09, wpb=64839, bsz=128, num_updates=17383, lr=9.98689e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=199906 2021-06-21 02:10:43 | INFO | train_inner | epoch 006: 2476 / 3002 loss=2.523, ppl=5.75, wps=5824.9, ups=0.09, wpb=64772, bsz=128, num_updates=17384, lr=9.98689e-05, gnorm=2.176, loss_scale=8, train_wall=11, gb_free=2.8, wall=199918 2021-06-21 02:10:54 | INFO | train_inner | epoch 006: 2477 / 3002 loss=2.486, ppl=5.6, wps=5918.4, ups=0.09, wpb=64870, bsz=128, num_updates=17385, lr=9.98689e-05, gnorm=1.92, loss_scale=8, train_wall=10, gb_free=2.8, wall=199929 2021-06-21 02:11:05 | INFO | train_inner | epoch 006: 2478 / 3002 loss=2.404, ppl=5.29, wps=5830.5, ups=0.09, wpb=64888, bsz=128, num_updates=17386, lr=9.98689e-05, gnorm=1.932, loss_scale=8, train_wall=11, gb_free=2.8, wall=199940 2021-06-21 02:11:16 | INFO | train_inner | epoch 006: 2479 / 3002 loss=2.492, ppl=5.63, wps=5971.2, ups=0.09, wpb=64905, bsz=128, num_updates=17387, lr=9.98689e-05, gnorm=2.382, loss_scale=8, train_wall=10, gb_free=2.8, wall=199951 2021-06-21 02:11:27 | INFO | train_inner | epoch 006: 2480 / 3002 loss=2.498, ppl=5.65, wps=5811.3, ups=0.09, wpb=64879, bsz=128, num_updates=17388, lr=9.98689e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=199962 2021-06-21 02:11:38 | INFO | train_inner | epoch 006: 2481 / 3002 loss=2.487, ppl=5.61, wps=5944.6, ups=0.09, wpb=64798, bsz=128, num_updates=17389, lr=9.98689e-05, gnorm=2.174, loss_scale=8, train_wall=10, gb_free=2.8, wall=199973 2021-06-21 02:11:49 | INFO | train_inner | epoch 006: 2482 / 3002 loss=2.325, ppl=5.01, wps=5821.9, ups=0.09, wpb=64830, bsz=128, num_updates=17390, lr=9.98689e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=199984 2021-06-21 02:12:00 | INFO | train_inner | epoch 006: 2483 / 3002 loss=2.404, ppl=5.29, wps=5897.3, ups=0.09, wpb=64816, bsz=128, num_updates=17391, lr=9.98689e-05, gnorm=2.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=199995 2021-06-21 02:12:11 | INFO | train_inner | epoch 006: 2484 / 3002 loss=2.417, ppl=5.34, wps=5866, ups=0.09, wpb=64866, bsz=128, num_updates=17392, lr=9.98689e-05, gnorm=2.549, loss_scale=8, train_wall=11, gb_free=2.8, wall=200006 2021-06-21 02:12:22 | INFO | train_inner | epoch 006: 2485 / 3002 loss=2.448, ppl=5.46, wps=5921, ups=0.09, wpb=64842, bsz=128, num_updates=17393, lr=9.98688e-05, gnorm=2.074, loss_scale=8, train_wall=11, gb_free=2.8, wall=200017 2021-06-21 02:12:33 | INFO | train_inner | epoch 006: 2486 / 3002 loss=2.411, ppl=5.32, wps=5863.8, ups=0.09, wpb=64794, bsz=128, num_updates=17394, lr=9.98688e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=200028 2021-06-21 02:12:44 | INFO | train_inner | epoch 006: 2487 / 3002 loss=2.563, ppl=5.91, wps=5895.4, ups=0.09, wpb=64886, bsz=128, num_updates=17395, lr=9.98688e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=200039 2021-06-21 02:12:56 | INFO | train_inner | epoch 006: 2488 / 3002 loss=2.632, ppl=6.2, wps=5854.7, ups=0.09, wpb=64825, bsz=128, num_updates=17396, lr=9.98688e-05, gnorm=2.267, loss_scale=8, train_wall=11, gb_free=2.8, wall=200050 2021-06-21 02:13:07 | INFO | train_inner | epoch 006: 2489 / 3002 loss=2.36, ppl=5.13, wps=5905.9, ups=0.09, wpb=64845, bsz=128, num_updates=17397, lr=9.98688e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=200061 2021-06-21 02:13:18 | INFO | train_inner | epoch 006: 2490 / 3002 loss=2.456, ppl=5.49, wps=5795.7, ups=0.09, wpb=64826, bsz=128, num_updates=17398, lr=9.98688e-05, gnorm=4.037, loss_scale=8, train_wall=11, gb_free=2.8, wall=200072 2021-06-21 02:13:29 | INFO | train_inner | epoch 006: 2491 / 3002 loss=2.501, ppl=5.66, wps=5854.2, ups=0.09, wpb=64845, bsz=128, num_updates=17399, lr=9.98688e-05, gnorm=2.321, loss_scale=8, train_wall=11, gb_free=2.8, wall=200083 2021-06-21 02:13:40 | INFO | train_inner | epoch 006: 2492 / 3002 loss=2.519, ppl=5.73, wps=5880.7, ups=0.09, wpb=64833, bsz=128, num_updates=17400, lr=9.98688e-05, gnorm=2.285, loss_scale=8, train_wall=11, gb_free=2.8, wall=200094 2021-06-21 02:13:51 | INFO | train_inner | epoch 006: 2493 / 3002 loss=2.387, ppl=5.23, wps=5791.2, ups=0.09, wpb=64843, bsz=128, num_updates=17401, lr=9.98688e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=200105 2021-06-21 02:14:02 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 2021-06-21 02:14:13 | INFO | train_inner | epoch 006: 2495 / 3002 loss=2.295, ppl=4.91, wps=2925.5, ups=0.05, wpb=64826, bsz=128, num_updates=17402, lr=9.98688e-05, gnorm=2.034, loss_scale=4, train_wall=21, gb_free=2.8, wall=200128 2021-06-21 02:14:24 | INFO | train_inner | epoch 006: 2496 / 3002 loss=2.527, ppl=5.76, wps=5834.9, ups=0.09, wpb=64812, bsz=128, num_updates=17403, lr=9.98688e-05, gnorm=1.963, loss_scale=4, train_wall=11, gb_free=2.8, wall=200139 2021-06-21 02:14:35 | INFO | train_inner | epoch 006: 2497 / 3002 loss=2.422, ppl=5.36, wps=5849.7, ups=0.09, wpb=64869, bsz=128, num_updates=17404, lr=9.98688e-05, gnorm=2.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=200150 2021-06-21 02:14:46 | INFO | train_inner | epoch 006: 2498 / 3002 loss=2.381, ppl=5.21, wps=5896.6, ups=0.09, wpb=64806, bsz=128, num_updates=17405, lr=9.98687e-05, gnorm=2.037, loss_scale=4, train_wall=11, gb_free=2.8, wall=200161 2021-06-21 02:14:57 | INFO | train_inner | epoch 006: 2499 / 3002 loss=2.387, ppl=5.23, wps=5873, ups=0.09, wpb=64852, bsz=128, num_updates=17406, lr=9.98687e-05, gnorm=1.958, loss_scale=4, train_wall=11, gb_free=2.8, wall=200172 2021-06-21 02:15:09 | INFO | train_inner | epoch 006: 2500 / 3002 loss=2.445, ppl=5.44, wps=5815.9, ups=0.09, wpb=64771, bsz=128, num_updates=17407, lr=9.98687e-05, gnorm=2.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=200183 2021-06-21 02:15:20 | INFO | train_inner | epoch 006: 2501 / 3002 loss=2.451, ppl=5.47, wps=5813.6, ups=0.09, wpb=64830, bsz=128, num_updates=17408, lr=9.98687e-05, gnorm=2.13, loss_scale=4, train_wall=11, gb_free=2.8, wall=200194 2021-06-21 02:15:31 | INFO | train_inner | epoch 006: 2502 / 3002 loss=2.509, ppl=5.69, wps=5830, ups=0.09, wpb=64836, bsz=128, num_updates=17409, lr=9.98687e-05, gnorm=1.934, loss_scale=4, train_wall=11, gb_free=2.8, wall=200205 2021-06-21 02:15:42 | INFO | train_inner | epoch 006: 2503 / 3002 loss=2.551, ppl=5.86, wps=5814.4, ups=0.09, wpb=64777, bsz=128, num_updates=17410, lr=9.98687e-05, gnorm=2.163, loss_scale=4, train_wall=11, gb_free=2.8, wall=200216 2021-06-21 02:15:53 | INFO | train_inner | epoch 006: 2504 / 3002 loss=2.452, ppl=5.47, wps=5888.4, ups=0.09, wpb=64915, bsz=128, num_updates=17411, lr=9.98687e-05, gnorm=1.92, loss_scale=4, train_wall=11, gb_free=2.8, wall=200227 2021-06-21 02:16:04 | INFO | train_inner | epoch 006: 2505 / 3002 loss=2.515, ppl=5.72, wps=5862.1, ups=0.09, wpb=64811, bsz=128, num_updates=17412, lr=9.98687e-05, gnorm=1.903, loss_scale=4, train_wall=11, gb_free=2.8, wall=200238 2021-06-21 02:16:15 | INFO | train_inner | epoch 006: 2506 / 3002 loss=2.434, ppl=5.4, wps=5823.2, ups=0.09, wpb=64838, bsz=128, num_updates=17413, lr=9.98687e-05, gnorm=2.063, loss_scale=4, train_wall=11, gb_free=2.8, wall=200250 2021-06-21 02:16:26 | INFO | train_inner | epoch 006: 2507 / 3002 loss=2.388, ppl=5.23, wps=5839.4, ups=0.09, wpb=64728, bsz=128, num_updates=17414, lr=9.98687e-05, gnorm=5.922, loss_scale=4, train_wall=11, gb_free=2.8, wall=200261 2021-06-21 02:16:37 | INFO | train_inner | epoch 006: 2508 / 3002 loss=2.451, ppl=5.47, wps=5933.8, ups=0.09, wpb=64894, bsz=128, num_updates=17415, lr=9.98687e-05, gnorm=2.017, loss_scale=4, train_wall=10, gb_free=2.8, wall=200272 2021-06-21 02:16:48 | INFO | train_inner | epoch 006: 2509 / 3002 loss=2.505, ppl=5.68, wps=5897.6, ups=0.09, wpb=64786, bsz=128, num_updates=17416, lr=9.98687e-05, gnorm=1.962, loss_scale=4, train_wall=11, gb_free=2.8, wall=200283 2021-06-21 02:16:59 | INFO | train_inner | epoch 006: 2510 / 3002 loss=2.597, ppl=6.05, wps=5819.5, ups=0.09, wpb=64863, bsz=128, num_updates=17417, lr=9.98687e-05, gnorm=1.955, loss_scale=4, train_wall=11, gb_free=2.8, wall=200294 2021-06-21 02:17:10 | INFO | train_inner | epoch 006: 2511 / 3002 loss=2.564, ppl=5.91, wps=5859.6, ups=0.09, wpb=64845, bsz=128, num_updates=17418, lr=9.98686e-05, gnorm=1.98, loss_scale=4, train_wall=11, gb_free=2.8, wall=200305 2021-06-21 02:17:22 | INFO | train_inner | epoch 006: 2512 / 3002 loss=2.474, ppl=5.55, wps=5779.3, ups=0.09, wpb=64825, bsz=128, num_updates=17419, lr=9.98686e-05, gnorm=10.283, loss_scale=4, train_wall=11, gb_free=2.8, wall=200316 2021-06-21 02:17:33 | INFO | train_inner | epoch 006: 2513 / 3002 loss=2.566, ppl=5.92, wps=5803.1, ups=0.09, wpb=64825, bsz=128, num_updates=17420, lr=9.98686e-05, gnorm=1.932, loss_scale=4, train_wall=11, gb_free=2.8, wall=200327 2021-06-21 02:17:44 | INFO | train_inner | epoch 006: 2514 / 3002 loss=2.454, ppl=5.48, wps=5825.6, ups=0.09, wpb=64728, bsz=128, num_updates=17421, lr=9.98686e-05, gnorm=2.228, loss_scale=4, train_wall=11, gb_free=2.8, wall=200338 2021-06-21 02:17:55 | INFO | train_inner | epoch 006: 2515 / 3002 loss=2.462, ppl=5.51, wps=5842.5, ups=0.09, wpb=64834, bsz=128, num_updates=17422, lr=9.98686e-05, gnorm=3.636, loss_scale=4, train_wall=11, gb_free=2.8, wall=200349 2021-06-21 02:18:06 | INFO | train_inner | epoch 006: 2516 / 3002 loss=2.516, ppl=5.72, wps=5868.8, ups=0.09, wpb=64884, bsz=128, num_updates=17423, lr=9.98686e-05, gnorm=2.05, loss_scale=4, train_wall=11, gb_free=2.8, wall=200360 2021-06-21 02:18:17 | INFO | train_inner | epoch 006: 2517 / 3002 loss=2.631, ppl=6.2, wps=5873.3, ups=0.09, wpb=64846, bsz=128, num_updates=17424, lr=9.98686e-05, gnorm=2.276, loss_scale=4, train_wall=11, gb_free=2.8, wall=200371 2021-06-21 02:18:28 | INFO | train_inner | epoch 006: 2518 / 3002 loss=2.43, ppl=5.39, wps=5800.9, ups=0.09, wpb=64831, bsz=128, num_updates=17425, lr=9.98686e-05, gnorm=2.203, loss_scale=4, train_wall=11, gb_free=2.8, wall=200383 2021-06-21 02:18:39 | INFO | train_inner | epoch 006: 2519 / 3002 loss=2.427, ppl=5.38, wps=5873.3, ups=0.09, wpb=64899, bsz=128, num_updates=17426, lr=9.98686e-05, gnorm=2.078, loss_scale=4, train_wall=11, gb_free=2.8, wall=200394 2021-06-21 02:18:50 | INFO | train_inner | epoch 006: 2520 / 3002 loss=2.632, ppl=6.2, wps=5941.5, ups=0.09, wpb=64813, bsz=128, num_updates=17427, lr=9.98686e-05, gnorm=2.045, loss_scale=4, train_wall=10, gb_free=2.8, wall=200405 2021-06-21 02:19:01 | INFO | train_inner | epoch 006: 2521 / 3002 loss=2.465, ppl=5.52, wps=5795.8, ups=0.09, wpb=64877, bsz=128, num_updates=17428, lr=9.98686e-05, gnorm=1.965, loss_scale=4, train_wall=11, gb_free=2.8, wall=200416 2021-06-21 02:19:12 | INFO | train_inner | epoch 006: 2522 / 3002 loss=2.444, ppl=5.44, wps=5946.3, ups=0.09, wpb=64890, bsz=128, num_updates=17429, lr=9.98686e-05, gnorm=1.946, loss_scale=4, train_wall=10, gb_free=2.8, wall=200427 2021-06-21 02:19:23 | INFO | train_inner | epoch 006: 2523 / 3002 loss=2.497, ppl=5.64, wps=5878, ups=0.09, wpb=64753, bsz=128, num_updates=17430, lr=9.98685e-05, gnorm=1.964, loss_scale=4, train_wall=11, gb_free=2.8, wall=200438 2021-06-21 02:19:34 | INFO | train_inner | epoch 006: 2524 / 3002 loss=2.612, ppl=6.12, wps=5896.6, ups=0.09, wpb=64764, bsz=128, num_updates=17431, lr=9.98685e-05, gnorm=1.977, loss_scale=4, train_wall=11, gb_free=2.8, wall=200449 2021-06-21 02:19:46 | INFO | train_inner | epoch 006: 2525 / 3002 loss=2.392, ppl=5.25, wps=5749.3, ups=0.09, wpb=64763, bsz=128, num_updates=17432, lr=9.98685e-05, gnorm=2.046, loss_scale=4, train_wall=11, gb_free=2.8, wall=200460 2021-06-21 02:19:56 | INFO | train_inner | epoch 006: 2526 / 3002 loss=2.384, ppl=5.22, wps=5997.4, ups=0.09, wpb=64835, bsz=128, num_updates=17433, lr=9.98685e-05, gnorm=1.991, loss_scale=4, train_wall=10, gb_free=2.8, wall=200471 2021-06-21 02:20:07 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-21 02:20:18 | INFO | train_inner | epoch 006: 2528 / 3002 loss=2.437, ppl=5.42, wps=2937.4, ups=0.05, wpb=64812, bsz=128, num_updates=17434, lr=9.98685e-05, gnorm=1.944, loss_scale=2, train_wall=21, gb_free=2.8, wall=200493 2021-06-21 02:20:30 | INFO | train_inner | epoch 006: 2529 / 3002 loss=2.534, ppl=5.79, wps=5865.2, ups=0.09, wpb=64884, bsz=128, num_updates=17435, lr=9.98685e-05, gnorm=2.247, loss_scale=2, train_wall=11, gb_free=2.8, wall=200504 2021-06-21 02:20:40 | INFO | train_inner | epoch 006: 2530 / 3002 loss=2.464, ppl=5.52, wps=5926, ups=0.09, wpb=64792, bsz=128, num_updates=17436, lr=9.98685e-05, gnorm=2.032, loss_scale=2, train_wall=10, gb_free=2.8, wall=200515 2021-06-21 02:20:51 | INFO | train_inner | epoch 006: 2531 / 3002 loss=2.439, ppl=5.42, wps=5948.3, ups=0.09, wpb=64835, bsz=128, num_updates=17437, lr=9.98685e-05, gnorm=2.016, loss_scale=2, train_wall=10, gb_free=2.8, wall=200526 2021-06-21 02:21:02 | INFO | train_inner | epoch 006: 2532 / 3002 loss=2.572, ppl=5.94, wps=5857.3, ups=0.09, wpb=64838, bsz=128, num_updates=17438, lr=9.98685e-05, gnorm=2.147, loss_scale=2, train_wall=11, gb_free=2.8, wall=200537 2021-06-21 02:21:13 | INFO | train_inner | epoch 006: 2533 / 3002 loss=2.517, ppl=5.72, wps=5907.2, ups=0.09, wpb=64848, bsz=128, num_updates=17439, lr=9.98685e-05, gnorm=2.01, loss_scale=2, train_wall=11, gb_free=2.8, wall=200548 2021-06-21 02:21:24 | INFO | train_inner | epoch 006: 2534 / 3002 loss=2.418, ppl=5.34, wps=5895.1, ups=0.09, wpb=64937, bsz=128, num_updates=17440, lr=9.98685e-05, gnorm=1.943, loss_scale=2, train_wall=11, gb_free=2.8, wall=200559 2021-06-21 02:21:35 | INFO | train_inner | epoch 006: 2535 / 3002 loss=2.48, ppl=5.58, wps=5937.6, ups=0.09, wpb=64826, bsz=128, num_updates=17441, lr=9.98685e-05, gnorm=2.012, loss_scale=2, train_wall=10, gb_free=2.8, wall=200570 2021-06-21 02:21:46 | INFO | train_inner | epoch 006: 2536 / 3002 loss=2.382, ppl=5.21, wps=5875.3, ups=0.09, wpb=64809, bsz=128, num_updates=17442, lr=9.98685e-05, gnorm=1.866, loss_scale=2, train_wall=11, gb_free=2.8, wall=200581 2021-06-21 02:21:57 | INFO | train_inner | epoch 006: 2537 / 3002 loss=2.629, ppl=6.19, wps=5857.3, ups=0.09, wpb=64875, bsz=128, num_updates=17443, lr=9.98684e-05, gnorm=1.961, loss_scale=2, train_wall=11, gb_free=2.8, wall=200592 2021-06-21 02:22:09 | INFO | train_inner | epoch 006: 2538 / 3002 loss=2.44, ppl=5.43, wps=5791.1, ups=0.09, wpb=64777, bsz=128, num_updates=17444, lr=9.98684e-05, gnorm=1.864, loss_scale=2, train_wall=11, gb_free=2.8, wall=200603 2021-06-21 02:22:20 | INFO | train_inner | epoch 006: 2539 / 3002 loss=2.421, ppl=5.36, wps=5815.7, ups=0.09, wpb=64849, bsz=128, num_updates=17445, lr=9.98684e-05, gnorm=2.056, loss_scale=2, train_wall=11, gb_free=2.8, wall=200614 2021-06-21 02:22:31 | INFO | train_inner | epoch 006: 2540 / 3002 loss=2.365, ppl=5.15, wps=5869.1, ups=0.09, wpb=64931, bsz=128, num_updates=17446, lr=9.98684e-05, gnorm=1.944, loss_scale=2, train_wall=11, gb_free=2.8, wall=200625 2021-06-21 02:22:42 | INFO | train_inner | epoch 006: 2541 / 3002 loss=2.334, ppl=5.04, wps=5828.8, ups=0.09, wpb=64889, bsz=128, num_updates=17447, lr=9.98684e-05, gnorm=1.96, loss_scale=2, train_wall=11, gb_free=2.8, wall=200636 2021-06-21 02:22:53 | INFO | train_inner | epoch 006: 2542 / 3002 loss=2.449, ppl=5.46, wps=5878.6, ups=0.09, wpb=64871, bsz=128, num_updates=17448, lr=9.98684e-05, gnorm=4.393, loss_scale=2, train_wall=11, gb_free=2.8, wall=200647 2021-06-21 02:23:04 | INFO | train_inner | epoch 006: 2543 / 3002 loss=2.587, ppl=6.01, wps=5873.6, ups=0.09, wpb=64858, bsz=128, num_updates=17449, lr=9.98684e-05, gnorm=2.017, loss_scale=2, train_wall=11, gb_free=2.8, wall=200658 2021-06-21 02:23:15 | INFO | train_inner | epoch 006: 2544 / 3002 loss=2.569, ppl=5.93, wps=5983.4, ups=0.09, wpb=64848, bsz=128, num_updates=17450, lr=9.98684e-05, gnorm=2.337, loss_scale=2, train_wall=10, gb_free=2.8, wall=200669 2021-06-21 02:23:26 | INFO | train_inner | epoch 006: 2545 / 3002 loss=2.401, ppl=5.28, wps=5870.4, ups=0.09, wpb=64802, bsz=128, num_updates=17451, lr=9.98684e-05, gnorm=1.884, loss_scale=2, train_wall=11, gb_free=2.8, wall=200680 2021-06-21 02:23:37 | INFO | train_inner | epoch 006: 2546 / 3002 loss=2.496, ppl=5.64, wps=5812.7, ups=0.09, wpb=64858, bsz=128, num_updates=17452, lr=9.98684e-05, gnorm=2, loss_scale=2, train_wall=11, gb_free=2.8, wall=200691 2021-06-21 02:23:48 | INFO | train_inner | epoch 006: 2547 / 3002 loss=2.516, ppl=5.72, wps=5758.8, ups=0.09, wpb=64802, bsz=128, num_updates=17453, lr=9.98684e-05, gnorm=1.941, loss_scale=2, train_wall=11, gb_free=2.8, wall=200703 2021-06-21 02:24:00 | INFO | train_inner | epoch 006: 2548 / 3002 loss=2.443, ppl=5.44, wps=5809, ups=0.09, wpb=64860, bsz=128, num_updates=17454, lr=9.98684e-05, gnorm=1.976, loss_scale=2, train_wall=11, gb_free=2.8, wall=200714 2021-06-21 02:24:11 | INFO | train_inner | epoch 006: 2549 / 3002 loss=2.499, ppl=5.65, wps=5760.4, ups=0.09, wpb=64812, bsz=128, num_updates=17455, lr=9.98683e-05, gnorm=1.937, loss_scale=2, train_wall=11, gb_free=2.8, wall=200725 2021-06-21 02:24:22 | INFO | train_inner | epoch 006: 2550 / 3002 loss=2.509, ppl=5.69, wps=5846.4, ups=0.09, wpb=64804, bsz=128, num_updates=17456, lr=9.98683e-05, gnorm=3.284, loss_scale=2, train_wall=11, gb_free=2.8, wall=200736 2021-06-21 02:24:33 | INFO | train_inner | epoch 006: 2551 / 3002 loss=2.415, ppl=5.33, wps=5857.9, ups=0.09, wpb=64846, bsz=128, num_updates=17457, lr=9.98683e-05, gnorm=1.927, loss_scale=2, train_wall=11, gb_free=2.8, wall=200747 2021-06-21 02:24:44 | INFO | train_inner | epoch 006: 2552 / 3002 loss=2.379, ppl=5.2, wps=5937.8, ups=0.09, wpb=64857, bsz=128, num_updates=17458, lr=9.98683e-05, gnorm=1.922, loss_scale=2, train_wall=10, gb_free=2.8, wall=200758 2021-06-21 02:24:55 | INFO | train_inner | epoch 006: 2553 / 3002 loss=2.497, ppl=5.64, wps=5914.3, ups=0.09, wpb=64821, bsz=128, num_updates=17459, lr=9.98683e-05, gnorm=2.038, loss_scale=2, train_wall=10, gb_free=2.8, wall=200769 2021-06-21 02:25:06 | INFO | train_inner | epoch 006: 2554 / 3002 loss=2.55, ppl=5.86, wps=5809.7, ups=0.09, wpb=64818, bsz=128, num_updates=17460, lr=9.98683e-05, gnorm=3.451, loss_scale=2, train_wall=11, gb_free=2.8, wall=200780 2021-06-21 02:25:17 | INFO | train_inner | epoch 006: 2555 / 3002 loss=2.364, ppl=5.15, wps=5968.3, ups=0.09, wpb=64842, bsz=128, num_updates=17461, lr=9.98683e-05, gnorm=1.983, loss_scale=2, train_wall=10, gb_free=2.8, wall=200791 2021-06-21 02:25:28 | INFO | train_inner | epoch 006: 2556 / 3002 loss=2.533, ppl=5.79, wps=5780.8, ups=0.09, wpb=64801, bsz=128, num_updates=17462, lr=9.98683e-05, gnorm=1.947, loss_scale=2, train_wall=11, gb_free=2.8, wall=200802 2021-06-21 02:25:39 | INFO | train_inner | epoch 006: 2557 / 3002 loss=2.309, ppl=4.96, wps=5905.9, ups=0.09, wpb=64902, bsz=128, num_updates=17463, lr=9.98683e-05, gnorm=1.949, loss_scale=2, train_wall=11, gb_free=2.8, wall=200813 2021-06-21 02:25:50 | INFO | train_inner | epoch 006: 2558 / 3002 loss=2.506, ppl=5.68, wps=5822.2, ups=0.09, wpb=64773, bsz=128, num_updates=17464, lr=9.98683e-05, gnorm=1.934, loss_scale=2, train_wall=11, gb_free=2.8, wall=200825 2021-06-21 02:26:01 | INFO | train_inner | epoch 006: 2559 / 3002 loss=2.624, ppl=6.17, wps=6100.9, ups=0.09, wpb=64847, bsz=128, num_updates=17465, lr=9.98683e-05, gnorm=1.961, loss_scale=2, train_wall=10, gb_free=2.8, wall=200835 2021-06-21 02:26:12 | INFO | train_inner | epoch 006: 2560 / 3002 loss=2.489, ppl=5.61, wps=5850.6, ups=0.09, wpb=64742, bsz=128, num_updates=17466, lr=9.98683e-05, gnorm=1.838, loss_scale=2, train_wall=11, gb_free=2.8, wall=200846 2021-06-21 02:26:23 | INFO | train_inner | epoch 006: 2561 / 3002 loss=2.448, ppl=5.46, wps=5918.2, ups=0.09, wpb=64812, bsz=128, num_updates=17467, lr=9.98683e-05, gnorm=1.948, loss_scale=2, train_wall=11, gb_free=2.8, wall=200857 2021-06-21 02:26:34 | INFO | train_inner | epoch 006: 2562 / 3002 loss=2.477, ppl=5.57, wps=5875.8, ups=0.09, wpb=64767, bsz=128, num_updates=17468, lr=9.98682e-05, gnorm=2.044, loss_scale=2, train_wall=11, gb_free=2.8, wall=200868 2021-06-21 02:26:45 | INFO | train_inner | epoch 006: 2563 / 3002 loss=2.397, ppl=5.27, wps=5849.2, ups=0.09, wpb=64843, bsz=128, num_updates=17469, lr=9.98682e-05, gnorm=2.04, loss_scale=2, train_wall=11, gb_free=2.8, wall=200879 2021-06-21 02:26:56 | INFO | train_inner | epoch 006: 2564 / 3002 loss=2.545, ppl=5.84, wps=5780.2, ups=0.09, wpb=64745, bsz=128, num_updates=17470, lr=9.98682e-05, gnorm=2.082, loss_scale=2, train_wall=11, gb_free=2.8, wall=200890 2021-06-21 02:27:07 | INFO | train_inner | epoch 006: 2565 / 3002 loss=2.481, ppl=5.58, wps=5813.1, ups=0.09, wpb=64750, bsz=128, num_updates=17471, lr=9.98682e-05, gnorm=11.151, loss_scale=2, train_wall=11, gb_free=2.8, wall=200902 2021-06-21 02:27:18 | INFO | train_inner | epoch 006: 2566 / 3002 loss=2.311, ppl=4.96, wps=5830.9, ups=0.09, wpb=64847, bsz=128, num_updates=17472, lr=9.98682e-05, gnorm=6.167, loss_scale=2, train_wall=11, gb_free=2.8, wall=200913 2021-06-21 02:27:29 | INFO | train_inner | epoch 006: 2567 / 3002 loss=2.449, ppl=5.46, wps=5903.5, ups=0.09, wpb=64719, bsz=128, num_updates=17473, lr=9.98682e-05, gnorm=1.947, loss_scale=2, train_wall=11, gb_free=2.8, wall=200924 2021-06-21 02:27:40 | INFO | train_inner | epoch 006: 2568 / 3002 loss=2.609, ppl=6.1, wps=5855, ups=0.09, wpb=64848, bsz=128, num_updates=17474, lr=9.98682e-05, gnorm=2.173, loss_scale=2, train_wall=11, gb_free=2.8, wall=200935 2021-06-21 02:27:51 | INFO | train_inner | epoch 006: 2569 / 3002 loss=2.361, ppl=5.14, wps=6115.6, ups=0.09, wpb=64874, bsz=128, num_updates=17475, lr=9.98682e-05, gnorm=2.008, loss_scale=2, train_wall=10, gb_free=2.8, wall=200945 2021-06-21 02:28:02 | INFO | train_inner | epoch 006: 2570 / 3002 loss=2.325, ppl=5.01, wps=5838.9, ups=0.09, wpb=64790, bsz=128, num_updates=17476, lr=9.98682e-05, gnorm=1.988, loss_scale=2, train_wall=11, gb_free=2.8, wall=200956 2021-06-21 02:28:13 | INFO | train_inner | epoch 006: 2571 / 3002 loss=2.487, ppl=5.6, wps=5851.2, ups=0.09, wpb=64784, bsz=128, num_updates=17477, lr=9.98682e-05, gnorm=2.094, loss_scale=2, train_wall=11, gb_free=2.8, wall=200968 2021-06-21 02:28:24 | INFO | train_inner | epoch 006: 2572 / 3002 loss=2.503, ppl=5.67, wps=5935.6, ups=0.09, wpb=64926, bsz=128, num_updates=17478, lr=9.98682e-05, gnorm=4.368, loss_scale=2, train_wall=10, gb_free=2.8, wall=200978 2021-06-21 02:28:35 | INFO | train_inner | epoch 006: 2573 / 3002 loss=2.299, ppl=4.92, wps=5820.4, ups=0.09, wpb=64904, bsz=128, num_updates=17479, lr=9.98682e-05, gnorm=1.923, loss_scale=2, train_wall=11, gb_free=2.8, wall=200990 2021-06-21 02:28:46 | INFO | train_inner | epoch 006: 2574 / 3002 loss=2.51, ppl=5.7, wps=5904.5, ups=0.09, wpb=64870, bsz=128, num_updates=17480, lr=9.98681e-05, gnorm=2.145, loss_scale=2, train_wall=11, gb_free=2.8, wall=201001 2021-06-21 02:28:57 | INFO | train_inner | epoch 006: 2575 / 3002 loss=2.602, ppl=6.07, wps=5870.7, ups=0.09, wpb=64841, bsz=128, num_updates=17481, lr=9.98681e-05, gnorm=2.136, loss_scale=2, train_wall=11, gb_free=2.8, wall=201012 2021-06-21 02:29:08 | INFO | train_inner | epoch 006: 2576 / 3002 loss=2.359, ppl=5.13, wps=5941.3, ups=0.09, wpb=64833, bsz=128, num_updates=17482, lr=9.98681e-05, gnorm=2.151, loss_scale=2, train_wall=10, gb_free=2.8, wall=201023 2021-06-21 02:29:19 | INFO | train_inner | epoch 006: 2577 / 3002 loss=2.38, ppl=5.2, wps=5848.8, ups=0.09, wpb=64780, bsz=128, num_updates=17483, lr=9.98681e-05, gnorm=2.002, loss_scale=2, train_wall=11, gb_free=2.8, wall=201034 2021-06-21 02:29:30 | INFO | train_inner | epoch 006: 2578 / 3002 loss=2.406, ppl=5.3, wps=5868.4, ups=0.09, wpb=64849, bsz=128, num_updates=17484, lr=9.98681e-05, gnorm=2.107, loss_scale=2, train_wall=11, gb_free=2.8, wall=201045 2021-06-21 02:29:42 | INFO | train_inner | epoch 006: 2579 / 3002 loss=2.424, ppl=5.37, wps=5771.8, ups=0.09, wpb=64840, bsz=128, num_updates=17485, lr=9.98681e-05, gnorm=2.187, loss_scale=2, train_wall=11, gb_free=2.8, wall=201056 2021-06-21 02:29:53 | INFO | train_inner | epoch 006: 2580 / 3002 loss=2.515, ppl=5.72, wps=5929.3, ups=0.09, wpb=64835, bsz=128, num_updates=17486, lr=9.98681e-05, gnorm=2.015, loss_scale=2, train_wall=10, gb_free=2.8, wall=201067 2021-06-21 02:30:04 | INFO | train_inner | epoch 006: 2581 / 3002 loss=2.624, ppl=6.17, wps=5741.2, ups=0.09, wpb=64809, bsz=128, num_updates=17487, lr=9.98681e-05, gnorm=2.591, loss_scale=2, train_wall=11, gb_free=2.8, wall=201078 2021-06-21 02:30:15 | INFO | train_inner | epoch 006: 2582 / 3002 loss=2.635, ppl=6.21, wps=5981.2, ups=0.09, wpb=64857, bsz=128, num_updates=17488, lr=9.98681e-05, gnorm=5.219, loss_scale=2, train_wall=10, gb_free=2.8, wall=201089 2021-06-21 02:30:26 | INFO | train_inner | epoch 006: 2583 / 3002 loss=2.532, ppl=5.78, wps=5906.1, ups=0.09, wpb=64812, bsz=128, num_updates=17489, lr=9.98681e-05, gnorm=1.944, loss_scale=2, train_wall=11, gb_free=2.8, wall=201100 2021-06-21 02:30:37 | INFO | train_inner | epoch 006: 2584 / 3002 loss=2.547, ppl=5.84, wps=5881.9, ups=0.09, wpb=64887, bsz=128, num_updates=17490, lr=9.98681e-05, gnorm=2.159, loss_scale=2, train_wall=11, gb_free=2.8, wall=201111 2021-06-21 02:30:48 | INFO | train_inner | epoch 006: 2585 / 3002 loss=2.555, ppl=5.87, wps=5879.2, ups=0.09, wpb=64871, bsz=128, num_updates=17491, lr=9.98681e-05, gnorm=2.019, loss_scale=2, train_wall=11, gb_free=2.8, wall=201122 2021-06-21 02:30:59 | INFO | train_inner | epoch 006: 2586 / 3002 loss=2.432, ppl=5.4, wps=5921.9, ups=0.09, wpb=64918, bsz=128, num_updates=17492, lr=9.98681e-05, gnorm=1.975, loss_scale=2, train_wall=10, gb_free=2.8, wall=201133 2021-06-21 02:31:10 | INFO | train_inner | epoch 006: 2587 / 3002 loss=2.381, ppl=5.21, wps=5824, ups=0.09, wpb=64879, bsz=128, num_updates=17493, lr=9.9868e-05, gnorm=2.14, loss_scale=2, train_wall=11, gb_free=2.8, wall=201144 2021-06-21 02:31:21 | INFO | train_inner | epoch 006: 2588 / 3002 loss=2.471, ppl=5.55, wps=5804.8, ups=0.09, wpb=64880, bsz=128, num_updates=17494, lr=9.9868e-05, gnorm=5.84, loss_scale=2, train_wall=11, gb_free=2.8, wall=201155 2021-06-21 02:31:32 | INFO | train_inner | epoch 006: 2589 / 3002 loss=2.481, ppl=5.58, wps=5758.3, ups=0.09, wpb=64858, bsz=128, num_updates=17495, lr=9.9868e-05, gnorm=2.484, loss_scale=2, train_wall=11, gb_free=2.8, wall=201167 2021-06-21 02:31:43 | INFO | train_inner | epoch 006: 2590 / 3002 loss=2.526, ppl=5.76, wps=5980.1, ups=0.09, wpb=64900, bsz=128, num_updates=17496, lr=9.9868e-05, gnorm=1.997, loss_scale=2, train_wall=10, gb_free=2.8, wall=201177 2021-06-21 02:31:54 | INFO | train_inner | epoch 006: 2591 / 3002 loss=2.675, ppl=6.38, wps=5951.5, ups=0.09, wpb=64804, bsz=128, num_updates=17497, lr=9.9868e-05, gnorm=3.496, loss_scale=2, train_wall=10, gb_free=2.8, wall=201188 2021-06-21 02:32:05 | INFO | train_inner | epoch 006: 2592 / 3002 loss=2.416, ppl=5.34, wps=5829.3, ups=0.09, wpb=64768, bsz=128, num_updates=17498, lr=9.9868e-05, gnorm=2.399, loss_scale=2, train_wall=11, gb_free=2.8, wall=201199 2021-06-21 02:32:16 | INFO | train_inner | epoch 006: 2593 / 3002 loss=2.469, ppl=5.54, wps=5921.8, ups=0.09, wpb=64804, bsz=128, num_updates=17499, lr=9.9868e-05, gnorm=2.67, loss_scale=2, train_wall=10, gb_free=2.8, wall=201210 2021-06-21 02:32:27 | INFO | train_inner | epoch 006: 2594 / 3002 loss=2.489, ppl=5.61, wps=5890.7, ups=0.09, wpb=64809, bsz=128, num_updates=17500, lr=9.9868e-05, gnorm=14.66, loss_scale=2, train_wall=11, gb_free=2.8, wall=201221 2021-06-21 02:32:38 | INFO | train_inner | epoch 006: 2595 / 3002 loss=2.636, ppl=6.22, wps=5910.7, ups=0.09, wpb=64833, bsz=128, num_updates=17501, lr=9.9868e-05, gnorm=1.977, loss_scale=2, train_wall=11, gb_free=2.8, wall=201232 2021-06-21 02:32:49 | INFO | train_inner | epoch 006: 2596 / 3002 loss=2.501, ppl=5.66, wps=5844.9, ups=0.09, wpb=64892, bsz=128, num_updates=17502, lr=9.9868e-05, gnorm=7.298, loss_scale=2, train_wall=11, gb_free=2.8, wall=201243 2021-06-21 02:33:00 | INFO | train_inner | epoch 006: 2597 / 3002 loss=2.538, ppl=5.81, wps=5946, ups=0.09, wpb=64674, bsz=128, num_updates=17503, lr=9.9868e-05, gnorm=2.509, loss_scale=2, train_wall=10, gb_free=2.8, wall=201254 2021-06-21 02:33:11 | INFO | train_inner | epoch 006: 2598 / 3002 loss=2.502, ppl=5.66, wps=5940.1, ups=0.09, wpb=64887, bsz=128, num_updates=17504, lr=9.9868e-05, gnorm=9.989, loss_scale=2, train_wall=10, gb_free=2.8, wall=201265 2021-06-21 02:33:22 | INFO | train_inner | epoch 006: 2599 / 3002 loss=2.502, ppl=5.66, wps=5889.8, ups=0.09, wpb=64775, bsz=128, num_updates=17505, lr=9.98679e-05, gnorm=2.523, loss_scale=2, train_wall=11, gb_free=2.8, wall=201276 2021-06-21 02:33:33 | INFO | train_inner | epoch 006: 2600 / 3002 loss=2.564, ppl=5.91, wps=5881.5, ups=0.09, wpb=64757, bsz=128, num_updates=17506, lr=9.98679e-05, gnorm=4.279, loss_scale=2, train_wall=11, gb_free=2.8, wall=201287 2021-06-21 02:33:44 | INFO | train_inner | epoch 006: 2601 / 3002 loss=2.471, ppl=5.54, wps=5881.8, ups=0.09, wpb=64904, bsz=128, num_updates=17507, lr=9.98679e-05, gnorm=2.379, loss_scale=2, train_wall=11, gb_free=2.8, wall=201298 2021-06-21 02:33:55 | INFO | train_inner | epoch 006: 2602 / 3002 loss=2.329, ppl=5.02, wps=5840.2, ups=0.09, wpb=64836, bsz=128, num_updates=17508, lr=9.98679e-05, gnorm=2.949, loss_scale=2, train_wall=11, gb_free=2.8, wall=201309 2021-06-21 02:34:06 | INFO | train_inner | epoch 006: 2603 / 3002 loss=2.59, ppl=6.02, wps=5801.3, ups=0.09, wpb=64803, bsz=128, num_updates=17509, lr=9.98679e-05, gnorm=2.334, loss_scale=2, train_wall=11, gb_free=2.8, wall=201321 2021-06-21 02:34:18 | INFO | train_inner | epoch 006: 2604 / 3002 loss=2.588, ppl=6.01, wps=5736.2, ups=0.09, wpb=64784, bsz=128, num_updates=17510, lr=9.98679e-05, gnorm=2.188, loss_scale=2, train_wall=11, gb_free=2.8, wall=201332 2021-06-21 02:34:28 | INFO | train_inner | epoch 006: 2605 / 3002 loss=2.454, ppl=5.48, wps=5957.2, ups=0.09, wpb=64892, bsz=128, num_updates=17511, lr=9.98679e-05, gnorm=2.997, loss_scale=2, train_wall=10, gb_free=2.8, wall=201343 2021-06-21 02:34:40 | INFO | train_inner | epoch 006: 2606 / 3002 loss=2.464, ppl=5.52, wps=5781.3, ups=0.09, wpb=64843, bsz=128, num_updates=17512, lr=9.98679e-05, gnorm=2.692, loss_scale=2, train_wall=11, gb_free=2.8, wall=201354 2021-06-21 02:34:51 | INFO | train_inner | epoch 006: 2607 / 3002 loss=2.391, ppl=5.24, wps=5823.2, ups=0.09, wpb=64816, bsz=128, num_updates=17513, lr=9.98679e-05, gnorm=11.301, loss_scale=2, train_wall=11, gb_free=2.8, wall=201365 2021-06-21 02:35:02 | INFO | train_inner | epoch 006: 2608 / 3002 loss=2.601, ppl=6.07, wps=5741.5, ups=0.09, wpb=64778, bsz=128, num_updates=17514, lr=9.98679e-05, gnorm=4.659, loss_scale=2, train_wall=11, gb_free=2.8, wall=201376 2021-06-21 02:35:13 | INFO | train_inner | epoch 006: 2609 / 3002 loss=2.488, ppl=5.61, wps=5753.6, ups=0.09, wpb=64764, bsz=128, num_updates=17515, lr=9.98679e-05, gnorm=2.216, loss_scale=2, train_wall=11, gb_free=2.8, wall=201388 2021-06-21 02:35:24 | INFO | train_inner | epoch 006: 2610 / 3002 loss=2.433, ppl=5.4, wps=5945.2, ups=0.09, wpb=64909, bsz=128, num_updates=17516, lr=9.98679e-05, gnorm=2.382, loss_scale=2, train_wall=10, gb_free=2.8, wall=201399 2021-06-21 02:35:35 | INFO | train_inner | epoch 006: 2611 / 3002 loss=2.446, ppl=5.45, wps=5883.8, ups=0.09, wpb=64827, bsz=128, num_updates=17517, lr=9.98679e-05, gnorm=2.092, loss_scale=2, train_wall=11, gb_free=2.8, wall=201410 2021-06-21 02:35:47 | INFO | train_inner | epoch 006: 2612 / 3002 loss=2.429, ppl=5.39, wps=5693.1, ups=0.09, wpb=64765, bsz=128, num_updates=17518, lr=9.98678e-05, gnorm=2.444, loss_scale=2, train_wall=11, gb_free=2.8, wall=201421 2021-06-21 02:35:58 | INFO | train_inner | epoch 006: 2613 / 3002 loss=2.346, ppl=5.09, wps=5834.5, ups=0.09, wpb=64808, bsz=128, num_updates=17519, lr=9.98678e-05, gnorm=2.21, loss_scale=2, train_wall=11, gb_free=2.8, wall=201432 2021-06-21 02:36:09 | INFO | train_inner | epoch 006: 2614 / 3002 loss=2.417, ppl=5.34, wps=5814.4, ups=0.09, wpb=64880, bsz=128, num_updates=17520, lr=9.98678e-05, gnorm=4.11, loss_scale=2, train_wall=11, gb_free=2.8, wall=201443 2021-06-21 02:36:20 | INFO | train_inner | epoch 006: 2615 / 3002 loss=2.516, ppl=5.72, wps=5906.8, ups=0.09, wpb=64908, bsz=128, num_updates=17521, lr=9.98678e-05, gnorm=2.793, loss_scale=2, train_wall=11, gb_free=2.8, wall=201454 2021-06-21 02:36:31 | INFO | train_inner | epoch 006: 2616 / 3002 loss=2.489, ppl=5.62, wps=5742.1, ups=0.09, wpb=64889, bsz=128, num_updates=17522, lr=9.98678e-05, gnorm=1.941, loss_scale=2, train_wall=11, gb_free=2.8, wall=201466 2021-06-21 02:36:42 | INFO | train_inner | epoch 006: 2617 / 3002 loss=2.502, ppl=5.67, wps=5928.7, ups=0.09, wpb=64860, bsz=128, num_updates=17523, lr=9.98678e-05, gnorm=2.074, loss_scale=2, train_wall=11, gb_free=2.8, wall=201476 2021-06-21 02:36:53 | INFO | train_inner | epoch 006: 2618 / 3002 loss=2.44, ppl=5.43, wps=5795.9, ups=0.09, wpb=64774, bsz=128, num_updates=17524, lr=9.98678e-05, gnorm=2.416, loss_scale=2, train_wall=11, gb_free=2.8, wall=201488 2021-06-21 02:37:04 | INFO | train_inner | epoch 006: 2619 / 3002 loss=2.402, ppl=5.29, wps=5888.7, ups=0.09, wpb=64859, bsz=128, num_updates=17525, lr=9.98678e-05, gnorm=5.03, loss_scale=2, train_wall=11, gb_free=2.8, wall=201499 2021-06-21 02:37:15 | INFO | train_inner | epoch 006: 2620 / 3002 loss=2.599, ppl=6.06, wps=5795.5, ups=0.09, wpb=64731, bsz=128, num_updates=17526, lr=9.98678e-05, gnorm=2.492, loss_scale=2, train_wall=11, gb_free=2.8, wall=201510 2021-06-21 02:37:27 | INFO | train_inner | epoch 006: 2621 / 3002 loss=2.385, ppl=5.22, wps=5883.7, ups=0.09, wpb=64918, bsz=128, num_updates=17527, lr=9.98678e-05, gnorm=4.929, loss_scale=2, train_wall=11, gb_free=2.8, wall=201521 2021-06-21 02:37:38 | INFO | train_inner | epoch 006: 2622 / 3002 loss=2.493, ppl=5.63, wps=5824.9, ups=0.09, wpb=64780, bsz=128, num_updates=17528, lr=9.98678e-05, gnorm=2.256, loss_scale=2, train_wall=11, gb_free=2.8, wall=201532 2021-06-21 02:37:49 | INFO | train_inner | epoch 006: 2623 / 3002 loss=2.566, ppl=5.92, wps=5754.3, ups=0.09, wpb=64831, bsz=128, num_updates=17529, lr=9.98678e-05, gnorm=1.964, loss_scale=2, train_wall=11, gb_free=2.8, wall=201543 2021-06-21 02:38:00 | INFO | train_inner | epoch 006: 2624 / 3002 loss=2.528, ppl=5.77, wps=5888.7, ups=0.09, wpb=64924, bsz=128, num_updates=17530, lr=9.98677e-05, gnorm=2.686, loss_scale=2, train_wall=11, gb_free=2.8, wall=201554 2021-06-21 02:38:11 | INFO | train_inner | epoch 006: 2625 / 3002 loss=2.347, ppl=5.09, wps=5876.3, ups=0.09, wpb=64847, bsz=128, num_updates=17531, lr=9.98677e-05, gnorm=2.057, loss_scale=2, train_wall=11, gb_free=2.8, wall=201565 2021-06-21 02:38:22 | INFO | train_inner | epoch 006: 2626 / 3002 loss=2.602, ppl=6.07, wps=5907.3, ups=0.09, wpb=64817, bsz=128, num_updates=17532, lr=9.98677e-05, gnorm=2.55, loss_scale=2, train_wall=11, gb_free=2.8, wall=201576 2021-06-21 02:38:33 | INFO | train_inner | epoch 006: 2627 / 3002 loss=2.636, ppl=6.22, wps=6007.1, ups=0.09, wpb=64778, bsz=128, num_updates=17533, lr=9.98677e-05, gnorm=21.152, loss_scale=2, train_wall=10, gb_free=2.8, wall=201587 2021-06-21 02:38:44 | INFO | train_inner | epoch 006: 2628 / 3002 loss=2.512, ppl=5.7, wps=5798.8, ups=0.09, wpb=64785, bsz=128, num_updates=17534, lr=9.98677e-05, gnorm=7.077, loss_scale=2, train_wall=11, gb_free=2.8, wall=201598 2021-06-21 02:38:55 | INFO | train_inner | epoch 006: 2629 / 3002 loss=2.571, ppl=5.94, wps=5791.5, ups=0.09, wpb=64814, bsz=128, num_updates=17535, lr=9.98677e-05, gnorm=2.162, loss_scale=2, train_wall=11, gb_free=2.8, wall=201609 2021-06-21 02:39:06 | INFO | train_inner | epoch 006: 2630 / 3002 loss=2.491, ppl=5.62, wps=5839.4, ups=0.09, wpb=64839, bsz=128, num_updates=17536, lr=9.98677e-05, gnorm=2.081, loss_scale=2, train_wall=11, gb_free=2.8, wall=201621 2021-06-21 02:39:17 | INFO | train_inner | epoch 006: 2631 / 3002 loss=2.64, ppl=6.23, wps=5796, ups=0.09, wpb=64793, bsz=128, num_updates=17537, lr=9.98677e-05, gnorm=2.465, loss_scale=2, train_wall=11, gb_free=2.8, wall=201632 2021-06-21 02:39:28 | INFO | train_inner | epoch 006: 2632 / 3002 loss=2.59, ppl=6.02, wps=5832.6, ups=0.09, wpb=64761, bsz=128, num_updates=17538, lr=9.98677e-05, gnorm=9.498, loss_scale=2, train_wall=11, gb_free=2.8, wall=201643 2021-06-21 02:39:39 | INFO | train_inner | epoch 006: 2633 / 3002 loss=2.447, ppl=5.45, wps=5934.2, ups=0.09, wpb=64781, bsz=128, num_updates=17539, lr=9.98677e-05, gnorm=2.833, loss_scale=2, train_wall=10, gb_free=2.8, wall=201654 2021-06-21 02:39:50 | INFO | train_inner | epoch 006: 2634 / 3002 loss=2.514, ppl=5.71, wps=5856.6, ups=0.09, wpb=64812, bsz=128, num_updates=17540, lr=9.98677e-05, gnorm=3.543, loss_scale=2, train_wall=11, gb_free=2.8, wall=201665 2021-06-21 02:40:02 | INFO | train_inner | epoch 006: 2635 / 3002 loss=2.611, ppl=6.11, wps=5747.6, ups=0.09, wpb=64722, bsz=128, num_updates=17541, lr=9.98677e-05, gnorm=2.182, loss_scale=2, train_wall=11, gb_free=2.8, wall=201676 2021-06-21 02:40:13 | INFO | train_inner | epoch 006: 2636 / 3002 loss=2.46, ppl=5.5, wps=5918.2, ups=0.09, wpb=64873, bsz=128, num_updates=17542, lr=9.98677e-05, gnorm=3.064, loss_scale=2, train_wall=11, gb_free=2.8, wall=201687 2021-06-21 02:40:24 | INFO | train_inner | epoch 006: 2637 / 3002 loss=2.407, ppl=5.3, wps=5801, ups=0.09, wpb=64839, bsz=128, num_updates=17543, lr=9.98676e-05, gnorm=3.312, loss_scale=2, train_wall=11, gb_free=2.8, wall=201698 2021-06-21 02:40:35 | INFO | train_inner | epoch 006: 2638 / 3002 loss=2.32, ppl=4.99, wps=5905.6, ups=0.09, wpb=64925, bsz=128, num_updates=17544, lr=9.98676e-05, gnorm=1.991, loss_scale=2, train_wall=11, gb_free=2.8, wall=201709 2021-06-21 02:40:46 | INFO | train_inner | epoch 006: 2639 / 3002 loss=2.425, ppl=5.37, wps=5913.3, ups=0.09, wpb=64881, bsz=128, num_updates=17545, lr=9.98676e-05, gnorm=2.21, loss_scale=2, train_wall=10, gb_free=2.8, wall=201720 2021-06-21 02:40:57 | INFO | train_inner | epoch 006: 2640 / 3002 loss=2.486, ppl=5.6, wps=5734.3, ups=0.09, wpb=64862, bsz=128, num_updates=17546, lr=9.98676e-05, gnorm=25.834, loss_scale=2, train_wall=11, gb_free=2.8, wall=201731 2021-06-21 02:41:08 | INFO | train_inner | epoch 006: 2641 / 3002 loss=2.612, ppl=6.11, wps=5894.6, ups=0.09, wpb=64855, bsz=128, num_updates=17547, lr=9.98676e-05, gnorm=22.968, loss_scale=2, train_wall=11, gb_free=2.8, wall=201742 2021-06-21 02:41:19 | INFO | train_inner | epoch 006: 2642 / 3002 loss=2.549, ppl=5.85, wps=5871.1, ups=0.09, wpb=64835, bsz=128, num_updates=17548, lr=9.98676e-05, gnorm=2.43, loss_scale=2, train_wall=11, gb_free=2.8, wall=201754 2021-06-21 02:41:30 | INFO | train_inner | epoch 006: 2643 / 3002 loss=2.583, ppl=5.99, wps=5799.6, ups=0.09, wpb=64792, bsz=128, num_updates=17549, lr=9.98676e-05, gnorm=2.335, loss_scale=2, train_wall=11, gb_free=2.8, wall=201765 2021-06-21 02:41:42 | INFO | train_inner | epoch 006: 2644 / 3002 loss=2.378, ppl=5.2, wps=5773.4, ups=0.09, wpb=64806, bsz=128, num_updates=17550, lr=9.98676e-05, gnorm=7.509, loss_scale=2, train_wall=11, gb_free=2.8, wall=201776 2021-06-21 02:41:53 | INFO | train_inner | epoch 006: 2645 / 3002 loss=2.491, ppl=5.62, wps=5916.7, ups=0.09, wpb=64762, bsz=128, num_updates=17551, lr=9.98676e-05, gnorm=4.091, loss_scale=2, train_wall=10, gb_free=2.8, wall=201787 2021-06-21 02:42:04 | INFO | train_inner | epoch 006: 2646 / 3002 loss=2.52, ppl=5.74, wps=5794.4, ups=0.09, wpb=64873, bsz=128, num_updates=17552, lr=9.98676e-05, gnorm=6.231, loss_scale=2, train_wall=11, gb_free=2.8, wall=201798 2021-06-21 02:42:15 | INFO | train_inner | epoch 006: 2647 / 3002 loss=2.518, ppl=5.73, wps=5776, ups=0.09, wpb=64798, bsz=128, num_updates=17553, lr=9.98676e-05, gnorm=2.308, loss_scale=2, train_wall=11, gb_free=2.8, wall=201809 2021-06-21 02:42:26 | INFO | train_inner | epoch 006: 2648 / 3002 loss=2.324, ppl=5.01, wps=5732.1, ups=0.09, wpb=64842, bsz=128, num_updates=17554, lr=9.98676e-05, gnorm=3.176, loss_scale=2, train_wall=11, gb_free=2.8, wall=201821 2021-06-21 02:42:37 | INFO | train_inner | epoch 006: 2649 / 3002 loss=2.495, ppl=5.64, wps=5843.1, ups=0.09, wpb=64888, bsz=128, num_updates=17555, lr=9.98675e-05, gnorm=2.288, loss_scale=2, train_wall=11, gb_free=2.8, wall=201832 2021-06-21 02:42:49 | INFO | train_inner | epoch 006: 2650 / 3002 loss=2.446, ppl=5.45, wps=5800.7, ups=0.09, wpb=64804, bsz=128, num_updates=17556, lr=9.98675e-05, gnorm=2.89, loss_scale=2, train_wall=11, gb_free=2.8, wall=201843 2021-06-21 02:43:00 | INFO | train_inner | epoch 006: 2651 / 3002 loss=2.609, ppl=6.1, wps=5823, ups=0.09, wpb=64814, bsz=128, num_updates=17557, lr=9.98675e-05, gnorm=3.937, loss_scale=2, train_wall=11, gb_free=2.8, wall=201854 2021-06-21 02:43:11 | INFO | train_inner | epoch 006: 2652 / 3002 loss=2.563, ppl=5.91, wps=5820.3, ups=0.09, wpb=64859, bsz=128, num_updates=17558, lr=9.98675e-05, gnorm=4.498, loss_scale=2, train_wall=11, gb_free=2.8, wall=201865 2021-06-21 02:43:22 | INFO | train_inner | epoch 006: 2653 / 3002 loss=2.548, ppl=5.85, wps=5823.3, ups=0.09, wpb=64897, bsz=128, num_updates=17559, lr=9.98675e-05, gnorm=2.604, loss_scale=2, train_wall=11, gb_free=2.8, wall=201876 2021-06-21 02:43:33 | INFO | train_inner | epoch 006: 2654 / 3002 loss=2.513, ppl=5.71, wps=5844.8, ups=0.09, wpb=64834, bsz=128, num_updates=17560, lr=9.98675e-05, gnorm=2.352, loss_scale=2, train_wall=11, gb_free=2.8, wall=201887 2021-06-21 02:43:44 | INFO | train_inner | epoch 006: 2655 / 3002 loss=2.566, ppl=5.92, wps=5779.9, ups=0.09, wpb=64845, bsz=128, num_updates=17561, lr=9.98675e-05, gnorm=4.874, loss_scale=4, train_wall=11, gb_free=2.8, wall=201899 2021-06-21 02:43:55 | INFO | train_inner | epoch 006: 2656 / 3002 loss=2.503, ppl=5.67, wps=5846.3, ups=0.09, wpb=64859, bsz=128, num_updates=17562, lr=9.98675e-05, gnorm=2.412, loss_scale=4, train_wall=11, gb_free=2.8, wall=201910 2021-06-21 02:44:06 | INFO | train_inner | epoch 006: 2657 / 3002 loss=2.562, ppl=5.91, wps=5823.9, ups=0.09, wpb=64779, bsz=128, num_updates=17563, lr=9.98675e-05, gnorm=2.419, loss_scale=4, train_wall=11, gb_free=2.8, wall=201921 2021-06-21 02:44:18 | INFO | train_inner | epoch 006: 2658 / 3002 loss=2.519, ppl=5.73, wps=5875.8, ups=0.09, wpb=64830, bsz=128, num_updates=17564, lr=9.98675e-05, gnorm=2.341, loss_scale=4, train_wall=11, gb_free=2.8, wall=201932 2021-06-21 02:44:28 | INFO | train_inner | epoch 006: 2659 / 3002 loss=2.325, ppl=5.01, wps=5928.3, ups=0.09, wpb=64879, bsz=128, num_updates=17565, lr=9.98675e-05, gnorm=2.194, loss_scale=4, train_wall=10, gb_free=2.8, wall=201943 2021-06-21 02:44:39 | INFO | train_inner | epoch 006: 2660 / 3002 loss=2.475, ppl=5.56, wps=5941, ups=0.09, wpb=64789, bsz=128, num_updates=17566, lr=9.98675e-05, gnorm=2.316, loss_scale=4, train_wall=10, gb_free=2.8, wall=201954 2021-06-21 02:44:51 | INFO | train_inner | epoch 006: 2661 / 3002 loss=2.617, ppl=6.13, wps=5784.5, ups=0.09, wpb=64800, bsz=128, num_updates=17567, lr=9.98675e-05, gnorm=2.894, loss_scale=4, train_wall=11, gb_free=2.8, wall=201965 2021-06-21 02:45:02 | INFO | train_inner | epoch 006: 2662 / 3002 loss=2.533, ppl=5.79, wps=5840, ups=0.09, wpb=64737, bsz=128, num_updates=17568, lr=9.98674e-05, gnorm=2.342, loss_scale=4, train_wall=11, gb_free=2.8, wall=201976 2021-06-21 02:45:13 | INFO | train_inner | epoch 006: 2663 / 3002 loss=2.618, ppl=6.14, wps=5896.3, ups=0.09, wpb=64805, bsz=128, num_updates=17569, lr=9.98674e-05, gnorm=2.116, loss_scale=4, train_wall=11, gb_free=2.8, wall=201987 2021-06-21 02:45:24 | INFO | train_inner | epoch 006: 2664 / 3002 loss=2.626, ppl=6.17, wps=5815.7, ups=0.09, wpb=64803, bsz=128, num_updates=17570, lr=9.98674e-05, gnorm=2.242, loss_scale=4, train_wall=11, gb_free=2.8, wall=201998 2021-06-21 02:45:35 | INFO | train_inner | epoch 006: 2665 / 3002 loss=2.583, ppl=5.99, wps=5857.5, ups=0.09, wpb=64847, bsz=128, num_updates=17571, lr=9.98674e-05, gnorm=2.768, loss_scale=4, train_wall=11, gb_free=2.8, wall=202009 2021-06-21 02:45:46 | INFO | train_inner | epoch 006: 2666 / 3002 loss=2.582, ppl=5.99, wps=5954.4, ups=0.09, wpb=64747, bsz=128, num_updates=17572, lr=9.98674e-05, gnorm=2.141, loss_scale=4, train_wall=10, gb_free=2.8, wall=202020 2021-06-21 02:45:57 | INFO | train_inner | epoch 006: 2667 / 3002 loss=2.483, ppl=5.59, wps=5951.1, ups=0.09, wpb=64843, bsz=128, num_updates=17573, lr=9.98674e-05, gnorm=4.99, loss_scale=4, train_wall=10, gb_free=2.8, wall=202031 2021-06-21 02:46:08 | INFO | train_inner | epoch 006: 2668 / 3002 loss=2.57, ppl=5.94, wps=5909.9, ups=0.09, wpb=64915, bsz=128, num_updates=17574, lr=9.98674e-05, gnorm=2.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=202042 2021-06-21 02:46:19 | INFO | train_inner | epoch 006: 2669 / 3002 loss=2.577, ppl=5.97, wps=5805.1, ups=0.09, wpb=64833, bsz=128, num_updates=17575, lr=9.98674e-05, gnorm=2.307, loss_scale=4, train_wall=11, gb_free=2.8, wall=202053 2021-06-21 02:46:30 | INFO | train_inner | epoch 006: 2670 / 3002 loss=2.452, ppl=5.47, wps=5804.3, ups=0.09, wpb=64826, bsz=128, num_updates=17576, lr=9.98674e-05, gnorm=2.077, loss_scale=4, train_wall=11, gb_free=2.8, wall=202064 2021-06-21 02:46:41 | INFO | train_inner | epoch 006: 2671 / 3002 loss=2.368, ppl=5.16, wps=5718.2, ups=0.09, wpb=64905, bsz=128, num_updates=17577, lr=9.98674e-05, gnorm=2.155, loss_scale=4, train_wall=11, gb_free=2.8, wall=202076 2021-06-21 02:46:53 | INFO | train_inner | epoch 006: 2672 / 3002 loss=2.608, ppl=6.1, wps=5768.4, ups=0.09, wpb=64907, bsz=128, num_updates=17578, lr=9.98674e-05, gnorm=2.155, loss_scale=4, train_wall=11, gb_free=2.8, wall=202087 2021-06-21 02:47:03 | INFO | train_inner | epoch 006: 2673 / 3002 loss=2.47, ppl=5.54, wps=5967.2, ups=0.09, wpb=64865, bsz=128, num_updates=17579, lr=9.98674e-05, gnorm=3.094, loss_scale=4, train_wall=10, gb_free=2.8, wall=202098 2021-06-21 02:47:15 | INFO | train_inner | epoch 006: 2674 / 3002 loss=2.576, ppl=5.96, wps=5848, ups=0.09, wpb=64815, bsz=128, num_updates=17580, lr=9.98673e-05, gnorm=2.194, loss_scale=4, train_wall=11, gb_free=2.8, wall=202109 2021-06-21 02:47:26 | INFO | train_inner | epoch 006: 2675 / 3002 loss=2.51, ppl=5.7, wps=5865.4, ups=0.09, wpb=64803, bsz=128, num_updates=17581, lr=9.98673e-05, gnorm=2.283, loss_scale=4, train_wall=11, gb_free=2.8, wall=202120 2021-06-21 02:47:37 | INFO | train_inner | epoch 006: 2676 / 3002 loss=2.534, ppl=5.79, wps=5850.6, ups=0.09, wpb=64802, bsz=128, num_updates=17582, lr=9.98673e-05, gnorm=3.584, loss_scale=4, train_wall=11, gb_free=2.8, wall=202131 2021-06-21 02:47:48 | INFO | train_inner | epoch 006: 2677 / 3002 loss=2.491, ppl=5.62, wps=5885.4, ups=0.09, wpb=64815, bsz=128, num_updates=17583, lr=9.98673e-05, gnorm=2.099, loss_scale=4, train_wall=11, gb_free=2.8, wall=202142 2021-06-21 02:47:59 | INFO | train_inner | epoch 006: 2678 / 3002 loss=2.471, ppl=5.54, wps=5973.9, ups=0.09, wpb=64839, bsz=128, num_updates=17584, lr=9.98673e-05, gnorm=2.106, loss_scale=4, train_wall=10, gb_free=2.8, wall=202153 2021-06-21 02:48:10 | INFO | train_inner | epoch 006: 2679 / 3002 loss=2.393, ppl=5.25, wps=5863, ups=0.09, wpb=64802, bsz=128, num_updates=17585, lr=9.98673e-05, gnorm=4.153, loss_scale=4, train_wall=11, gb_free=2.8, wall=202164 2021-06-21 02:48:21 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 2021-06-21 02:48:31 | INFO | train_inner | epoch 006: 2681 / 3002 loss=2.433, ppl=5.4, wps=2964.5, ups=0.05, wpb=64819, bsz=128, num_updates=17586, lr=9.98673e-05, gnorm=2.385, loss_scale=2, train_wall=21, gb_free=2.8, wall=202186 2021-06-21 02:48:42 | INFO | train_inner | epoch 006: 2682 / 3002 loss=2.396, ppl=5.26, wps=5952.6, ups=0.09, wpb=64762, bsz=128, num_updates=17587, lr=9.98673e-05, gnorm=2.031, loss_scale=2, train_wall=10, gb_free=2.8, wall=202197 2021-06-21 02:48:53 | INFO | train_inner | epoch 006: 2683 / 3002 loss=2.404, ppl=5.29, wps=5843.6, ups=0.09, wpb=64768, bsz=128, num_updates=17588, lr=9.98673e-05, gnorm=2.54, loss_scale=2, train_wall=11, gb_free=2.8, wall=202208 2021-06-21 02:49:04 | INFO | train_inner | epoch 006: 2684 / 3002 loss=2.468, ppl=5.53, wps=5869.7, ups=0.09, wpb=64802, bsz=128, num_updates=17589, lr=9.98673e-05, gnorm=2.583, loss_scale=2, train_wall=11, gb_free=2.8, wall=202219 2021-06-21 02:49:15 | INFO | train_inner | epoch 006: 2685 / 3002 loss=2.491, ppl=5.62, wps=5898, ups=0.09, wpb=64841, bsz=128, num_updates=17590, lr=9.98673e-05, gnorm=2.187, loss_scale=2, train_wall=11, gb_free=2.8, wall=202230 2021-06-21 02:49:27 | INFO | train_inner | epoch 006: 2686 / 3002 loss=2.399, ppl=5.27, wps=5777.9, ups=0.09, wpb=64805, bsz=128, num_updates=17591, lr=9.98673e-05, gnorm=2.116, loss_scale=2, train_wall=11, gb_free=2.8, wall=202241 2021-06-21 02:49:38 | INFO | train_inner | epoch 006: 2687 / 3002 loss=2.543, ppl=5.83, wps=5753, ups=0.09, wpb=64827, bsz=128, num_updates=17592, lr=9.98673e-05, gnorm=13.96, loss_scale=2, train_wall=11, gb_free=2.8, wall=202252 2021-06-21 02:49:49 | INFO | train_inner | epoch 006: 2688 / 3002 loss=2.457, ppl=5.49, wps=5978.1, ups=0.09, wpb=64879, bsz=128, num_updates=17593, lr=9.98672e-05, gnorm=9.345, loss_scale=2, train_wall=10, gb_free=2.8, wall=202263 2021-06-21 02:50:00 | INFO | train_inner | epoch 006: 2689 / 3002 loss=2.605, ppl=6.09, wps=5788.9, ups=0.09, wpb=64764, bsz=128, num_updates=17594, lr=9.98672e-05, gnorm=2.196, loss_scale=2, train_wall=11, gb_free=2.8, wall=202274 2021-06-21 02:50:11 | INFO | train_inner | epoch 006: 2690 / 3002 loss=2.545, ppl=5.84, wps=5927.8, ups=0.09, wpb=64760, bsz=128, num_updates=17595, lr=9.98672e-05, gnorm=2.745, loss_scale=2, train_wall=10, gb_free=2.8, wall=202285 2021-06-21 02:50:22 | INFO | train_inner | epoch 006: 2691 / 3002 loss=2.365, ppl=5.15, wps=5926.3, ups=0.09, wpb=64883, bsz=128, num_updates=17596, lr=9.98672e-05, gnorm=4.403, loss_scale=2, train_wall=10, gb_free=2.8, wall=202296 2021-06-21 02:50:33 | INFO | train_inner | epoch 006: 2692 / 3002 loss=2.602, ppl=6.07, wps=5817.4, ups=0.09, wpb=64844, bsz=128, num_updates=17597, lr=9.98672e-05, gnorm=3.03, loss_scale=2, train_wall=11, gb_free=2.8, wall=202307 2021-06-21 02:50:44 | INFO | train_inner | epoch 006: 2693 / 3002 loss=2.472, ppl=5.55, wps=5889.9, ups=0.09, wpb=64876, bsz=128, num_updates=17598, lr=9.98672e-05, gnorm=2.208, loss_scale=2, train_wall=11, gb_free=2.8, wall=202318 2021-06-21 02:50:55 | INFO | train_inner | epoch 006: 2694 / 3002 loss=2.563, ppl=5.91, wps=5907.8, ups=0.09, wpb=64872, bsz=128, num_updates=17599, lr=9.98672e-05, gnorm=4.899, loss_scale=2, train_wall=11, gb_free=2.8, wall=202329 2021-06-21 02:51:06 | INFO | train_inner | epoch 006: 2695 / 3002 loss=2.613, ppl=6.12, wps=5814.1, ups=0.09, wpb=64801, bsz=128, num_updates=17600, lr=9.98672e-05, gnorm=2.395, loss_scale=2, train_wall=11, gb_free=2.8, wall=202340 2021-06-21 02:51:17 | INFO | train_inner | epoch 006: 2696 / 3002 loss=2.462, ppl=5.51, wps=5940.8, ups=0.09, wpb=64862, bsz=128, num_updates=17601, lr=9.98672e-05, gnorm=2.356, loss_scale=2, train_wall=10, gb_free=2.8, wall=202351 2021-06-21 02:51:28 | INFO | train_inner | epoch 006: 2697 / 3002 loss=2.588, ppl=6.01, wps=5911, ups=0.09, wpb=64884, bsz=128, num_updates=17602, lr=9.98672e-05, gnorm=2.339, loss_scale=2, train_wall=11, gb_free=2.8, wall=202362 2021-06-21 02:51:39 | INFO | train_inner | epoch 006: 2698 / 3002 loss=2.685, ppl=6.43, wps=5689, ups=0.09, wpb=64864, bsz=128, num_updates=17603, lr=9.98672e-05, gnorm=4.553, loss_scale=2, train_wall=11, gb_free=2.8, wall=202374 2021-06-21 02:51:50 | INFO | train_inner | epoch 006: 2699 / 3002 loss=2.536, ppl=5.8, wps=5864.7, ups=0.09, wpb=64873, bsz=128, num_updates=17604, lr=9.98672e-05, gnorm=2.323, loss_scale=2, train_wall=11, gb_free=2.8, wall=202385 2021-06-21 02:52:02 | INFO | train_inner | epoch 006: 2700 / 3002 loss=2.671, ppl=6.37, wps=5844.2, ups=0.09, wpb=64733, bsz=128, num_updates=17605, lr=9.98671e-05, gnorm=2.743, loss_scale=2, train_wall=11, gb_free=2.8, wall=202396 2021-06-21 02:52:13 | INFO | train_inner | epoch 006: 2701 / 3002 loss=2.52, ppl=5.74, wps=5897.7, ups=0.09, wpb=64869, bsz=128, num_updates=17606, lr=9.98671e-05, gnorm=3.276, loss_scale=2, train_wall=11, gb_free=2.8, wall=202407 2021-06-21 02:52:24 | INFO | train_inner | epoch 006: 2702 / 3002 loss=2.548, ppl=5.85, wps=5813.4, ups=0.09, wpb=64815, bsz=128, num_updates=17607, lr=9.98671e-05, gnorm=4.147, loss_scale=2, train_wall=11, gb_free=2.8, wall=202418 2021-06-21 02:52:35 | INFO | train_inner | epoch 006: 2703 / 3002 loss=2.483, ppl=5.59, wps=5937.2, ups=0.09, wpb=64848, bsz=128, num_updates=17608, lr=9.98671e-05, gnorm=7.738, loss_scale=2, train_wall=10, gb_free=2.8, wall=202429 2021-06-21 02:52:46 | INFO | train_inner | epoch 006: 2704 / 3002 loss=2.504, ppl=5.67, wps=5782.9, ups=0.09, wpb=64759, bsz=128, num_updates=17609, lr=9.98671e-05, gnorm=4.388, loss_scale=2, train_wall=11, gb_free=2.8, wall=202440 2021-06-21 02:52:57 | INFO | train_inner | epoch 006: 2705 / 3002 loss=2.467, ppl=5.53, wps=5869.2, ups=0.09, wpb=64730, bsz=128, num_updates=17610, lr=9.98671e-05, gnorm=4.575, loss_scale=2, train_wall=11, gb_free=2.8, wall=202451 2021-06-21 02:53:08 | INFO | train_inner | epoch 006: 2706 / 3002 loss=2.441, ppl=5.43, wps=5933.6, ups=0.09, wpb=64761, bsz=128, num_updates=17611, lr=9.98671e-05, gnorm=2.056, loss_scale=2, train_wall=10, gb_free=2.8, wall=202462 2021-06-21 02:53:19 | INFO | train_inner | epoch 006: 2707 / 3002 loss=2.436, ppl=5.41, wps=5974.4, ups=0.09, wpb=64859, bsz=128, num_updates=17612, lr=9.98671e-05, gnorm=2.14, loss_scale=2, train_wall=10, gb_free=2.8, wall=202473 2021-06-21 02:53:30 | INFO | train_inner | epoch 006: 2708 / 3002 loss=2.535, ppl=5.8, wps=5848.6, ups=0.09, wpb=64848, bsz=128, num_updates=17613, lr=9.98671e-05, gnorm=2.503, loss_scale=2, train_wall=11, gb_free=2.8, wall=202484 2021-06-21 02:53:41 | INFO | train_inner | epoch 006: 2709 / 3002 loss=2.523, ppl=5.75, wps=5880.3, ups=0.09, wpb=64675, bsz=128, num_updates=17614, lr=9.98671e-05, gnorm=2.148, loss_scale=2, train_wall=11, gb_free=2.8, wall=202495 2021-06-21 02:53:52 | INFO | train_inner | epoch 006: 2710 / 3002 loss=2.673, ppl=6.38, wps=5856, ups=0.09, wpb=64794, bsz=128, num_updates=17615, lr=9.98671e-05, gnorm=12.997, loss_scale=2, train_wall=11, gb_free=2.8, wall=202506 2021-06-21 02:54:03 | INFO | train_inner | epoch 006: 2711 / 3002 loss=2.686, ppl=6.43, wps=5842.4, ups=0.09, wpb=64843, bsz=128, num_updates=17616, lr=9.98671e-05, gnorm=3.337, loss_scale=2, train_wall=11, gb_free=2.8, wall=202517 2021-06-21 02:54:14 | INFO | train_inner | epoch 006: 2712 / 3002 loss=2.507, ppl=5.68, wps=5952.7, ups=0.09, wpb=64902, bsz=128, num_updates=17617, lr=9.98671e-05, gnorm=2.051, loss_scale=2, train_wall=10, gb_free=2.8, wall=202528 2021-06-21 02:54:25 | INFO | train_inner | epoch 006: 2713 / 3002 loss=2.393, ppl=5.25, wps=5890, ups=0.09, wpb=64847, bsz=128, num_updates=17618, lr=9.9867e-05, gnorm=2.999, loss_scale=2, train_wall=11, gb_free=2.8, wall=202539 2021-06-21 02:54:36 | INFO | train_inner | epoch 006: 2714 / 3002 loss=2.449, ppl=5.46, wps=5800.2, ups=0.09, wpb=64775, bsz=128, num_updates=17619, lr=9.9867e-05, gnorm=2.146, loss_scale=2, train_wall=11, gb_free=2.8, wall=202550 2021-06-21 02:54:47 | INFO | train_inner | epoch 006: 2715 / 3002 loss=2.537, ppl=5.8, wps=5949.1, ups=0.09, wpb=64843, bsz=128, num_updates=17620, lr=9.9867e-05, gnorm=2.102, loss_scale=2, train_wall=10, gb_free=2.8, wall=202561 2021-06-21 02:54:58 | INFO | train_inner | epoch 006: 2716 / 3002 loss=2.527, ppl=5.76, wps=5715.4, ups=0.09, wpb=64829, bsz=128, num_updates=17621, lr=9.9867e-05, gnorm=2.236, loss_scale=2, train_wall=11, gb_free=2.8, wall=202573 2021-06-21 02:55:09 | INFO | train_inner | epoch 006: 2717 / 3002 loss=2.561, ppl=5.9, wps=5823.5, ups=0.09, wpb=64840, bsz=128, num_updates=17622, lr=9.9867e-05, gnorm=14.238, loss_scale=2, train_wall=11, gb_free=2.8, wall=202584 2021-06-21 02:55:20 | INFO | train_inner | epoch 006: 2718 / 3002 loss=2.57, ppl=5.94, wps=5819.2, ups=0.09, wpb=64804, bsz=128, num_updates=17623, lr=9.9867e-05, gnorm=2.246, loss_scale=2, train_wall=11, gb_free=2.8, wall=202595 2021-06-21 02:55:32 | INFO | train_inner | epoch 006: 2719 / 3002 loss=2.545, ppl=5.83, wps=5788, ups=0.09, wpb=64823, bsz=128, num_updates=17624, lr=9.9867e-05, gnorm=2.329, loss_scale=2, train_wall=11, gb_free=2.8, wall=202606 2021-06-21 02:55:43 | INFO | train_inner | epoch 006: 2720 / 3002 loss=2.453, ppl=5.48, wps=5860.1, ups=0.09, wpb=64859, bsz=128, num_updates=17625, lr=9.9867e-05, gnorm=1.993, loss_scale=2, train_wall=11, gb_free=2.8, wall=202617 2021-06-21 02:55:54 | INFO | train_inner | epoch 006: 2721 / 3002 loss=2.593, ppl=6.03, wps=5907.5, ups=0.09, wpb=64905, bsz=128, num_updates=17626, lr=9.9867e-05, gnorm=3.159, loss_scale=2, train_wall=11, gb_free=2.8, wall=202628 2021-06-21 02:56:05 | INFO | train_inner | epoch 006: 2722 / 3002 loss=2.477, ppl=5.57, wps=5911.3, ups=0.09, wpb=64792, bsz=128, num_updates=17627, lr=9.9867e-05, gnorm=15.99, loss_scale=2, train_wall=11, gb_free=2.8, wall=202639 2021-06-21 02:56:16 | INFO | train_inner | epoch 006: 2723 / 3002 loss=2.454, ppl=5.48, wps=5925.9, ups=0.09, wpb=64873, bsz=128, num_updates=17628, lr=9.9867e-05, gnorm=2.108, loss_scale=2, train_wall=10, gb_free=2.8, wall=202650 2021-06-21 02:56:27 | INFO | train_inner | epoch 006: 2724 / 3002 loss=2.456, ppl=5.49, wps=5833, ups=0.09, wpb=64797, bsz=128, num_updates=17629, lr=9.9867e-05, gnorm=14.405, loss_scale=2, train_wall=11, gb_free=2.8, wall=202661 2021-06-21 02:56:38 | INFO | train_inner | epoch 006: 2725 / 3002 loss=2.66, ppl=6.32, wps=5814, ups=0.09, wpb=64877, bsz=128, num_updates=17630, lr=9.98669e-05, gnorm=2.202, loss_scale=2, train_wall=11, gb_free=2.8, wall=202672 2021-06-21 02:56:49 | INFO | train_inner | epoch 006: 2726 / 3002 loss=2.453, ppl=5.47, wps=5791.9, ups=0.09, wpb=64865, bsz=128, num_updates=17631, lr=9.98669e-05, gnorm=8.327, loss_scale=2, train_wall=11, gb_free=2.8, wall=202683 2021-06-21 02:57:00 | INFO | train_inner | epoch 006: 2727 / 3002 loss=2.601, ppl=6.07, wps=5790.1, ups=0.09, wpb=64926, bsz=128, num_updates=17632, lr=9.98669e-05, gnorm=2.296, loss_scale=2, train_wall=11, gb_free=2.8, wall=202695 2021-06-21 02:57:11 | INFO | train_inner | epoch 006: 2728 / 3002 loss=2.504, ppl=5.67, wps=5853.5, ups=0.09, wpb=64897, bsz=128, num_updates=17633, lr=9.98669e-05, gnorm=2.655, loss_scale=2, train_wall=11, gb_free=2.8, wall=202706 2021-06-21 02:57:23 | INFO | train_inner | epoch 006: 2729 / 3002 loss=2.497, ppl=5.65, wps=5836.1, ups=0.09, wpb=64902, bsz=128, num_updates=17634, lr=9.98669e-05, gnorm=3.052, loss_scale=2, train_wall=11, gb_free=2.8, wall=202717 2021-06-21 02:57:34 | INFO | train_inner | epoch 006: 2730 / 3002 loss=2.441, ppl=5.43, wps=5818.3, ups=0.09, wpb=64833, bsz=128, num_updates=17635, lr=9.98669e-05, gnorm=2.187, loss_scale=2, train_wall=11, gb_free=2.8, wall=202728 2021-06-21 02:57:45 | INFO | train_inner | epoch 006: 2731 / 3002 loss=2.608, ppl=6.1, wps=5795, ups=0.09, wpb=64715, bsz=128, num_updates=17636, lr=9.98669e-05, gnorm=2.088, loss_scale=2, train_wall=11, gb_free=2.8, wall=202739 2021-06-21 02:57:56 | INFO | train_inner | epoch 006: 2732 / 3002 loss=2.492, ppl=5.62, wps=5886, ups=0.09, wpb=64846, bsz=128, num_updates=17637, lr=9.98669e-05, gnorm=2.487, loss_scale=2, train_wall=11, gb_free=2.8, wall=202750 2021-06-21 02:58:07 | INFO | train_inner | epoch 006: 2733 / 3002 loss=2.583, ppl=5.99, wps=5842.7, ups=0.09, wpb=64762, bsz=128, num_updates=17638, lr=9.98669e-05, gnorm=2.288, loss_scale=2, train_wall=11, gb_free=2.8, wall=202761 2021-06-21 02:58:18 | INFO | train_inner | epoch 006: 2734 / 3002 loss=2.636, ppl=6.21, wps=5720.4, ups=0.09, wpb=64819, bsz=128, num_updates=17639, lr=9.98669e-05, gnorm=2.751, loss_scale=2, train_wall=11, gb_free=2.8, wall=202773 2021-06-21 02:58:29 | INFO | train_inner | epoch 006: 2735 / 3002 loss=2.541, ppl=5.82, wps=5885.4, ups=0.09, wpb=64898, bsz=128, num_updates=17640, lr=9.98669e-05, gnorm=2.064, loss_scale=2, train_wall=11, gb_free=2.8, wall=202784 2021-06-21 02:58:40 | INFO | train_inner | epoch 006: 2736 / 3002 loss=2.631, ppl=6.19, wps=5951.9, ups=0.09, wpb=64799, bsz=128, num_updates=17641, lr=9.98669e-05, gnorm=2.313, loss_scale=2, train_wall=10, gb_free=2.8, wall=202795 2021-06-21 02:58:51 | INFO | train_inner | epoch 006: 2737 / 3002 loss=2.522, ppl=5.74, wps=5977.8, ups=0.09, wpb=64883, bsz=128, num_updates=17642, lr=9.98669e-05, gnorm=2.052, loss_scale=2, train_wall=10, gb_free=2.8, wall=202805 2021-06-21 02:59:02 | INFO | train_inner | epoch 006: 2738 / 3002 loss=2.489, ppl=5.61, wps=5770.2, ups=0.09, wpb=64849, bsz=128, num_updates=17643, lr=9.98668e-05, gnorm=2.058, loss_scale=2, train_wall=11, gb_free=2.8, wall=202817 2021-06-21 02:59:13 | INFO | train_inner | epoch 006: 2739 / 3002 loss=2.491, ppl=5.62, wps=5950.7, ups=0.09, wpb=64806, bsz=128, num_updates=17644, lr=9.98668e-05, gnorm=2.047, loss_scale=2, train_wall=10, gb_free=2.8, wall=202827 2021-06-21 02:59:24 | INFO | train_inner | epoch 006: 2740 / 3002 loss=2.528, ppl=5.77, wps=5804.6, ups=0.09, wpb=64728, bsz=128, num_updates=17645, lr=9.98668e-05, gnorm=5.507, loss_scale=2, train_wall=11, gb_free=2.8, wall=202839 2021-06-21 02:59:35 | INFO | train_inner | epoch 006: 2741 / 3002 loss=2.418, ppl=5.34, wps=5900.1, ups=0.09, wpb=64852, bsz=128, num_updates=17646, lr=9.98668e-05, gnorm=2.496, loss_scale=2, train_wall=11, gb_free=2.8, wall=202850 2021-06-21 02:59:46 | INFO | train_inner | epoch 006: 2742 / 3002 loss=2.552, ppl=5.87, wps=5847, ups=0.09, wpb=64471, bsz=128, num_updates=17647, lr=9.98668e-05, gnorm=2.069, loss_scale=2, train_wall=11, gb_free=2.8, wall=202861 2021-06-21 02:59:57 | INFO | train_inner | epoch 006: 2743 / 3002 loss=2.488, ppl=5.61, wps=5904.1, ups=0.09, wpb=64799, bsz=128, num_updates=17648, lr=9.98668e-05, gnorm=2.077, loss_scale=2, train_wall=11, gb_free=2.8, wall=202872