step lambda train_loss train_ppl eval_ppl eval_bpb lr time_s best_ppl