{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.0, "eval_steps": 500, "global_step": 14016, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.21, "learning_rate": 4.821632420091324e-05, "loss": 0.3322, "step": 500 }, { "epoch": 0.43, "learning_rate": 4.6432648401826485e-05, "loss": 0.2664, "step": 1000 }, { "epoch": 0.64, "learning_rate": 4.464897260273973e-05, "loss": 0.2412, "step": 1500 }, { "epoch": 0.86, "learning_rate": 4.286529680365297e-05, "loss": 0.2219, "step": 2000 }, { "epoch": 1.0, "eval_bleu": 21.2068, "eval_gen_len": 19.2422, "eval_loss": 0.18159246444702148, "eval_runtime": 154.1038, "eval_samples_per_second": 53.886, "eval_steps_per_second": 1.687, "step": 2336 }, { "epoch": 1.07, "learning_rate": 4.108162100456621e-05, "loss": 0.1887, "step": 2500 }, { "epoch": 1.28, "learning_rate": 3.929794520547945e-05, "loss": 0.1575, "step": 3000 }, { "epoch": 1.5, "learning_rate": 3.7514269406392696e-05, "loss": 0.1534, "step": 3500 }, { "epoch": 1.71, "learning_rate": 3.573059360730594e-05, "loss": 0.1553, "step": 4000 }, { "epoch": 1.93, "learning_rate": 3.394691780821918e-05, "loss": 0.1479, "step": 4500 }, { "epoch": 2.0, "eval_bleu": 21.6783, "eval_gen_len": 19.2471, "eval_loss": 0.16088075935840607, "eval_runtime": 154.8749, "eval_samples_per_second": 53.617, "eval_steps_per_second": 1.679, "step": 4672 }, { "epoch": 2.14, "learning_rate": 3.2163242009132423e-05, "loss": 0.1249, "step": 5000 }, { "epoch": 2.35, "learning_rate": 3.037956621004566e-05, "loss": 0.1117, "step": 5500 }, { "epoch": 2.57, "learning_rate": 2.8595890410958903e-05, "loss": 0.1119, "step": 6000 }, { "epoch": 2.78, "learning_rate": 2.681221461187215e-05, "loss": 0.1137, "step": 6500 }, { "epoch": 3.0, "learning_rate": 2.502853881278539e-05, "loss": 0.1092, "step": 7000 }, { "epoch": 3.0, "eval_bleu": 21.8526, "eval_gen_len": 19.2463, "eval_loss": 0.15343397855758667, "eval_runtime": 154.8601, "eval_samples_per_second": 53.623, "eval_steps_per_second": 1.679, "step": 7008 }, { "epoch": 3.21, "learning_rate": 2.324486301369863e-05, "loss": 0.0842, "step": 7500 }, { "epoch": 3.42, "learning_rate": 2.1461187214611872e-05, "loss": 0.085, "step": 8000 }, { "epoch": 3.64, "learning_rate": 1.9677511415525117e-05, "loss": 0.0853, "step": 8500 }, { "epoch": 3.85, "learning_rate": 1.7893835616438355e-05, "loss": 0.0856, "step": 9000 }, { "epoch": 4.0, "eval_bleu": 22.0841, "eval_gen_len": 19.2482, "eval_loss": 0.15252342820167542, "eval_runtime": 155.0415, "eval_samples_per_second": 53.56, "eval_steps_per_second": 1.677, "step": 9344 }, { "epoch": 4.07, "learning_rate": 1.61101598173516e-05, "loss": 0.0792, "step": 9500 }, { "epoch": 4.28, "learning_rate": 1.4326484018264841e-05, "loss": 0.0664, "step": 10000 }, { "epoch": 4.49, "learning_rate": 1.2542808219178081e-05, "loss": 0.0673, "step": 10500 }, { "epoch": 4.71, "learning_rate": 1.0759132420091326e-05, "loss": 0.0663, "step": 11000 }, { "epoch": 4.92, "learning_rate": 8.975456621004565e-06, "loss": 0.0667, "step": 11500 }, { "epoch": 5.0, "eval_bleu": 22.1943, "eval_gen_len": 19.2467, "eval_loss": 0.15876752138137817, "eval_runtime": 155.4094, "eval_samples_per_second": 53.433, "eval_steps_per_second": 1.673, "step": 11680 }, { "epoch": 5.14, "learning_rate": 7.191780821917809e-06, "loss": 0.0579, "step": 12000 }, { "epoch": 5.35, "learning_rate": 5.40810502283105e-06, "loss": 0.0545, "step": 12500 }, { "epoch": 5.57, "learning_rate": 3.6244292237442927e-06, "loss": 0.0543, "step": 13000 }, { "epoch": 5.78, "learning_rate": 1.8407534246575344e-06, "loss": 0.0534, "step": 13500 }, { "epoch": 5.99, "learning_rate": 5.7077625570776255e-08, "loss": 0.0549, "step": 14000 }, { "epoch": 6.0, "eval_bleu": 22.2237, "eval_gen_len": 19.2467, "eval_loss": 0.16120968759059906, "eval_runtime": 154.8857, "eval_samples_per_second": 53.614, "eval_steps_per_second": 1.679, "step": 14016 }, { "epoch": 6.0, "step": 14016, "total_flos": 3.006237250179072e+16, "train_loss": 0.12124856073222204, "train_runtime": 4033.4181, "train_samples_per_second": 111.169, "train_steps_per_second": 3.475 } ], "logging_steps": 500, "max_steps": 14016, "num_train_epochs": 6, "save_steps": 500, "total_flos": 3.006237250179072e+16, "trial_name": null, "trial_params": null }