{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.056338028169014, "eval_steps": 500, "global_step": 1200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0880281690140845, "grad_norm": 2641.419677734375, "learning_rate": 4.166666666666667e-06, "loss": 207.5718, "step": 100 }, { "epoch": 0.176056338028169, "grad_norm": 3060.73486328125, "learning_rate": 4.62962962962963e-06, "loss": 126.3771, "step": 200 }, { "epoch": 0.2640845070422535, "grad_norm": 808.4242553710938, "learning_rate": 4.166666666666667e-06, "loss": 125.2416, "step": 300 }, { "epoch": 0.352112676056338, "grad_norm": 1393.006591796875, "learning_rate": 3.7037037037037037e-06, "loss": 104.9298, "step": 400 }, { "epoch": 0.44014084507042256, "grad_norm": 1380.7303466796875, "learning_rate": 3.240740740740741e-06, "loss": 109.4596, "step": 500 }, { "epoch": 0.528169014084507, "grad_norm": 2457.56494140625, "learning_rate": 2.7777777777777783e-06, "loss": 110.524, "step": 600 }, { "epoch": 0.6161971830985915, "grad_norm": 2740.54052734375, "learning_rate": 2.314814814814815e-06, "loss": 114.8047, "step": 700 }, { "epoch": 0.704225352112676, "grad_norm": 1358.532470703125, "learning_rate": 1.8518518518518519e-06, "loss": 105.7686, "step": 800 }, { "epoch": 0.7922535211267606, "grad_norm": 2155.906982421875, "learning_rate": 1.3888888888888892e-06, "loss": 93.1012, "step": 900 }, { "epoch": 0.8802816901408451, "grad_norm": 2438.5126953125, "learning_rate": 9.259259259259259e-07, "loss": 108.8455, "step": 1000 }, { "epoch": 0.9683098591549296, "grad_norm": 1066.7130126953125, "learning_rate": 4.6296296296296297e-07, "loss": 99.7556, "step": 1100 }, { "epoch": 1.0, "eval_loss": 283.56231689453125, "eval_runtime": 22.2792, "eval_samples_per_second": 45.334, "eval_steps_per_second": 5.7, "step": 1136 }, { "epoch": 1.056338028169014, "grad_norm": 2160.288818359375, "learning_rate": 0.0, "loss": 106.0304, "step": 1200 } ], "logging_steps": 100, "max_steps": 1200, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 600, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }