pszemraj's picture
load model from drive and convert
908ec3f
raw
history blame contribute delete
No virus
6.29 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.2175637393767706,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 0.00039999999999999996,
"loss": 2.303,
"step": 2
},
{
"epoch": 0.05,
"learning_rate": 0.0005999749289566311,
"loss": 2.2906,
"step": 4
},
{
"epoch": 0.07,
"learning_rate": 0.000599774385751269,
"loss": 2.3184,
"step": 6
},
{
"epoch": 0.1,
"learning_rate": 0.0005993734334103396,
"loss": 2.3182,
"step": 8
},
{
"epoch": 0.12,
"learning_rate": 0.0005987723399838027,
"loss": 2.3544,
"step": 10
},
{
"epoch": 0.15,
"learning_rate": 0.0005979715073225829,
"loss": 2.3228,
"step": 12
},
{
"epoch": 0.17,
"learning_rate": 0.000596971470809918,
"loss": 2.3045,
"step": 14
},
{
"epoch": 0.19,
"learning_rate": 0.0005957728990034388,
"loss": 2.2987,
"step": 16
},
{
"epoch": 0.22,
"learning_rate": 0.0005943765931882153,
"loss": 2.3255,
"step": 18
},
{
"epoch": 0.24,
"learning_rate": 0.000592783486841071,
"loss": 2.3216,
"step": 20
},
{
"epoch": 0.27,
"learning_rate": 0.000590994645006523,
"loss": 2.3319,
"step": 22
},
{
"epoch": 0.29,
"learning_rate": 0.000589011263584764,
"loss": 2.3746,
"step": 24
},
{
"epoch": 0.31,
"learning_rate": 0.0005868346685321638,
"loss": 2.384,
"step": 26
},
{
"epoch": 0.34,
"learning_rate": 0.0005844663149748229,
"loss": 2.3483,
"step": 28
},
{
"epoch": 0.36,
"learning_rate": 0.0005819077862357724,
"loss": 2.3436,
"step": 30
},
{
"epoch": 0.39,
"learning_rate": 0.0005791607927764706,
"loss": 2.3331,
"step": 32
},
{
"epoch": 0.41,
"learning_rate": 0.0005762271710533015,
"loss": 2.3593,
"step": 34
},
{
"epoch": 0.44,
"learning_rate": 0.000573108882289844,
"loss": 2.283,
"step": 36
},
{
"epoch": 0.46,
"learning_rate": 0.0005698080111657278,
"loss": 2.3239,
"step": 38
},
{
"epoch": 0.48,
"learning_rate": 0.0005663267644229568,
"loss": 2.3732,
"step": 40
},
{
"epoch": 0.51,
"learning_rate": 0.0005626674693906273,
"loss": 2.3756,
"step": 42
},
{
"epoch": 0.53,
"learning_rate": 0.0005588325724290324,
"loss": 2.3442,
"step": 44
},
{
"epoch": 0.56,
"learning_rate": 0.0005548246372941892,
"loss": 2.3337,
"step": 46
},
{
"epoch": 0.58,
"learning_rate": 0.0005506463434238809,
"loss": 2.3417,
"step": 48
},
{
"epoch": 0.6,
"learning_rate": 0.0005463004841463656,
"loss": 2.3193,
"step": 50
},
{
"epoch": 0.63,
"learning_rate": 0.0005417899648129422,
"loss": 2.3054,
"step": 52
},
{
"epoch": 0.65,
"learning_rate": 0.0005371178008556277,
"loss": 2.3434,
"step": 54
},
{
"epoch": 0.68,
"learning_rate": 0.0005322871157712397,
"loss": 2.3201,
"step": 56
},
{
"epoch": 0.7,
"learning_rate": 0.0005273011390332353,
"loss": 2.3137,
"step": 58
},
{
"epoch": 0.73,
"learning_rate": 0.0005221632039327013,
"loss": 2.329,
"step": 60
},
{
"epoch": 0.75,
"learning_rate": 0.0005168767453499378,
"loss": 2.3401,
"step": 62
},
{
"epoch": 0.77,
"learning_rate": 0.0005114452974581268,
"loss": 2.3118,
"step": 64
},
{
"epoch": 0.8,
"learning_rate": 0.00050587249136062,
"loss": 2.3207,
"step": 66
},
{
"epoch": 0.82,
"learning_rate": 0.0005001620526634258,
"loss": 2.3203,
"step": 68
},
{
"epoch": 0.85,
"learning_rate": 0.0004943177989845176,
"loss": 2.3345,
"step": 70
},
{
"epoch": 0.87,
"learning_rate": 0.0004883436374016295,
"loss": 2.3096,
"step": 72
},
{
"epoch": 0.89,
"learning_rate": 0.000482243561840245,
"loss": 2.3224,
"step": 74
},
{
"epoch": 0.92,
"learning_rate": 0.00047602165040352534,
"loss": 2.3391,
"step": 76
},
{
"epoch": 0.94,
"learning_rate": 0.00046968206264596157,
"loss": 2.3305,
"step": 78
},
{
"epoch": 0.97,
"learning_rate": 0.00046322903679257474,
"loss": 2.3762,
"step": 80
},
{
"epoch": 0.99,
"learning_rate": 0.0004566668869055215,
"loss": 2.3577,
"step": 82
},
{
"epoch": 1.02,
"learning_rate": 0.00045,
"loss": 3.1011,
"step": 84
},
{
"epoch": 1.05,
"learning_rate": 0.0004432328331113847,
"loss": 2.228,
"step": 86
},
{
"epoch": 1.07,
"learning_rate": 0.00043636991031555014,
"loss": 2.159,
"step": 88
},
{
"epoch": 1.1,
"learning_rate": 0.00042941581970437604,
"loss": 2.282,
"step": 90
},
{
"epoch": 1.12,
"learning_rate": 0.00042237521031845504,
"loss": 2.282,
"step": 92
},
{
"epoch": 1.15,
"learning_rate": 0.00041525278903905525,
"loss": 2.2421,
"step": 94
},
{
"epoch": 1.17,
"learning_rate": 0.00040805331744141307,
"loss": 2.2593,
"step": 96
},
{
"epoch": 1.19,
"learning_rate": 0.0004007816086114626,
"loss": 2.2961,
"step": 98
},
{
"epoch": 1.22,
"learning_rate": 0.00039344252392812737,
"loss": 2.1614,
"step": 100
}
],
"max_steps": 246,
"num_train_epochs": 3,
"total_flos": 9.509605029875548e+17,
"trial_name": null,
"trial_params": null
}